1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default limit used by stack-probing code when the target does not
   provide one; -1 means "no limit".  The conditional guard was left
   unterminated in this copy — restore the matching #endif.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  The
   cost arrays below have five entries — QI, HI, SI, DI and "other" —
   so any mode that is not one of the four named integer modes maps to
   slot 4.  The final ": 4)" default arm was missing in this copy.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop-strategy entry for table slots that are never
   consulted (e.g. the 64-bit variant on 32-bit-only processors).  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs size_cost = { /* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.  */
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some time).  */
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
775 /* On K8:
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
778 On AMDFAM10:
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3 */
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some time).  */
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Instruction cost table used when tuning for the Pentium 4 (NetBurst).
   Positional initializer for struct processor_costs; each slot is
   annotated with the field it fills.  NOTE(review): relative to the
   sibling tables below (see generic64_cost), this excerpt appears to be
   missing a MOVE_RATIO slot, a branch-cost slot, part of the second
   stringop descriptor and the closing "};" -- verify field positions
   against the complete file before editing.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* Block-move (memcpy) algorithm table: {32-bit variant, 64-bit variant}.  */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
/* Block-set (memset) algorithm table.  NOTE(review): the 32-bit entry
   below lacks its terminating {-1, ...} pair in this excerpt.  */
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Instruction cost table used when tuning for Nocona (64-bit NetBurst).
   Same positional layout as pentium4_cost above.  NOTE(review): as with
   the other tables in this excerpt, some slots (MOVE_RATIO, branch cost)
   and the closing "};" appear to be missing -- confirm against the full
   file.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* Block-move (memcpy) algorithm table: {32-bit variant, 64-bit variant}.  */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
/* Block-set (memset) algorithm table.  NOTE(review): the 32-bit entry
   below lacks its terminating {-1, ...} pair in this excerpt.  */
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Instruction cost table used when tuning for Core 2.  Same positional
   layout as the tables above.  NOTE(review): a MOVE_RATIO slot and the
   closing "};" appear to be missing from this excerpt.  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode.
(Comment corrected: this slot previously said "loading integer
registers", but by position -- directly after the fp-load entry,
matching every sibling table -- it is the fp-store entry.) */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* Block-move (memcpy) algorithm table: {32-bit variant, 64-bit variant}.  */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Block-set (memset) algorithm table.  */
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
/* "Generic" 64-bit cost table: a compromise across the 64-bit CPUs the
   compiler may be asked to tune for generically.  NOTE(review): the
   closing "};" appears to be missing from this excerpt.  */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* Stringop tables: 32-bit entries are dummies since this table is only
   used in 64-bit mode.  */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* "Generic" 32-bit cost table; mirrors generic64_cost but with a smaller
   assumed L2 and rep_prefix_4_byte stringops (only the 32-bit stringop
   entries are real; the 64-bit ones are dummies).  NOTE(review): the
   closing "};" appears to be missing from this excerpt.  */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
/* Active cost table; set according to -mtune and defaulted to Pentium.  */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks.  One bit per processor;
   these are OR-ed together to say which CPUs a tuning applies to.  */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
/* Convenience union of all AMD processors handled here.  */
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
/* One bitmask per X86_TUNE_* enumerator: the set of processors for which
   that tuning is enabled.  Entries are positional, so order must match
   the X86_TUNE_* enum exactly.  NOTE(review): in this excerpt a number
   of entries have their comment but no visible mask line (e.g.
   ZERO_EXTEND_WITH_AND, USE_BIT_TEST, SHIFT1, USE_FFREEP) and the
   closing "};" is absent -- verify against the complete file before
   renumbering or inserting entries.  */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1227 /* X86_TUNE_USE_BIT_TEST */
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1288 /* X86_TUNE_READ_MODIFY */
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that thread 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1376 /* X86_TUNE_USE_FFREEP */
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1440 /* Feature tests against the various architecture variations. */
/* One bitmask per X86_ARCH_* enumerator, as for ix86_tune_features
   above.  NOTE(review): mask lines for CMPXCHG, CMPXCHG8B, XADD and
   BSWAP, and the closing "};", are not visible in this excerpt.  */
1441 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1442 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1443 ~(m_386 | m_486 | m_PENT | m_K6),
1445 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1448 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1451 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1454 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processors on which -maccumulate-outgoing-args is the default.  */
1458 static const unsigned int x86_accumulate_outgoing_args
1459 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Processors for which 80387 math is always considered "fancy".  */
1461 static const unsigned int x86_arch_always_fancy_math_387
1462 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1463 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Stringop algorithm forced via command line; no_stringop = not forced.  */
1465 static enum stringop_alg stringop_alg = no_stringop;
1467 /* In case the average insn count for single function invocation is
1468 lower than this constant, emit fast (but longer) prologue and
1470 #define FAST_PROLOGUE_INSN_COUNT 20
1472 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1473 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1474 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1475 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1477 /* Array of the smallest class containing reg number REGNO, indexed by
1478 REGNO. Used by REGNO_REG_CLASS in i386.h. */
/* NOTE(review): several initializer braces ("{" / "};") and some interior
   comment lines for these register-map arrays are not visible in this
   excerpt -- verify against the full file before editing entries.  */
1480 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1482 /* ax, dx, cx, bx */
1483 AREG, DREG, CREG, BREG,
1484 /* si, di, bp, sp */
1485 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1487 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1488 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1491 /* flags, fpsr, fpcr, frame */
1492 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1494 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1497 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1500 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1501 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1502 /* SSE REX registers */
1503 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1507 /* The "default" register map used in 32bit mode. */
/* Maps gcc register numbers to debugger (DBX/stabs) register numbers;
   -1 means no debugger number.  */
1509 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1511 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1512 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1513 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1514 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1515 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1516 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1517 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
/* Hard register numbers (gcc numbering) used for integer argument
   passing under the x86-64 SysV ABI, in parameter order.  */
1520 static int const x86_64_int_parameter_registers[6] =
1522 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1523 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Same, for the Microsoft x64 calling convention (4 register args).  */
1526 static int const x86_64_ms_abi_int_parameter_registers[4] =
1528 2 /*RCX*/, 1 /*RDX*/,
1529 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
/* Registers used to return integer values.  */
1532 static int const x86_64_int_return_registers[4] =
1534 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1537 /* The "default" register map used in 64bit mode. */
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)
/* DWARF register numbering for SVR4-style debug info; see the long
   comment above for the rationale behind the FP-stack numbers.  */
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1614 /* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
/* When non-null, a flags-register rtx already holding the comparison
   result, so no new compare needs to be emitted.  */
1619 rtx ix86_compare_emitted = NULL_RTX;
1621 /* Size of the register save area. */
1622 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1624 /* Define the structure for the machine field in struct function. */
/* Linked-list node caching a stack slot allocated for a given mode.
   NOTE(review): some members and braces of both structs below are not
   visible in this excerpt.  */
1626 struct stack_local_entry GTY(())
1628 unsigned short mode;
1631 struct stack_local_entry *next;
1634 /* Structure describing stack frame layout.
1635 Stack grows downward:
1641 saved frame pointer if frame_pointer_needed
1642 <- HARD_FRAME_POINTER
1647 [va_arg registers] (
1648 > to_allocate <- FRAME_POINTER
1658 HOST_WIDE_INT frame;
1660 int outgoing_arguments_size;
/* Bytes the prologue must subtract from the stack pointer.  */
1663 HOST_WIDE_INT to_allocate;
1664 /* The offsets relative to ARG_POINTER. */
1665 HOST_WIDE_INT frame_pointer_offset;
1666 HOST_WIDE_INT hard_frame_pointer_offset;
1667 HOST_WIDE_INT stack_pointer_offset;
1669 /* When save_regs_using_mov is set, emit prologue using
1670 move instead of push instructions. */
1671 bool save_regs_using_mov;
1674 /* Code model option. */
1675 enum cmodel ix86_cmodel;
/* Assembler syntax selected by -masm= (AT&T by default).  */
1677 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access model selected by -mtls-dialect= (GNU by default).  */
1679 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1681 /* Which unit we are generating floating point math for. */
1682 enum fpmath_unit ix86_fpmath;
1684 /* Which cpu are we scheduling for. */
1685 enum processor_type ix86_tune;
1687 /* Which instruction set architecture to use. */
1688 enum processor_type ix86_arch;
1690 /* true if sse prefetch instruction is not NOOP. */
1691 int x86_prefetch_sse;
1693 /* ix86_regparm_string as a number */
1694 static int ix86_regparm;
1696 /* -mstackrealign option */
1697 extern int ix86_force_align_arg_pointer;
/* Attribute name string for force_align_arg_pointer.  */
1698 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1700 /* Preferred alignment for stack boundary in bits. */
1701 unsigned int ix86_preferred_stack_boundary;
1703 /* Values 1-5: see jump.c */
1704 int ix86_branch_cost;
1706 /* Variables which are this size or smaller are put in the data/bss
1707 or ldata/lbss sections. */
1709 int ix86_section_threshold = 65536;
1711 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1712 char internal_label_prefix[16];
/* Length of internal_label_prefix, cached to avoid repeated strlen.  */
1713 int internal_label_prefix_len;
1715 /* Fence to use after loop using movnt. */
1715 /* Fence to use after loop using movnt. */
1718 /* Register class used for passing given 64bit part of the argument.
1719 These represent classes as documented by the PS ABI, with the exception
1720 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1721 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1723 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1724 whenever possible (upper half does contain padding). */
/* NOTE(review): several enumerators of this enum were lost in extraction;
   only three members are visible below.  */
1725 enum x86_64_reg_class
1728 X86_64_INTEGER_CLASS,
1729 X86_64_INTEGERSI_CLASS,
1736 X86_64_COMPLEX_X87_CLASS,
/* Human-readable names for the register classes above (debug output).  */
1739 static const char * const x86_64_reg_class_name[] =
1741 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1742 "sseup", "x87", "x87up", "cplx87", "no"
/* Presumably the maximum number of eightbyte classes one argument can
   occupy under the psABI classification -- TODO confirm against the
   classify_argument implementation (not visible here).  */
1745 #define MAX_CLASSES 4
1747 /* Table of constants used by fldpi, fldln2, etc.... */
1748 static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Lazily-initialized guard for the table above.  */
1749 static bool ext_80387_constants_init = 0;
/* Forward declarations for helpers defined later in this file.  */
1752 static struct machine_function * ix86_init_machine_status (void);
1753 static rtx ix86_function_value (const_tree, const_tree, bool);
1754 static int ix86_function_regparm (const_tree, const_tree);
1755 static void ix86_compute_frame_layout (struct ix86_frame *);
1756 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1760 /* The svr4 ABI for the i386 says that records and unions are returned
1762 #ifndef DEFAULT_PCC_STRUCT_RETURN
1763 #define DEFAULT_PCC_STRUCT_RETURN 1
1766 /* Bit flags that specify the ISA we are compiling for. */
1767 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1769 /* A mask of ix86_isa_flags that includes bit X if X
1770 was set or cleared on the command line. */
1771 static int ix86_isa_flags_explicit;
1773 /* Define a set of ISAs which are available when a given ISA is
1774 enabled. MMX and SSE ISAs are handled separately. */
/* Each *_SET mask includes the named ISA plus every ISA it implies
   (e.g. enabling SSE3 also enables SSE2 and SSE).  */
1776 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1777 #define OPTION_MASK_ISA_3DNOW_SET \
1778 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1780 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1781 #define OPTION_MASK_ISA_SSE2_SET \
1782 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1783 #define OPTION_MASK_ISA_SSE3_SET \
1784 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1785 #define OPTION_MASK_ISA_SSSE3_SET \
1786 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1787 #define OPTION_MASK_ISA_SSE4_1_SET \
1788 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1789 #define OPTION_MASK_ISA_SSE4_2_SET \
1790 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1792 /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same as -msse4.2.  */
1794 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1796 #define OPTION_MASK_ISA_SSE4A_SET \
1797 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1798 #define OPTION_MASK_ISA_SSE5_SET \
1799 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1801 /* Define a set of ISAs which aren't available when a given ISA is
1802 disabled. MMX and SSE ISAs are handled separately. */
/* Each *_UNSET mask includes the named ISA plus every ISA that depends
   on it (the inverse of the *_SET chains above).  */
1804 #define OPTION_MASK_ISA_MMX_UNSET \
1805 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1806 #define OPTION_MASK_ISA_3DNOW_UNSET \
1807 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1808 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1810 #define OPTION_MASK_ISA_SSE_UNSET \
1811 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1812 #define OPTION_MASK_ISA_SSE2_UNSET \
1813 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1814 #define OPTION_MASK_ISA_SSE3_UNSET \
1815 (OPTION_MASK_ISA_SSE3 \
1816 | OPTION_MASK_ISA_SSSE3_UNSET \
1817 | OPTION_MASK_ISA_SSE4A_UNSET )
1818 #define OPTION_MASK_ISA_SSSE3_UNSET \
1819 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1820 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1821 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1822 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1824 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same as -mno-sse4.1.  */
1826 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1828 #define OPTION_MASK_ISA_SSE4A_UNSET \
1829 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1831 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1833 /* Vectorization library interface and handlers. */
/* Set by -mveclibabi= below in override_options; NULL means no external
   vectorized math library.  */
1834 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1835 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1836 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1838 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the switch over CODE, its case labels, braces and return
   statements were elided by the extraction; only the per-ISA flag updates
   survive.  Each visible pair follows the same pattern: for -mFOO, OR in
   OPTION_MASK_ISA_FOO_SET; for -mno-foo, clear OPTION_MASK_ISA_FOO_UNSET.
   In both directions the same mask is recorded in ix86_isa_flags_explicit
   so override_options knows the user chose it on the command line.  */
1841 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* -mmmx / -mno-mmx.  */
1848 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1849 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1853 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1854 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* -m3dnow / -mno-3dnow.  */
1861 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1862 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1866 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1867 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* -msse / -mno-sse.  */
1877 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1878 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1882 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1883 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* -msse2 / -mno-sse2.  */
1890 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1891 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1895 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1896 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* -msse3 / -mno-sse3.  */
1903 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1904 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1908 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1909 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* -mssse3 / -mno-ssse3.  */
1916 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1917 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1921 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1922 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* -msse4.1 / -mno-sse4.1.  */
1929 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1930 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1934 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1935 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* -msse4.2 / -mno-sse4.2.  */
1942 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1943 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1947 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1948 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* -msse4 / -mno-sse4 (umbrella options).  */
1953 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1954 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1958 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1959 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* -msse4a / -mno-sse4a.  */
1965 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1966 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1970 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1971 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* -msse5 / -mno-sse5.  */
1978 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1979 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
1983 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
1984 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
1993 /* Sometimes certain combinations of command options do not make
1994 sense on a particular target machine. You can define a macro
1995 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1996 defined, is executed once just after all the command options have
1999 Don't use this macro to turn on various extra optimizations for
2000 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2003 override_options (void)
2006 int ix86_tune_defaulted = 0;
2007 int ix86_arch_specified = 0;
2008 unsigned int ix86_arch_mask, ix86_tune_mask;
2010 /* Comes from final.c -- no real reason to change it. */
2011 #define MAX_CODE_ALIGN 16
2015 const struct processor_costs *cost; /* Processor costs */
2016 const int align_loop; /* Default alignments. */
2017 const int align_loop_max_skip;
2018 const int align_jump;
2019 const int align_jump_max_skip;
2020 const int align_func;
2022 const processor_target_table[PROCESSOR_max] =
2024 {&i386_cost, 4, 3, 4, 3, 4},
2025 {&i486_cost, 16, 15, 16, 15, 16},
2026 {&pentium_cost, 16, 7, 16, 7, 16},
2027 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2028 {&geode_cost, 0, 0, 0, 0, 0},
2029 {&k6_cost, 32, 7, 32, 7, 32},
2030 {&athlon_cost, 16, 7, 16, 7, 16},
2031 {&pentium4_cost, 0, 0, 0, 0, 0},
2032 {&k8_cost, 16, 7, 16, 7, 16},
2033 {&nocona_cost, 0, 0, 0, 0, 0},
2034 {&core2_cost, 16, 10, 16, 10, 16},
2035 {&generic32_cost, 16, 7, 16, 7, 16},
2036 {&generic64_cost, 16, 10, 16, 10, 16},
2037 {&amdfam10_cost, 32, 24, 32, 7, 32}
2040 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2071 PTA_PREFETCH_SSE = 1 << 4,
2073 PTA_3DNOW_A = 1 << 6,
2077 PTA_POPCNT = 1 << 10,
2079 PTA_SSE4A = 1 << 12,
2080 PTA_NO_SAHF = 1 << 13,
2081 PTA_SSE4_1 = 1 << 14,
2082 PTA_SSE4_2 = 1 << 15,
2085 PTA_PCLMUL = 1 << 18
2090 const char *const name; /* processor name or nickname. */
2091 const enum processor_type processor;
2092 const unsigned /*enum pta_flags*/ flags;
2094 const processor_alias_table[] =
2096 {"i386", PROCESSOR_I386, 0},
2097 {"i486", PROCESSOR_I486, 0},
2098 {"i586", PROCESSOR_PENTIUM, 0},
2099 {"pentium", PROCESSOR_PENTIUM, 0},
2100 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2101 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2102 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2103 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2104 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2105 {"i686", PROCESSOR_PENTIUMPRO, 0},
2106 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2107 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2108 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2109 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2110 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2111 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2112 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2113 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2114 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2115 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2116 | PTA_CX16 | PTA_NO_SAHF)},
2117 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2118 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2121 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2122 |PTA_PREFETCH_SSE)},
2123 {"k6", PROCESSOR_K6, PTA_MMX},
2124 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2125 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2126 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2127 | PTA_PREFETCH_SSE)},
2128 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2129 | PTA_PREFETCH_SSE)},
2130 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2132 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2134 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2136 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2137 | PTA_MMX | PTA_SSE | PTA_SSE2
2139 {"k8", PROCESSOR_K8, (PTA_64BIT
2140 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2141 | PTA_SSE | PTA_SSE2
2143 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2144 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2145 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2147 {"opteron", PROCESSOR_K8, (PTA_64BIT
2148 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149 | PTA_SSE | PTA_SSE2
2151 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2152 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2153 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2155 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2156 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2157 | PTA_SSE | PTA_SSE2
2159 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2160 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2161 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2163 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2164 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2165 | PTA_SSE | PTA_SSE2
2167 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2168 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2169 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2171 | PTA_CX16 | PTA_ABM)},
2172 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2176 | PTA_CX16 | PTA_ABM)},
2177 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2178 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2181 int const pta_size = ARRAY_SIZE (processor_alias_table);
2183 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2184 SUBTARGET_OVERRIDE_OPTIONS;
2187 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2188 SUBSUBTARGET_OVERRIDE_OPTIONS;
2191 /* -fPIC is the default for x86_64. */
2192 if (TARGET_MACHO && TARGET_64BIT)
2195 /* Set the default values for switches whose default depends on TARGET_64BIT
2196 in case they weren't overwritten by command line options. */
2199 /* Mach-O doesn't support omitting the frame pointer for now. */
2200 if (flag_omit_frame_pointer == 2)
2201 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2202 if (flag_asynchronous_unwind_tables == 2)
2203 flag_asynchronous_unwind_tables = 1;
2204 if (flag_pcc_struct_return == 2)
2205 flag_pcc_struct_return = 0;
2209 if (flag_omit_frame_pointer == 2)
2210 flag_omit_frame_pointer = 0;
2211 if (flag_asynchronous_unwind_tables == 2)
2212 flag_asynchronous_unwind_tables = 0;
2213 if (flag_pcc_struct_return == 2)
2214 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2217 /* Need to check -mtune=generic first. */
2218 if (ix86_tune_string)
2220 if (!strcmp (ix86_tune_string, "generic")
2221 || !strcmp (ix86_tune_string, "i686")
2222 /* As special support for cross compilers we read -mtune=native
2223 as -mtune=generic. With native compilers we won't see the
2224 -mtune=native, as it was changed by the driver. */
2225 || !strcmp (ix86_tune_string, "native"))
2228 ix86_tune_string = "generic64";
2230 ix86_tune_string = "generic32";
2232 else if (!strncmp (ix86_tune_string, "generic", 7))
2233 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2237 if (ix86_arch_string)
2238 ix86_tune_string = ix86_arch_string;
2239 if (!ix86_tune_string)
2241 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2242 ix86_tune_defaulted = 1;
2245 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2246 need to use a sensible tune option. */
2247 if (!strcmp (ix86_tune_string, "generic")
2248 || !strcmp (ix86_tune_string, "x86-64")
2249 || !strcmp (ix86_tune_string, "i686"))
2252 ix86_tune_string = "generic64";
2254 ix86_tune_string = "generic32";
2257 if (ix86_stringop_string)
2259 if (!strcmp (ix86_stringop_string, "rep_byte"))
2260 stringop_alg = rep_prefix_1_byte;
2261 else if (!strcmp (ix86_stringop_string, "libcall"))
2262 stringop_alg = libcall;
2263 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2264 stringop_alg = rep_prefix_4_byte;
2265 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2266 stringop_alg = rep_prefix_8_byte;
2267 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2268 stringop_alg = loop_1_byte;
2269 else if (!strcmp (ix86_stringop_string, "loop"))
2270 stringop_alg = loop;
2271 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2272 stringop_alg = unrolled_loop;
2274 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2276 if (!strcmp (ix86_tune_string, "x86-64"))
2277 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2278 "-mtune=generic instead as appropriate.");
2280 if (!ix86_arch_string)
2281 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2283 ix86_arch_specified = 1;
2285 if (!strcmp (ix86_arch_string, "generic"))
2286 error ("generic CPU can be used only for -mtune= switch");
2287 if (!strncmp (ix86_arch_string, "generic", 7))
2288 error ("bad value (%s) for -march= switch", ix86_arch_string);
2290 if (ix86_cmodel_string != 0)
2292 if (!strcmp (ix86_cmodel_string, "small"))
2293 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2294 else if (!strcmp (ix86_cmodel_string, "medium"))
2295 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2296 else if (!strcmp (ix86_cmodel_string, "large"))
2297 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2299 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2300 else if (!strcmp (ix86_cmodel_string, "32"))
2301 ix86_cmodel = CM_32;
2302 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2303 ix86_cmodel = CM_KERNEL;
2305 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2309 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2310 use of rip-relative addressing. This eliminates fixups that
2311 would otherwise be needed if this object is to be placed in a
2312 DLL, and is essentially just as efficient as direct addressing. */
2313 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2314 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2315 else if (TARGET_64BIT)
2316 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2318 ix86_cmodel = CM_32;
2320 if (ix86_asm_string != 0)
2323 && !strcmp (ix86_asm_string, "intel"))
2324 ix86_asm_dialect = ASM_INTEL;
2325 else if (!strcmp (ix86_asm_string, "att"))
2326 ix86_asm_dialect = ASM_ATT;
2328 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2330 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2331 error ("code model %qs not supported in the %s bit mode",
2332 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2333 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2334 sorry ("%i-bit mode not compiled in",
2335 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2337 for (i = 0; i < pta_size; i++)
2338 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2340 ix86_arch = processor_alias_table[i].processor;
2341 /* Default cpu tuning to the architecture. */
2342 ix86_tune = ix86_arch;
2344 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2345 error ("CPU you selected does not support x86-64 "
2348 if (processor_alias_table[i].flags & PTA_MMX
2349 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2350 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2351 if (processor_alias_table[i].flags & PTA_3DNOW
2352 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2353 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2354 if (processor_alias_table[i].flags & PTA_3DNOW_A
2355 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2356 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2357 if (processor_alias_table[i].flags & PTA_SSE
2358 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2359 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2360 if (processor_alias_table[i].flags & PTA_SSE2
2361 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2362 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2363 if (processor_alias_table[i].flags & PTA_SSE3
2364 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2365 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2366 if (processor_alias_table[i].flags & PTA_SSSE3
2367 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2368 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2369 if (processor_alias_table[i].flags & PTA_SSE4_1
2370 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2371 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2372 if (processor_alias_table[i].flags & PTA_SSE4_2
2373 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2374 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2375 if (processor_alias_table[i].flags & PTA_SSE4A
2376 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2377 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2378 if (processor_alias_table[i].flags & PTA_SSE5
2379 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2380 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2382 if (processor_alias_table[i].flags & PTA_ABM)
2384 if (processor_alias_table[i].flags & PTA_CX16)
2385 x86_cmpxchg16b = true;
2386 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2388 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2389 x86_prefetch_sse = true;
2390 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2392 if (processor_alias_table[i].flags & PTA_AES)
2394 if (processor_alias_table[i].flags & PTA_PCLMUL)
2401 error ("bad value (%s) for -march= switch", ix86_arch_string);
2403 ix86_arch_mask = 1u << ix86_arch;
2404 for (i = 0; i < X86_ARCH_LAST; ++i)
2405 ix86_arch_features[i] &= ix86_arch_mask;
2407 for (i = 0; i < pta_size; i++)
2408 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2410 ix86_tune = processor_alias_table[i].processor;
2411 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2413 if (ix86_tune_defaulted)
2415 ix86_tune_string = "x86-64";
2416 for (i = 0; i < pta_size; i++)
2417 if (! strcmp (ix86_tune_string,
2418 processor_alias_table[i].name))
2420 ix86_tune = processor_alias_table[i].processor;
2423 error ("CPU you selected does not support x86-64 "
2426 /* Intel CPUs have always interpreted SSE prefetch instructions as
2427 NOPs; so, we can enable SSE prefetch instructions even when
2428 -mtune (rather than -march) points us to a processor that has them.
2429 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2430 higher processors. */
2432 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2433 x86_prefetch_sse = true;
2437 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2439 /* Enable SSE2 if AES or PCLMUL is enabled. */
2440 if ((x86_aes || x86_pclmul)
2441 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2443 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2444 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2447 ix86_tune_mask = 1u << ix86_tune;
2448 for (i = 0; i < X86_TUNE_LAST; ++i)
2449 ix86_tune_features[i] &= ix86_tune_mask;
2452 ix86_cost = &size_cost;
2454 ix86_cost = processor_target_table[ix86_tune].cost;
2456 /* Arrange to set up i386_stack_locals for all functions. */
2457 init_machine_status = ix86_init_machine_status;
2459 /* Validate -mregparm= value. */
2460 if (ix86_regparm_string)
2463 warning (0, "-mregparm is ignored in 64-bit mode");
2464 i = atoi (ix86_regparm_string);
2465 if (i < 0 || i > REGPARM_MAX)
2466 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2471 ix86_regparm = REGPARM_MAX;
2473 /* If the user has provided any of the -malign-* options,
2474 warn and use that value only if -falign-* is not set.
2475 Remove this code in GCC 3.2 or later. */
2476 if (ix86_align_loops_string)
2478 warning (0, "-malign-loops is obsolete, use -falign-loops");
2479 if (align_loops == 0)
2481 i = atoi (ix86_align_loops_string);
2482 if (i < 0 || i > MAX_CODE_ALIGN)
2483 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2485 align_loops = 1 << i;
2489 if (ix86_align_jumps_string)
2491 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2492 if (align_jumps == 0)
2494 i = atoi (ix86_align_jumps_string);
2495 if (i < 0 || i > MAX_CODE_ALIGN)
2496 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2498 align_jumps = 1 << i;
2502 if (ix86_align_funcs_string)
2504 warning (0, "-malign-functions is obsolete, use -falign-functions");
2505 if (align_functions == 0)
2507 i = atoi (ix86_align_funcs_string);
2508 if (i < 0 || i > MAX_CODE_ALIGN)
2509 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2511 align_functions = 1 << i;
2515 /* Default align_* from the processor table. */
2516 if (align_loops == 0)
2518 align_loops = processor_target_table[ix86_tune].align_loop;
2519 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2521 if (align_jumps == 0)
2523 align_jumps = processor_target_table[ix86_tune].align_jump;
2524 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2526 if (align_functions == 0)
2528 align_functions = processor_target_table[ix86_tune].align_func;
2531 /* Validate -mbranch-cost= value, or provide default. */
2532 ix86_branch_cost = ix86_cost->branch_cost;
2533 if (ix86_branch_cost_string)
2535 i = atoi (ix86_branch_cost_string);
2537 error ("-mbranch-cost=%d is not between 0 and 5", i);
2539 ix86_branch_cost = i;
2541 if (ix86_section_threshold_string)
2543 i = atoi (ix86_section_threshold_string);
2545 error ("-mlarge-data-threshold=%d is negative", i);
2547 ix86_section_threshold = i;
2550 if (ix86_tls_dialect_string)
2552 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2553 ix86_tls_dialect = TLS_DIALECT_GNU;
2554 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2555 ix86_tls_dialect = TLS_DIALECT_GNU2;
2556 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2557 ix86_tls_dialect = TLS_DIALECT_SUN;
2559 error ("bad value (%s) for -mtls-dialect= switch",
2560 ix86_tls_dialect_string);
2563 if (ix87_precision_string)
2565 i = atoi (ix87_precision_string);
2566 if (i != 32 && i != 64 && i != 80)
2567 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2572 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2574 /* Enable by default the SSE and MMX builtins. Do allow the user to
2575 explicitly disable any of these. In particular, disabling SSE and
2576 MMX for kernel code is extremely useful. */
2577 if (!ix86_arch_specified)
2579 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2580 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2583 warning (0, "-mrtd is ignored in 64bit mode");
2587 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2589 if (!ix86_arch_specified)
2591 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2593 /* i386 ABI does not specify red zone. It still makes sense to use it
2594 when programmer takes care to stack from being destroyed. */
2595 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2596 target_flags |= MASK_NO_RED_ZONE;
2599 /* Keep nonleaf frame pointers. */
2600 if (flag_omit_frame_pointer)
2601 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2602 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2603 flag_omit_frame_pointer = 1;
2605 /* If we're doing fast math, we don't care about comparison order
2606 wrt NaNs. This lets us use a shorter comparison sequence. */
2607 if (flag_finite_math_only)
2608 target_flags &= ~MASK_IEEE_FP;
2610 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2611 since the insns won't need emulation. */
2612 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2613 target_flags &= ~MASK_NO_FANCY_MATH_387;
2615 /* Likewise, if the target doesn't have a 387, or we've specified
2616 software floating point, don't use 387 inline intrinsics. */
2618 target_flags |= MASK_NO_FANCY_MATH_387;
2620 /* Turn on MMX builtins for -msse. */
2623 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2624 x86_prefetch_sse = true;
2627 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2628 if (TARGET_SSE4_2 || TARGET_ABM)
2631 /* Validate -mpreferred-stack-boundary= value, or provide default.
2632 The default of 128 bits is for Pentium III's SSE __m128. We can't
2633 change it because of optimize_size. Otherwise, we can't mix object
2634 files compiled with -Os and -On. */
2635 ix86_preferred_stack_boundary = 128;
2636 if (ix86_preferred_stack_boundary_string)
2638 i = atoi (ix86_preferred_stack_boundary_string);
2639 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2640 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2641 TARGET_64BIT ? 4 : 2);
2643 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2646 /* Accept -msseregparm only if at least SSE support is enabled. */
2647 if (TARGET_SSEREGPARM
2649 error ("-msseregparm used without SSE enabled");
2651 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2652 if (ix86_fpmath_string != 0)
2654 if (! strcmp (ix86_fpmath_string, "387"))
2655 ix86_fpmath = FPMATH_387;
2656 else if (! strcmp (ix86_fpmath_string, "sse"))
2660 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2661 ix86_fpmath = FPMATH_387;
2664 ix86_fpmath = FPMATH_SSE;
2666 else if (! strcmp (ix86_fpmath_string, "387,sse")
2667 || ! strcmp (ix86_fpmath_string, "sse,387"))
2671 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2672 ix86_fpmath = FPMATH_387;
2674 else if (!TARGET_80387)
2676 warning (0, "387 instruction set disabled, using SSE arithmetics");
2677 ix86_fpmath = FPMATH_SSE;
2680 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2683 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2686 /* If the i387 is disabled, then do not return values in it. */
2688 target_flags &= ~MASK_FLOAT_RETURNS;
2690 /* Use external vectorized library in vectorizing intrinsics. */
2691 if (ix86_veclibabi_string)
2693 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2694 ix86_veclib_handler = ix86_veclibabi_svml;
2695 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2696 ix86_veclib_handler = ix86_veclibabi_acml;
2698 error ("unknown vectorization library ABI type (%s) for "
2699 "-mveclibabi= switch", ix86_veclibabi_string);
2702 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2703 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2705 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2707 /* ??? Unwind info is not correct around the CFG unless either a frame
2708 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2709 unwind info generation to be aware of the CFG and propagating states
2711 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2712 || flag_exceptions || flag_non_call_exceptions)
2713 && flag_omit_frame_pointer
2714 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2716 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2717 warning (0, "unwind tables currently require either a frame pointer "
2718 "or -maccumulate-outgoing-args for correctness");
2719 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2722 /* If stack probes are required, the space used for large function
2723 arguments on the stack must also be probed, so enable
2724 -maccumulate-outgoing-args so this happens in the prologue. */
2725 if (TARGET_STACK_PROBE
2726 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2728 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2729 warning (0, "stack probing requires -maccumulate-outgoing-args "
2731 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2734 /* For sane SSE instruction set generation we need fcomi instruction.
2735 It is safe to enable all CMOVE instructions. */
2739 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2742 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2743 p = strchr (internal_label_prefix, 'X');
2744 internal_label_prefix_len = p - internal_label_prefix;
2748 /* When scheduling description is not available, disable scheduler pass
2749 so it won't slow down the compilation and make x87 code slower. */
2750 if (!TARGET_SCHEDULE)
2751 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2753 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2754 set_param_value ("simultaneous-prefetches",
2755 ix86_cost->simultaneous_prefetches);
2756 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2757 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2758 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2759 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2760 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2761 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2763 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2764 can be optimized to ap = __builtin_next_arg (0).
2765 For abi switching it should be corrected. */
2766 if (!TARGET_64BIT || DEFAULT_ABI == MS_ABI)
2767 targetm.expand_builtin_va_start = NULL;
2770 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
2772 target_flags |= MASK_CLD & ~target_flags_explicit;
2776 /* Return true if this goes in large data/bss. */
/* NOTE(review): the return statements and braces of this function were
   elided by the extraction; only the tests survive.  Visible logic:
   large-data placement applies only to the medium code models, never to
   functions; a variable already placed in .ldata/.lbss counts as large,
   and otherwise a variable is large when its size is unknown (0 --
   incomplete type) or exceeds ix86_section_threshold.  */
2779 ix86_in_large_data_p (tree exp)
2781 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2784 /* Functions are never large data. */
2785 if (TREE_CODE (exp) == FUNCTION_DECL)
2788 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2790 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2791 if (strcmp (section, ".ldata") == 0
2792 || strcmp (section, ".lbss") == 0)
2798 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2800 /* If this is an incomplete type with size 0, then we can't put it
2801 in data because it might be too big when completed. */
2802 if (!size || size > ix86_section_threshold)
2809 /* Switch to the appropriate section for output of DECL.
2810 DECL is either a `VAR_DECL' node or a constant of some sort.
2811 RELOC indicates whether forming the initial value of DECL requires
2812 link-time relocations. */
/* NOTE(review): break statements, some case labels and closing braces of
   the switch were elided by the extraction.  Visible logic: for large
   data under the medium code models, map each section category to an
   ".l"-prefixed section name and hand the decl to get_section /
   get_named_section; everything else falls back to the default ELF
   section selector.  */
2814 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2818 x86_64_elf_select_section (tree decl, int reloc,
2819 unsigned HOST_WIDE_INT align)
2821 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2822 && ix86_in_large_data_p (decl))
2824 const char *sname = NULL;
2825 unsigned int flags = SECTION_WRITE;
2826 switch (categorize_decl_for_section (decl, reloc))
2831 case SECCAT_DATA_REL:
2832 sname = ".ldata.rel";
2834 case SECCAT_DATA_REL_LOCAL:
2835 sname = ".ldata.rel.local";
2837 case SECCAT_DATA_REL_RO:
2838 sname = ".ldata.rel.ro";
2840 case SECCAT_DATA_REL_RO_LOCAL:
2841 sname = ".ldata.rel.ro.local";
/* BSS-like categories additionally mark the section as SECTION_BSS.  */
2845 flags |= SECTION_BSS;
2848 case SECCAT_RODATA_MERGE_STR:
2849 case SECCAT_RODATA_MERGE_STR_INIT:
2850 case SECCAT_RODATA_MERGE_CONST:
2854 case SECCAT_SRODATA:
2861 /* We don't split these for medium model. Place them into
2862 default sections and hope for best. */
2864 case SECCAT_EMUTLS_VAR:
2865 case SECCAT_EMUTLS_TMPL:
2870 /* We might get called with string constants, but get_named_section
2871 doesn't like them as they are not DECLs. Also, we need to set
2872 flags in that case. */
2874 return get_section (sname, flags, NULL);
2875 return get_named_section (decl, sname, reloc);
2878 return default_elf_select_section (decl, reloc, align);
2881 /* Build up a unique section name, expressed as a
2882 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2883 RELOC indicates whether the initial value of EXP requires
2884 link-time relocations. */
2886 static void ATTRIBUTE_UNUSED
2887 x86_64_elf_unique_section (tree decl, int reloc)
2889 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2890 && ix86_in_large_data_p (decl))
2892 const char *prefix = NULL;
2893 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2894 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
/* Pick a ".l"-prefixed section prefix matching the decl's category;
   one_only decls use shorter prefixes for .gnu.linkonce names. */
2896 switch (categorize_decl_for_section (decl, reloc))
2899 case SECCAT_DATA_REL:
2900 case SECCAT_DATA_REL_LOCAL:
2901 case SECCAT_DATA_REL_RO:
2902 case SECCAT_DATA_REL_RO_LOCAL:
2903 prefix = one_only ? ".ld" : ".ldata";
2906 prefix = one_only ? ".lb" : ".lbss";
2909 case SECCAT_RODATA_MERGE_STR:
2910 case SECCAT_RODATA_MERGE_STR_INIT:
2911 case SECCAT_RODATA_MERGE_CONST:
2912 prefix = one_only ? ".lr" : ".lrodata";
2914 case SECCAT_SRODATA:
2921 /* We don't split these for medium model. Place them into
2922 default sections and hope for the best. */
2924 case SECCAT_EMUTLS_VAR:
2925 prefix = targetm.emutls.var_section;
2927 case SECCAT_EMUTLS_TMPL:
2928 prefix = targetm.emutls.tmpl_section;
2933 const char *name, *linkonce;
2936 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2937 name = targetm.strip_name_encoding (name);
2939 /* If we're using one_only, then there needs to be a .gnu.linkonce
2940 prefix to the section name. */
2941 linkonce = one_only ? ".gnu.linkonce" : "";
2943 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2945 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not large data: let the generic code pick the unique section. */
2949 default_unique_section (decl, reloc);
2952 #ifdef COMMON_ASM_OP
2953 /* This says how to output assembler code to declare an
2954 uninitialized external linkage data object.
2956 For medium model x86-64 we need to use .largecomm opcode for
2959 x86_elf_aligned_common (FILE *file,
2960 const char *name, unsigned HOST_WIDE_INT size,
/* Objects past the section threshold in medium model are emitted with
   .largecomm instead of the regular COMMON_ASM_OP directive. */
2963 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2964 && size > (unsigned int)ix86_section_threshold)
2965 fprintf (file, ".largecomm\t");
2967 fprintf (file, "%s", COMMON_ASM_OP);
2968 assemble_name (file, name);
/* Emit ",<size>,<alignment in bytes>" after the symbol name. */
2969 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2970 size, align / BITS_PER_UNIT);
2974 /* Utility function for targets to use in implementing
2975 ASM_OUTPUT_ALIGNED_BSS. */
2978 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2979 const char *name, unsigned HOST_WIDE_INT size,
/* Large BSS objects in medium model go into .lbss; everything else
   into the regular bss section. */
2982 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2983 && size > (unsigned int)ix86_section_threshold)
2984 switch_to_section (get_named_section (decl, ".lbss", 0));
2986 switch_to_section (bss_section);
2987 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2988 #ifdef ASM_DECLARE_OBJECT_NAME
2989 last_assemble_variable_decl = decl;
2990 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2992 /* Standard thing is just output label for the object. */
2993 ASM_OUTPUT_LABEL (file, name);
2994 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Reserve the object's storage; a zero-sized object still gets 1 byte. */
2995 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set default option values that depend on the optimization LEVEL
   (and on -Os via SIZE) before command-line options are processed. */
2999 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3001 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3002 make the problem with not enough registers even worse. */
3003 #ifdef INSN_SCHEDULING
3005 flag_schedule_insns = 0;
3009 /* The Darwin libraries never set errno, so we might as well
3010 avoid calling them when that's the only reason we would. */
3011 flag_errno_math = 0;
3013 /* The default values of these switches depend on the TARGET_64BIT
3014 that is not known at this moment. Mark these values with 2 and
3015 let the user override these. In case there is no command line option
3016 specifying them, we will set the defaults in override_options. */
3018 flag_omit_frame_pointer = 2;
3019 flag_pcc_struct_return = 2;
3020 flag_asynchronous_unwind_tables = 2;
3021 flag_vect_cost_model = 1;
3022 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3023 SUBTARGET_OPTIMIZATION_OPTIONS;
3027 /* Decide whether we can make a sibling call to a function. DECL is the
3028 declaration of the function being targeted by the call and EXP is the
3029 CALL_EXPR representing the call. */
3032 ix86_function_ok_for_sibcall (tree decl, tree exp)
3037 /* If we are generating position-independent code, we cannot sibcall
3038 optimize any indirect call, or a direct call to a global function,
3039 as the PLT requires %ebx be live. */
3040 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Find the callee's function type (strip the pointer if present). */
3047 func = TREE_TYPE (CALL_EXPR_FN (exp));
3048 if (POINTER_TYPE_P (func))
3049 func = TREE_TYPE (func);
3052 /* Check that the return value locations are the same. Like
3053 if we are returning floats on the 80387 register stack, we cannot
3054 make a sibcall from a function that doesn't return a float to a
3055 function that does or, conversely, from a function that does return
3056 a float to a function that doesn't; the necessary stack adjustment
3057 would not be executed. This is also the place we notice
3058 differences in the return value ABI. Note that it is ok for one
3059 of the functions to have void return type as long as the return
3060 value of the other is passed in a register. */
3061 a = ix86_function_value (TREE_TYPE (exp), func, false);
3062 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3064 if (STACK_REG_P (a) || STACK_REG_P (b))
3066 if (!rtx_equal_p (a, b))
3069 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3071 else if (!rtx_equal_p (a, b))
3074 /* If this call is indirect, we'll need to be able to use a call-clobbered
3075 register for the address of the target function. Make sure that all
3076 such registers are not used for passing parameters. */
3077 if (!decl && !TARGET_64BIT)
3081 /* We're looking at the CALL_EXPR, we need the type of the function. */
3082 type = CALL_EXPR_FN (exp); /* pointer expression */
3083 type = TREE_TYPE (type); /* pointer type */
3084 type = TREE_TYPE (type); /* function type */
3086 if (ix86_function_regparm (type, NULL) >= 3)
3088 /* ??? Need to count the actual number of registers to be used,
3089 not the possible number of registers. Fix later. */
3094 /* Dllimport'd functions are also called indirectly. */
3095 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3096 && decl && DECL_DLLIMPORT_P (decl)
3097 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3100 /* If we forced aligned the stack, then sibcalling would unalign the
3101 stack, which may break the called function. */
3102 if (cfun->machine->force_align_arg_pointer)
3105 /* Otherwise okay. That also includes certain types of indirect calls. */
3109 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3110 calling convention attributes;
3111 arguments as in struct attribute_spec.handler. */
3114 ix86_handle_cconv_attribute (tree *node, tree name,
3116 int flags ATTRIBUTE_UNUSED,
/* These attributes only make sense on function types (or decls that
   carry one); warn and drop the attribute otherwise. */
3119 if (TREE_CODE (*node) != FUNCTION_TYPE
3120 && TREE_CODE (*node) != METHOD_TYPE
3121 && TREE_CODE (*node) != FIELD_DECL
3122 && TREE_CODE (*node) != TYPE_DECL)
3124 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3125 IDENTIFIER_POINTER (name));
3126 *no_add_attrs = true;
3130 /* Can combine regparm with all attributes but fastcall. */
3131 if (is_attribute_p ("regparm", name))
3135 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3137 error ("fastcall and regparm attributes are not compatible");
/* regparm takes a single integer argument, bounded by REGPARM_MAX. */
3140 cst = TREE_VALUE (args);
3141 if (TREE_CODE (cst) != INTEGER_CST)
3143 warning (OPT_Wattributes,
3144 "%qs attribute requires an integer constant argument",
3145 IDENTIFIER_POINTER (name));
3146 *no_add_attrs = true;
3148 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3150 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3151 IDENTIFIER_POINTER (name), REGPARM_MAX);
3152 *no_add_attrs = true;
/* Stack realignment needs a scratch register, so one fewer regparm
   register is available for force_align_arg_pointer functions. */
3156 && lookup_attribute (ix86_force_align_arg_pointer_string,
3157 TYPE_ATTRIBUTES (*node))
3158 && compare_tree_int (cst, REGPARM_MAX-1))
3160 error ("%s functions limited to %d register parameters",
3161 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3169 /* Do not warn when emulating the MS ABI. */
3170 if (TREE_CODE (*node) != FUNCTION_TYPE || !ix86_function_type_abi (*node))
3171 warning (OPT_Wattributes, "%qs attribute ignored",
3172 IDENTIFIER_POINTER (name));
3173 *no_add_attrs = true;
3177 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3178 if (is_attribute_p ("fastcall", name))
3180 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3182 error ("fastcall and cdecl attributes are not compatible");
3184 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3186 error ("fastcall and stdcall attributes are not compatible");
3188 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3190 error ("fastcall and regparm attributes are not compatible");
3194 /* Can combine stdcall with fastcall (redundant), regparm and
3196 else if (is_attribute_p ("stdcall", name))
3198 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3200 error ("stdcall and cdecl attributes are not compatible");
3202 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3204 error ("stdcall and fastcall attributes are not compatible");
3208 /* Can combine cdecl with regparm and sseregparm. */
3209 else if (is_attribute_p ("cdecl", name))
3211 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3213 error ("stdcall and cdecl attributes are not compatible");
3215 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3217 error ("fastcall and cdecl attributes are not compatible");
3221 /* Can combine sseregparm with all attributes. */
3226 /* Return 0 if the attributes for two types are incompatible, 1 if they
3227 are compatible, and 2 if they are nearly compatible (which causes a
3228 warning to be generated). */
3231 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3233 /* Check for mismatch of non-default calling convention. */
3234 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Only function/method types carry calling-convention attributes. */
3236 if (TREE_CODE (type1) != FUNCTION_TYPE
3237 && TREE_CODE (type1) != METHOD_TYPE)
3240 /* Check for mismatched fastcall/regparm types. */
3241 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3242 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3243 || (ix86_function_regparm (type1, NULL)
3244 != ix86_function_regparm (type2, NULL)))
3247 /* Check for mismatched sseregparm types. */
3248 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3249 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3252 /* Check for mismatched return types (cdecl vs stdcall). */
3253 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3254 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3260 /* Return the regparm value for a function with the indicated TYPE and DECL.
3261 DECL may be NULL when calling function indirectly
3262 or considering a libcall. */
3265 ix86_function_regparm (const_tree type, const_tree decl)
3268 int regparm = ix86_regparm;
/* Emit the nested-function diagnostic at most once per compilation. */
3270 static bool error_issued;
3274 if (ix86_function_type_abi (type) == DEFAULT_ABI)
3276 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* An explicit regparm attribute overrides the -mregparm default. */
3279 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3283 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3285 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3287 /* We can't use regparm(3) for nested functions because
3288 these pass static chain pointer in %ecx register. */
3289 if (!error_issued && regparm == 3
3290 && decl_function_context (decl)
3291 && !DECL_NO_STATIC_CHAIN (decl))
3293 error ("nested functions are limited to 2 register parameters");
3294 error_issued = true;
3302 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3305 /* Use register calling convention for local functions when possible. */
3306 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3307 && flag_unit_at_a_time && !profile_flag)
3309 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3310 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3313 int local_regparm, globals = 0, regno;
3316 /* Make sure no regparm register is taken by a
3317 fixed register variable. */
3318 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3319 if (fixed_regs[local_regparm])
3322 /* We can't use regparm(3) for nested functions as these use
3323 static chain pointer in third argument. */
3324 if (local_regparm == 3
3325 && (decl_function_context (decl)
3326 || ix86_force_align_arg_pointer)
3327 && !DECL_NO_STATIC_CHAIN (decl))
3330 /* If the function realigns its stackpointer, the prologue will
3331 clobber %ecx. If we've already generated code for the callee,
3332 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3333 scanning the attributes for the self-realigning property. */
3334 f = DECL_STRUCT_FUNCTION (decl);
3335 if (local_regparm == 3
3336 && (f ? !!f->machine->force_align_arg_pointer
3337 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3338 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3341 /* Each fixed register usage increases register pressure,
3342 so less registers should be used for argument passing.
3343 This functionality can be overridden by an explicit
3345 for (regno = 0; regno <= DI_REG; regno++)
3346 if (fixed_regs[regno])
3350 = globals < local_regparm ? local_regparm - globals : 0;
3352 if (local_regparm > regparm)
3353 regparm = local_regparm;
3360 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3361 DFmode (2) arguments in SSE registers for a function with the
3362 indicated TYPE and DECL. DECL may be NULL when calling function
3363 indirectly or considering a libcall. Otherwise return 0. */
3366 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
/* Only meaningful for 32-bit code; 64-bit already uses SSE registers. */
3368 gcc_assert (!TARGET_64BIT);
3370 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3371 by the sseregparm attribute. */
3372 if (TARGET_SSEREGPARM
3373 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3380 error ("Calling %qD with attribute sseregparm without "
3381 "SSE/SSE2 enabled", decl);
3383 error ("Calling %qT with attribute sseregparm without "
3384 "SSE/SSE2 enabled", type);
3392 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3393 (and DFmode for SSE2) arguments in SSE registers. */
3394 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3396 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3397 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3399 return TARGET_SSE2 ? 2 : 1;
3405 /* Return true if EAX is live at the start of the function. Used by
3406 ix86_expand_prologue to determine if we need special help before
3407 calling allocate_stack_worker. */
3410 ix86_eax_live_at_start_p (void)
3412 /* Cheat. Don't bother working forward from ix86_function_regparm
3413 to the function type to whether an actual argument is located in
3414 eax. Instead just look at cfg info, which is still close enough
3415 to correct at this point. This gives false positives for broken
3416 functions that might use uninitialized data that happens to be
3417 allocated in eax, but who cares? */
/* Register 0 is eax; query the dataflow live-out set of the entry block. */
3418 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3421 /* Value is the number of bytes of arguments automatically
3422 popped when returning from a subroutine call.
3423 FUNDECL is the declaration node of the function (as a tree),
3424 FUNTYPE is the data type of the function (as a tree),
3425 or for a library call it is an identifier node for the subroutine name.
3426 SIZE is the number of bytes of arguments passed on the stack.
3428 On the 80386, the RTD insn may be used to pop them if the number
3429 of args is fixed, but if the number is variable then the caller
3430 must pop them all. RTD can't be used for library calls now
3431 because the library is compiled with the Unix compiler.
3432 Use of RTD is a selectable option, since it is incompatible with
3433 standard Unix calling sequences. If the option is not selected,
3434 the caller must always pop the args.
3436 The attribute stdcall is equivalent to RTD on a per module basis. */
3439 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3443 /* None of the 64-bit ABIs pop arguments. */
/* -mrtd applies to real functions only, never to libcalls (which are
   identified by an IDENTIFIER_NODE instead of a FUNCTION_DECL). */
3447 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3449 /* Cdecl functions override -mrtd, and never pop the stack. */
3450 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3452 /* Stdcall and fastcall functions will pop the stack if not
3454 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3455 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3458 if (rtd && ! stdarg_p (funtype))
3462 /* Lose any fake structure return argument if it is passed on the stack. */
3463 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3464 && !KEEP_AGGREGATE_RETURN_POINTER)
3466 int nregs = ix86_function_regparm (funtype, fundecl);
3468 return GET_MODE_SIZE (Pmode);
3474 /* Argument support functions. */
3476 /* Return true when register may be used to pass function parameters. */
3478 ix86_function_arg_regno_p (int regno)
3481 const int *parm_regs;
3486 return (regno < REGPARM_MAX
3487 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3489 return (regno < REGPARM_MAX
3490 || (TARGET_MMX && MMX_REGNO_P (regno)
3491 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3492 || (TARGET_SSE && SSE_REGNO_P (regno)
3493 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3498 if (SSE_REGNO_P (regno) && TARGET_SSE)
3503 if (TARGET_SSE && SSE_REGNO_P (regno)
3504 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3508 /* TODO: The function should depend on current function ABI but
3509 builtins.c would need updating then. Therefore we use the
3512 /* RAX is used as hidden argument to va_arg functions. */
3513 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* Pick the integer parameter-register table for the default ABI and
   scan it for REGNO. */
3516 if (DEFAULT_ABI == MS_ABI)
3517 parm_regs = x86_64_ms_abi_int_parameter_registers;
3519 parm_regs = x86_64_int_parameter_registers;
3520 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
3521 : X86_64_REGPARM_MAX); i++)
3522 if (regno == parm_regs[i])
3527 /* Return if we do not know how to pass TYPE solely in registers. */
3530 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3532 if (must_pass_in_stack_var_size_or_pad (mode, type))
3535 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3536 The layout_type routine is crafty and tries to trick us into passing
3537 currently unsupported vector types on the stack by using TImode. */
3538 return (!TARGET_64BIT && mode == TImode
3539 && type && TREE_CODE (type) != VECTOR_TYPE);
3542 /* It returns the size, in bytes, of the area reserved for arguments passed
3543 in registers for the function represented by fndecl dependent to the used
3546 ix86_reg_parm_stack_space (const_tree fndecl)
3549 /* For libcalls it is possible that there is no fndecl at hand.
3550 Therefore assume for this case the default abi of the target. */
3552 call_abi = DEFAULT_ABI;
3554 call_abi = ix86_function_abi (fndecl);
3560 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
3563 ix86_function_type_abi (const_tree fntype)
3565 if (TARGET_64BIT && fntype != NULL)
/* The ms_abi/sysv_abi attributes select the opposite of the default ABI. */
3568 if (DEFAULT_ABI == SYSV_ABI)
3569 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
3571 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
3573 if (DEFAULT_ABI == MS_ABI && abi == SYSV_ABI)
3574 sorry ("using sysv calling convention on target w64 is not supported");
/* Return the calling ABI (SYSV_ABI or MS_ABI) for function FNDECL,
   derived from its function type. */
3582 ix86_function_abi (const_tree fndecl)
3586 return ix86_function_type_abi (TREE_TYPE (fndecl));
3589 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
3592 ix86_cfun_abi (void)
/* Outside a 64-bit function there is no per-function ABI to report. */
3594 if (! cfun || ! TARGET_64BIT)
3596 return cfun->machine->call_abi;
3600 extern void init_regs (void);
3602 /* Implementation of call abi switching target hook. Specific to FNDECL
3603 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
3605 To prevent redundant calls of the costly function init_regs (), it checks not to
3606 reset register usage for default abi. */
3608 ix86_call_abi_override (const_tree fndecl)
3610 if (fndecl == NULL_TREE)
3611 cfun->machine->call_abi = DEFAULT_ABI;
3613 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* Under the MS ABI, RSI and RDI are callee-saved; under SYSV they are
   call-clobbered.  Only touch call_used_regs when the setting changes. */
3614 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
3616 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
3618 call_used_regs[4 /*RSI*/] = 0;
3619 call_used_regs[5 /*RDI*/] = 0;
3623 else if (TARGET_64BIT)
3625 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
3627 call_used_regs[4 /*RSI*/] = 1;
3628 call_used_regs[5 /*RDI*/] = 1;
3634 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3635 for a call to a function whose data type is FNTYPE.
3636 For a library call, FNTYPE is 0. */
3639 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3640 tree fntype, /* tree ptr for function decl */
3641 rtx libname, /* SYMBOL_REF of library name or 0 */
3644 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3645 memset (cum, 0, sizeof (*cum));
3647 cum->call_abi = ix86_function_type_abi (fntype);
3648 /* Set up the number of registers to use for passing arguments. */
3649 cum->nregs = ix86_regparm;
/* When the callee's ABI differs from the default, use that ABI's
   register counts instead. */
3652 if (cum->call_abi != DEFAULT_ABI)
3653 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
3658 cum->sse_nregs = SSE_REGPARM_MAX;
3661 if (cum->call_abi != DEFAULT_ABI)
3662 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
3663 : X64_SSE_REGPARM_MAX;
3667 cum->mmx_nregs = MMX_REGPARM_MAX;
3668 cum->warn_sse = true;
3669 cum->warn_mmx = true;
3671 /* Because type might mismatch in between caller and callee, we need to
3672 use actual type of function for local calls.
3673 FIXME: cgraph_analyze can be told to actually record if function uses
3674 va_start so for local functions maybe_vaarg can be made aggressive
3676 FIXME: once the type system is fixed, we won't need this code anymore. */
3678 fntype = TREE_TYPE (fndecl);
3679 cum->maybe_vaarg = (fntype
3680 ? (!prototype_p (fntype) || stdarg_p (fntype))
3685 /* If there are variable arguments, then we won't pass anything
3686 in registers in 32-bit mode. */
3687 if (stdarg_p (fntype))
3697 /* Use ecx and edx registers if function has fastcall attribute,
3698 else look for regparm information. */
3701 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3707 cum->nregs = ix86_function_regparm (fntype, fndecl);
3710 /* Set up the number of SSE registers used for passing SFmode
3711 and DFmode arguments. Warn for mismatching ABI. */
3712 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3716 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3717 But in the case of vector types, it is some vector mode.
3719 When we have only some of our vector isa extensions enabled, then there
3720 are some modes for which vector_mode_supported_p is false. For these
3721 modes, the generic vector support in gcc will choose some non-vector mode
3722 in order to implement the type. By computing the natural mode, we'll
3723 select the proper ABI location for the operand and not depend on whatever
3724 the middle-end decides to do with these vector types. */
3726 static enum machine_mode
3727 type_natural_mode (const_tree type)
3729 enum machine_mode mode = TYPE_MODE (type);
3731 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3733 HOST_WIDE_INT size = int_size_in_bytes (type);
3734 if ((size == 8 || size == 16)
3735 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3736 && TYPE_VECTOR_SUBPARTS (type) > 1)
3738 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* Start from the first vector mode of the matching class (float or
   int) and search for one with the right element mode and count. */
3740 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3741 mode = MIN_MODE_VECTOR_FLOAT;
3743 mode = MIN_MODE_VECTOR_INT;
3745 /* Get the mode which has this inner mode and number of units. */
3746 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3747 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3748 && GET_MODE_INNER (mode) == innermode)
3758 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3759 this may not agree with the mode that the type system has chosen for the
3760 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3761 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3764 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3769 if (orig_mode != BLKmode)
3770 tmp = gen_rtx_REG (orig_mode, regno);
/* BLKmode: wrap a single REG in a one-element PARALLEL at offset 0. */
3773 tmp = gen_rtx_REG (mode, regno);
3774 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3775 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3781 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3782 of this code is to classify each 8bytes of incoming argument by the register
3783 class and assign registers accordingly. */
3785 /* Return the union class of CLASS1 and CLASS2.
3786 See the x86-64 PS ABI for details. */
3788 static enum x86_64_reg_class
3789 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3791 /* Rule #1: If both classes are equal, this is the resulting class. */
3792 if (class1 == class2)
3795 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3797 if (class1 == X86_64_NO_CLASS)
3799 if (class2 == X86_64_NO_CLASS)
3802 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3803 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3804 return X86_64_MEMORY_CLASS;
3806 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3807 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3808 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3809 return X86_64_INTEGERSI_CLASS;
3810 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3811 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3812 return X86_64_INTEGER_CLASS;
3814 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3816 if (class1 == X86_64_X87_CLASS
3817 || class1 == X86_64_X87UP_CLASS
3818 || class1 == X86_64_COMPLEX_X87_CLASS
3819 || class2 == X86_64_X87_CLASS
3820 || class2 == X86_64_X87UP_CLASS
3821 || class2 == X86_64_COMPLEX_X87_CLASS)
3822 return X86_64_MEMORY_CLASS;
3824 /* Rule #6: Otherwise class SSE is used. */
3825 return X86_64_SSE_CLASS;
3828 /* Classify the argument of type TYPE and mode MODE.
3829 CLASSES will be filled by the register class used to pass each word
3830 of the operand. The number of words is returned. In case the parameter
3831 should be passed in memory, 0 is returned. As a special case for zero
3832 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3834 BIT_OFFSET is used internally for handling records and specifies offset
3835 of the offset in bits modulo 256 to avoid overflow cases.
3837 See the x86-64 PS ABI for details.
3841 classify_argument (enum machine_mode mode, const_tree type,
3842 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3844 HOST_WIDE_INT bytes =
3845 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3846 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3848 /* Variable sized entities are always passed/returned in memory. */
3852 if (mode != VOIDmode
3853 && targetm.calls.must_pass_in_stack (mode, type))
/* Aggregates are classified recursively, field by field. */
3856 if (type && AGGREGATE_TYPE_P (type))
3860 enum x86_64_reg_class subclasses[MAX_CLASSES];
3862 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3866 for (i = 0; i < words; i++)
3867 classes[i] = X86_64_NO_CLASS;
3869 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3870 signal the memory class, so handle it as a special case. */
3873 classes[0] = X86_64_NO_CLASS;
3877 /* Classify each field of record and merge classes. */
3878 switch (TREE_CODE (type))
3881 /* And now merge the fields of structure. */
3882 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3884 if (TREE_CODE (field) == FIELD_DECL)
3888 if (TREE_TYPE (field) == error_mark_node)
3891 /* Bitfields are always classified as integer. Handle them
3892 early, since later code would consider them to be
3893 misaligned integers. */
3894 if (DECL_BIT_FIELD (field))
3896 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3897 i < ((int_bit_position (field) + (bit_offset % 64))
3898 + tree_low_cst (DECL_SIZE (field), 0)
3901 merge_classes (X86_64_INTEGER_CLASS,
/* Non-bitfield: classify the field's own type recursively and merge
   its word classes at the field's position. */
3906 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3907 TREE_TYPE (field), subclasses,
3908 (int_bit_position (field)
3909 + bit_offset) % 256);
3912 for (i = 0; i < num; i++)
3915 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3917 merge_classes (subclasses[i], classes[i + pos]);
3925 /* Arrays are handled as small records. */
3928 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3929 TREE_TYPE (type), subclasses, bit_offset);
3933 /* The partial classes are now full classes. */
3934 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3935 subclasses[0] = X86_64_SSE_CLASS;
3936 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3937 subclasses[0] = X86_64_INTEGER_CLASS;
/* Replicate the element's classification across all words of the array. */
3939 for (i = 0; i < words; i++)
3940 classes[i] = subclasses[i % num];
3945 case QUAL_UNION_TYPE:
3946 /* Unions are similar to RECORD_TYPE but offset is always 0.
3948 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3950 if (TREE_CODE (field) == FIELD_DECL)
3954 if (TREE_TYPE (field) == error_mark_node)
3957 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3958 TREE_TYPE (field), subclasses,
3962 for (i = 0; i < num; i++)
3963 classes[i] = merge_classes (subclasses[i], classes[i]);
3972 /* Final merger cleanup. */
3973 for (i = 0; i < words; i++)
3975 /* If one class is MEMORY, everything should be passed in
3977 if (classes[i] == X86_64_MEMORY_CLASS)
3980 /* The X86_64_SSEUP_CLASS should be always preceded by
3981 X86_64_SSE_CLASS. */
3982 if (classes[i] == X86_64_SSEUP_CLASS
3983 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3984 classes[i] = X86_64_SSE_CLASS;
3986 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3987 if (classes[i] == X86_64_X87UP_CLASS
3988 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3989 classes[i] = X86_64_SSE_CLASS;
3994 /* Compute alignment needed. We align all types to natural boundaries with
3995 exception of XFmode that is aligned to 64bits. */
3996 if (mode != VOIDmode && mode != BLKmode)
3998 int mode_alignment = GET_MODE_BITSIZE (mode);
4001 mode_alignment = 128;
4002 else if (mode == XCmode)
4003 mode_alignment = 256;
4004 if (COMPLEX_MODE_P (mode))
4005 mode_alignment /= 2;
4006 /* Misaligned fields are always returned in memory. */
4007 if (bit_offset % mode_alignment)
4011 /* for V1xx modes, just use the base mode */
4012 if (VECTOR_MODE_P (mode) && mode != V1DImode
4013 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4014 mode = GET_MODE_INNER (mode);
4016 /* Classification of atomic types. */
4021 classes[0] = X86_64_SSE_CLASS;
4024 classes[0] = X86_64_SSE_CLASS;
4025 classes[1] = X86_64_SSEUP_CLASS;
/* Small integers fitting in the low 32 bits get INTEGERSI, else INTEGER. */
4034 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4035 classes[0] = X86_64_INTEGERSI_CLASS;
4037 classes[0] = X86_64_INTEGER_CLASS;
4041 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4046 if (!(bit_offset % 64))
4047 classes[0] = X86_64_SSESF_CLASS;
4049 classes[0] = X86_64_SSE_CLASS;
4052 classes[0] = X86_64_SSEDF_CLASS;
4055 classes[0] = X86_64_X87_CLASS;
4056 classes[1] = X86_64_X87UP_CLASS;
4059 classes[0] = X86_64_SSE_CLASS;
4060 classes[1] = X86_64_SSEUP_CLASS;
4063 classes[0] = X86_64_SSE_CLASS;
4066 classes[0] = X86_64_SSEDF_CLASS;
4067 classes[1] = X86_64_SSEDF_CLASS;
4070 classes[0] = X86_64_COMPLEX_X87_CLASS;
4073 /* This modes is larger than 16 bytes. */
4081 classes[0] = X86_64_SSE_CLASS;
4082 classes[1] = X86_64_SSEUP_CLASS;
4089 classes[0] = X86_64_SSE_CLASS;
4095 gcc_assert (VECTOR_MODE_P (mode));
4100 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
4102 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4103 classes[0] = X86_64_INTEGERSI_CLASS;
4105 classes[0] = X86_64_INTEGER_CLASS;
4106 classes[1] = X86_64_INTEGER_CLASS;
4107 return 1 + (bytes > 8);
4111 /* Examine the argument and return set number of register required in each
4112 class. Return 0 iff parameter should be passed in memory. */
4114 examine_argument (enum machine_mode mode, const_tree type, int in_return,
4115 int *int_nregs, int *sse_nregs)
4117 enum x86_64_reg_class regclass[MAX_CLASSES];
4118 int n = classify_argument (mode, type, regclass, 0);
4124 for (n--; n >= 0; n--)
4125 switch (regclass[n])
4127 case X86_64_INTEGER_CLASS:
4128 case X86_64_INTEGERSI_CLASS:
4131 case X86_64_SSE_CLASS:
4132 case X86_64_SSESF_CLASS:
4133 case X86_64_SSEDF_CLASS:
4136 case X86_64_NO_CLASS:
4137 case X86_64_SSEUP_CLASS:
4139 case X86_64_X87_CLASS:
4140 case X86_64_X87UP_CLASS:
4144 case X86_64_COMPLEX_X87_CLASS:
4145 return in_return ? 2 : 0;
4146 case X86_64_MEMORY_CLASS:
4152 /* Construct container for the argument used by GCC interface. See
4153 FUNCTION_ARG for the detailed description. */
4156 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4157 const_tree type, int in_return, int nintregs, int nsseregs,
4158 const int *intreg, int sse_regno)
4160 /* The following variables hold the static issued_error state. */
4161 static bool issued_sse_arg_error;
4162 static bool issued_sse_ret_error;
4163 static bool issued_x87_ret_error;
4165 enum machine_mode tmpmode;
4167 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4168 enum x86_64_reg_class regclass[MAX_CLASSES];
4172 int needed_sseregs, needed_intregs;
4173 rtx exp[MAX_CLASSES];
4176 n = classify_argument (mode, type, regclass, 0);
4179 if (!examine_argument (mode, type, in_return, &needed_intregs,
4182 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4185 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4186 some less clueful developer tries to use floating-point anyway. */
4187 if (needed_sseregs && !TARGET_SSE)
4191 if (!issued_sse_ret_error)
4193 error ("SSE register return with SSE disabled");
4194 issued_sse_ret_error = true;
4197 else if (!issued_sse_arg_error)
4199 error ("SSE register argument with SSE disabled");
4200 issued_sse_arg_error = true;
4205 /* Likewise, error if the ABI requires us to return values in the
4206 x87 registers and the user specified -mno-80387. */
4207 if (!TARGET_80387 && in_return)
4208 for (i = 0; i < n; i++)
4209 if (regclass[i] == X86_64_X87_CLASS
4210 || regclass[i] == X86_64_X87UP_CLASS
4211 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4213 if (!issued_x87_ret_error)
4215 error ("x87 register return with x87 disabled");
4216 issued_x87_ret_error = true;
4221 /* First construct simple cases. Avoid SCmode, since we want to use
4222 single register to pass this type. */
4223 if (n == 1 && mode != SCmode)
4224 switch (regclass[0])
4226 case X86_64_INTEGER_CLASS:
4227 case X86_64_INTEGERSI_CLASS:
4228 return gen_rtx_REG (mode, intreg[0]);
4229 case X86_64_SSE_CLASS:
4230 case X86_64_SSESF_CLASS:
4231 case X86_64_SSEDF_CLASS:
4232 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4233 case X86_64_X87_CLASS:
4234 case X86_64_COMPLEX_X87_CLASS:
4235 return gen_rtx_REG (mode, FIRST_STACK_REG);
4236 case X86_64_NO_CLASS:
4237 /* Zero sized array, struct or class. */
4242 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4243 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4244 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
4247 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4248 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4249 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4250 && regclass[1] == X86_64_INTEGER_CLASS
4251 && (mode == CDImode || mode == TImode || mode == TFmode)
4252 && intreg[0] + 1 == intreg[1])
4253 return gen_rtx_REG (mode, intreg[0]);
4255 /* Otherwise figure out the entries of the PARALLEL. */
4256 for (i = 0; i < n; i++)
4258 switch (regclass[i])
4260 case X86_64_NO_CLASS:
4262 case X86_64_INTEGER_CLASS:
4263 case X86_64_INTEGERSI_CLASS:
4264 /* Merge TImodes on aligned occasions here too. */
4265 if (i * 8 + 8 > bytes)
4266 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4267 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4271 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4272 if (tmpmode == BLKmode)
4274 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4275 gen_rtx_REG (tmpmode, *intreg),
4279 case X86_64_SSESF_CLASS:
4280 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4281 gen_rtx_REG (SFmode,
4282 SSE_REGNO (sse_regno)),
4286 case X86_64_SSEDF_CLASS:
4287 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4288 gen_rtx_REG (DFmode,
4289 SSE_REGNO (sse_regno)),
4293 case X86_64_SSE_CLASS:
4294 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4298 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4299 gen_rtx_REG (tmpmode,
4300 SSE_REGNO (sse_regno)),
4302 if (tmpmode == TImode)
4311 /* Empty aligned struct, union or class. */
4315 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4316 for (i = 0; i < nexps; i++)
4317 XVECEXP (ret, 0, i) = exp [i];
4321 /* Update the data in CUM to advance over an argument of mode MODE
4322 and data type TYPE. (TYPE is null for libcalls where that information
4323 may not be available.) */
4326 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4327 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4343 cum->words += words;
4344 cum->nregs -= words;
4345 cum->regno += words;
4347 if (cum->nregs <= 0)
4355 if (cum->float_in_sse < 2)
4358 if (cum->float_in_sse < 1)
4369 if (!type || !AGGREGATE_TYPE_P (type))
4371 cum->sse_words += words;
4372 cum->sse_nregs -= 1;
4373 cum->sse_regno += 1;
4374 if (cum->sse_nregs <= 0)
4387 if (!type || !AGGREGATE_TYPE_P (type))
4389 cum->mmx_words += words;
4390 cum->mmx_nregs -= 1;
4391 cum->mmx_regno += 1;
4392 if (cum->mmx_nregs <= 0)
4403 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4404 tree type, HOST_WIDE_INT words)
4406 int int_nregs, sse_nregs;
4408 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4409 cum->words += words;
4410 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4412 cum->nregs -= int_nregs;
4413 cum->sse_nregs -= sse_nregs;
4414 cum->regno += int_nregs;
4415 cum->sse_regno += sse_nregs;
4418 cum->words += words;
4422 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4423 HOST_WIDE_INT words)
4425 /* Otherwise, this should be passed indirect. */
4426 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4428 cum->words += words;
4437 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4438 tree type, int named ATTRIBUTE_UNUSED)
4440 HOST_WIDE_INT bytes, words;
4442 if (mode == BLKmode)
4443 bytes = int_size_in_bytes (type);
4445 bytes = GET_MODE_SIZE (mode);
4446 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4449 mode = type_natural_mode (type);
4451 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4452 function_arg_advance_ms_64 (cum, bytes, words);
4453 else if (TARGET_64BIT)
4454 function_arg_advance_64 (cum, mode, type, words);
4456 function_arg_advance_32 (cum, mode, type, bytes, words);
4459 /* Define where to put the arguments to a function.
4460 Value is zero to push the argument on the stack,
4461 or a hard register in which to store the argument.
4463 MODE is the argument's machine mode.
4464 TYPE is the data type of the argument (as a tree).
4465 This is null for libcalls where that information may
4467 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4468 the preceding args and about the function being called.
4469 NAMED is nonzero if this argument is a named parameter
4470 (otherwise it is an extra parameter matching an ellipsis). */
4473 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4474 enum machine_mode orig_mode, tree type,
4475 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4477 static bool warnedsse, warnedmmx;
4479 /* Avoid the AL settings for the Unix64 ABI. */
4480 if (mode == VOIDmode)
4496 if (words <= cum->nregs)
4498 int regno = cum->regno;
4500 /* Fastcall allocates the first two DWORD (SImode) or
4501 smaller arguments to ECX and EDX if it isn't an
4507 || (type && AGGREGATE_TYPE_P (type)))
4510 /* ECX not EAX is the first allocated register. */
4511 if (regno == AX_REG)
4514 return gen_rtx_REG (mode, regno);
4519 if (cum->float_in_sse < 2)
4522 if (cum->float_in_sse < 1)
4532 if (!type || !AGGREGATE_TYPE_P (type))
4534 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4537 warning (0, "SSE vector argument without SSE enabled "
4541 return gen_reg_or_parallel (mode, orig_mode,
4542 cum->sse_regno + FIRST_SSE_REG);
4551 if (!type || !AGGREGATE_TYPE_P (type))
4553 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4556 warning (0, "MMX vector argument without MMX enabled "
4560 return gen_reg_or_parallel (mode, orig_mode,
4561 cum->mmx_regno + FIRST_MMX_REG);
4570 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4571 enum machine_mode orig_mode, tree type)
4573 /* Handle a hidden AL argument containing number of registers
4574 for varargs x86-64 functions. */
4575 if (mode == VOIDmode)
4576 return GEN_INT (cum->maybe_vaarg
4577 ? (cum->sse_nregs < 0
4578 ? (cum->call_abi == DEFAULT_ABI
4580 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4581 : X64_SSE_REGPARM_MAX))
4585 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4587 &x86_64_int_parameter_registers [cum->regno],
4592 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4593 enum machine_mode orig_mode, int named,
4594 HOST_WIDE_INT bytes)
4598 /* Avoid the AL settings for the Unix64 ABI. */
4599 if (mode == VOIDmode)
4602 /* If we've run out of registers, it goes on the stack. */
4603 if (cum->nregs == 0)
4606 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4608 /* Only floating point modes are passed in anything but integer regs. */
4609 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4612 regno = cum->regno + FIRST_SSE_REG;
4617 /* Unnamed floating parameters are passed in both the
4618 SSE and integer registers. */
4619 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4620 t2 = gen_rtx_REG (mode, regno);
4621 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4622 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4623 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4626 /* Handle aggregated types passed in register. */
4627 if (orig_mode == BLKmode)
4629 if (bytes > 0 && bytes <= 8)
4630 mode = (bytes > 4 ? DImode : SImode);
4631 if (mode == BLKmode)
4635 return gen_reg_or_parallel (mode, orig_mode, regno);
4639 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4640 tree type, int named)
4642 enum machine_mode mode = omode;
4643 HOST_WIDE_INT bytes, words;
4645 if (mode == BLKmode)
4646 bytes = int_size_in_bytes (type);
4648 bytes = GET_MODE_SIZE (mode);
4649 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4651 /* To simplify the code below, represent vector types with a vector mode
4652 even if MMX/SSE are not active. */
4653 if (type && TREE_CODE (type) == VECTOR_TYPE)
4654 mode = type_natural_mode (type);
4656 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4657 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4658 else if (TARGET_64BIT)
4659 return function_arg_64 (cum, mode, omode, type);
4661 return function_arg_32 (cum, mode, omode, type, bytes, words);
4664 /* A C expression that indicates when an argument must be passed by
4665 reference. If nonzero for an argument, a copy of that argument is
4666 made in memory and a pointer to the argument is passed instead of
4667 the argument itself. The pointer is passed in whatever way is
4668 appropriate for passing a pointer to that type. */
4671 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4672 enum machine_mode mode ATTRIBUTE_UNUSED,
4673 const_tree type, bool named ATTRIBUTE_UNUSED)
4675 /* See Windows x64 Software Convention. */
4676 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4678 int msize = (int) GET_MODE_SIZE (mode);
4681 /* Arrays are passed by reference. */
4682 if (TREE_CODE (type) == ARRAY_TYPE)
4685 if (AGGREGATE_TYPE_P (type))
4687 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4688 are passed by reference. */
4689 msize = int_size_in_bytes (type);
4693 /* __m128 is passed by reference. */
4695 case 1: case 2: case 4: case 8:
4701 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4707 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4710 contains_aligned_value_p (tree type)
4712 enum machine_mode mode = TYPE_MODE (type);
4713 if (((TARGET_SSE && SSE_REG_MODE_P (mode)) || mode == TDmode)
4714 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4716 if (TYPE_ALIGN (type) < 128)
4719 if (AGGREGATE_TYPE_P (type))
4721 /* Walk the aggregates recursively. */
4722 switch (TREE_CODE (type))
4726 case QUAL_UNION_TYPE:
4730 /* Walk all the structure fields. */
4731 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4733 if (TREE_CODE (field) == FIELD_DECL
4734 && contains_aligned_value_p (TREE_TYPE (field)))
4741 /* Just for use if some languages passes arrays by value. */
4742 if (contains_aligned_value_p (TREE_TYPE (type)))
4753 /* Gives the alignment boundary, in bits, of an argument with the
4754 specified mode and type. */
4757 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4762 /* Since canonical type is used for call, we convert it to
4763 canonical type if needed. */
4764 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
4765 type = TYPE_CANONICAL (type);
4766 align = TYPE_ALIGN (type);
4769 align = GET_MODE_ALIGNMENT (mode);
4770 if (align < PARM_BOUNDARY)
4771 align = PARM_BOUNDARY;
4772 /* In 32bit, only _Decimal128 is aligned to its natural boundary. */
4773 if (!TARGET_64BIT && mode != TDmode)
4775 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4776 make an exception for SSE modes since these require 128bit
4779 The handling here differs from field_alignment. ICC aligns MMX
4780 arguments to 4 byte boundaries, while structure fields are aligned
4781 to 8 byte boundaries. */
4784 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)) && mode != TDmode)
4785 align = PARM_BOUNDARY;
4789 if (!contains_aligned_value_p (type))
4790 align = PARM_BOUNDARY;
4793 if (align > BIGGEST_ALIGNMENT)
4794 align = BIGGEST_ALIGNMENT;
4798 /* Return true if N is a possible register number of function value. */
4801 ix86_function_value_regno_p (int regno)
4808 case FIRST_FLOAT_REG:
4809 /* TODO: The function should depend on current function ABI but
4810 builtins.c would need updating then. Therefore we use the
4812 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
4814 return TARGET_FLOAT_RETURNS_IN_80387;
4820 if (TARGET_MACHO || TARGET_64BIT)
4828 /* Define how to find the value returned by a function.
4829 VALTYPE is the data type of the value (as a tree).
4830 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4831 otherwise, FUNC is 0. */
4834 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4835 const_tree fntype, const_tree fn)
4839 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4840 we normally prevent this case when mmx is not available. However
4841 some ABIs may require the result to be returned like DImode. */
4842 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4843 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4845 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4846 we prevent this case when sse is not available. However some ABIs
4847 may require the result to be returned like integer TImode. */
4848 else if (mode == TImode
4849 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4850 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4852 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4853 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4854 regno = FIRST_FLOAT_REG;
4856 /* Most things go in %eax. */
4859 /* Override FP return register with %xmm0 for local functions when
4860 SSE math is enabled or for functions with sseregparm attribute. */
4861 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4863 int sse_level = ix86_function_sseregparm (fntype, fn, false);
4864 if ((sse_level >= 1 && mode == SFmode)
4865 || (sse_level == 2 && mode == DFmode))
4866 regno = FIRST_SSE_REG;
4869 return gen_rtx_REG (orig_mode, regno);
4873 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4878 /* Handle libcalls, which don't provide a type node. */
4879 if (valtype == NULL)
4891 return gen_rtx_REG (mode, FIRST_SSE_REG);
4894 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4898 return gen_rtx_REG (mode, AX_REG);
4902 ret = construct_container (mode, orig_mode, valtype, 1,
4903 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4904 x86_64_int_return_registers, 0);
4906 /* For zero sized structures, construct_container returns NULL, but we
4907 need to keep rest of compiler happy by returning meaningful value. */
4909 ret = gen_rtx_REG (orig_mode, AX_REG);
4915 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4917 unsigned int regno = AX_REG;
4921 switch (GET_MODE_SIZE (mode))
4924 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4925 && !COMPLEX_MODE_P (mode))
4926 regno = FIRST_SSE_REG;
4930 if (mode == SFmode || mode == DFmode)
4931 regno = FIRST_SSE_REG;
4937 return gen_rtx_REG (orig_mode, regno);
4941 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4942 enum machine_mode orig_mode, enum machine_mode mode)
4944 const_tree fn, fntype;
4947 if (fntype_or_decl && DECL_P (fntype_or_decl))
4948 fn = fntype_or_decl;
4949 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4951 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
4952 return function_value_ms_64 (orig_mode, mode);
4953 else if (TARGET_64BIT)
4954 return function_value_64 (orig_mode, mode, valtype);
4956 return function_value_32 (orig_mode, mode, fntype, fn);
4960 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4961 bool outgoing ATTRIBUTE_UNUSED)
4963 enum machine_mode mode, orig_mode;
4965 orig_mode = TYPE_MODE (valtype);
4966 mode = type_natural_mode (valtype);
4967 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4971 ix86_libcall_value (enum machine_mode mode)
4973 return ix86_function_value_1 (NULL, NULL, mode, mode);
4976 /* Return true iff type is returned in memory. */
4978 static int ATTRIBUTE_UNUSED
4979 return_in_memory_32 (const_tree type, enum machine_mode mode)
4983 if (mode == BLKmode)
4986 size = int_size_in_bytes (type);
4988 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4991 if (VECTOR_MODE_P (mode) || mode == TImode)
4993 /* User-created vectors small enough to fit in EAX. */
4997 /* MMX/3dNow values are returned in MM0,
4998 except when it doesn't exits. */
5000 return (TARGET_MMX ? 0 : 1);
5002 /* SSE values are returned in XMM0, except when it doesn't exist. */
5004 return (TARGET_SSE ? 0 : 1);
5018 static int ATTRIBUTE_UNUSED
5019 return_in_memory_64 (const_tree type, enum machine_mode mode)
5021 int needed_intregs, needed_sseregs;
5022 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
5025 static int ATTRIBUTE_UNUSED
5026 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5028 HOST_WIDE_INT size = int_size_in_bytes (type);
5030 /* __m128 is returned in xmm0. */
5031 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5032 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5035 /* Otherwise, the size must be exactly in [1248]. */
5036 return (size != 1 && size != 2 && size != 4 && size != 8);
5040 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5042 #ifdef SUBTARGET_RETURN_IN_MEMORY
5043 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
5045 const enum machine_mode mode = type_natural_mode (type);
5047 if (TARGET_64BIT_MS_ABI)
5048 return return_in_memory_ms_64 (type, mode);
5049 else if (TARGET_64BIT)
5050 return return_in_memory_64 (type, mode);
5052 return return_in_memory_32 (type, mode);
5056 /* Return false iff TYPE is returned in memory. This version is used
5057 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5058 but differs notably in that when MMX is available, 8-byte vectors
5059 are returned in memory, rather than in MMX registers. */
5062 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5065 enum machine_mode mode = type_natural_mode (type);
5068 return return_in_memory_64 (type, mode);
5070 if (mode == BLKmode)
5073 size = int_size_in_bytes (type);
5075 if (VECTOR_MODE_P (mode))
5077 /* Return in memory only if MMX registers *are* available. This
5078 seems backwards, but it is consistent with the existing
5085 else if (mode == TImode)
5087 else if (mode == XFmode)
5093 /* When returning SSE vector types, we have a choice of either
5094 (1) being abi incompatible with a -march switch, or
5095 (2) generating an error.
5096 Given no good solution, I think the safest thing is one warning.
5097 The user won't be able to use -Werror, but....
5099 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
5100 called in response to actually generating a caller or callee that
5101 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
5102 via aggregate_value_p for general type probing from tree-ssa. */
5105 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
5107 static bool warnedsse, warnedmmx;
5109 if (!TARGET_64BIT && type)
5111 /* Look at the return type of the function, not the function type. */
5112 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
5114 if (!TARGET_SSE && !warnedsse)
5117 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5120 warning (0, "SSE vector return without SSE enabled "
5125 if (!TARGET_MMX && !warnedmmx)
5127 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5130 warning (0, "MMX vector return without MMX enabled "
5140 /* Create the va_list data type. */
5143 ix86_build_builtin_va_list (void)
5145 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5147 /* For i386 we use plain pointer to argument area. */
5148 if (!TARGET_64BIT || ix86_cfun_abi () == MS_ABI)
5149 return build_pointer_type (char_type_node);
5151 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5152 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
5154 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5155 unsigned_type_node);
5156 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5157 unsigned_type_node);
5158 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5160 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
5163 va_list_gpr_counter_field = f_gpr;
5164 va_list_fpr_counter_field = f_fpr;
5166 DECL_FIELD_CONTEXT (f_gpr) = record;
5167 DECL_FIELD_CONTEXT (f_fpr) = record;
5168 DECL_FIELD_CONTEXT (f_ovf) = record;
5169 DECL_FIELD_CONTEXT (f_sav) = record;
5171 TREE_CHAIN (record) = type_decl;
5172 TYPE_NAME (record) = type_decl;
5173 TYPE_FIELDS (record) = f_gpr;
5174 TREE_CHAIN (f_gpr) = f_fpr;
5175 TREE_CHAIN (f_fpr) = f_ovf;
5176 TREE_CHAIN (f_ovf) = f_sav;
5178 layout_type (record);
5180 /* The correct type is an array type of one element. */
5181 return build_array_type (record, build_index_type (size_zero_node));
5184 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
5187 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5196 int regparm = ix86_regparm;
5198 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
5199 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
5201 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5204 /* Indicate to allocate space on the stack for varargs save area. */
5205 ix86_save_varrargs_registers = 1;
5206 /* We need 16-byte stack alignment to save SSE registers. If user
5207 asked for lower preferred_stack_boundary, lets just hope that he knows
5208 what he is doing and won't varargs SSE values.
5210 We also may end up assuming that only 64bit values are stored in SSE
5211 register let some floating point program work. */
5212 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5213 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5215 save_area = frame_pointer_rtx;
5216 set = get_varargs_alias_set ();
5218 for (i = cum->regno;
5220 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5223 mem = gen_rtx_MEM (Pmode,
5224 plus_constant (save_area, i * UNITS_PER_WORD));
5225 MEM_NOTRAP_P (mem) = 1;
5226 set_mem_alias_set (mem, set);
5227 emit_move_insn (mem, gen_rtx_REG (Pmode,
5228 x86_64_int_parameter_registers[i]));
5231 if (cum->sse_nregs && cfun->va_list_fpr_size)
5233 /* Now emit code to save SSE registers. The AX parameter contains number
5234 of SSE parameter registers used to call this function. We use
5235 sse_prologue_save insn template that produces computed jump across
5236 SSE saves. We need some preparation work to get this working. */
5238 label = gen_label_rtx ();
5239 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5241 /* Compute address to jump to :
5242 label - 5*eax + nnamed_sse_arguments*5 */
5243 tmp_reg = gen_reg_rtx (Pmode);
5244 nsse_reg = gen_reg_rtx (Pmode);
5245 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5246 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5247 gen_rtx_MULT (Pmode, nsse_reg,
5252 gen_rtx_CONST (DImode,
5253 gen_rtx_PLUS (DImode,
5255 GEN_INT (cum->sse_regno * 4))));
5257 emit_move_insn (nsse_reg, label_ref);
5258 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5260 /* Compute address of memory block we save into. We always use pointer
5261 pointing 127 bytes after first byte to store - this is needed to keep
5262 instruction size limited by 4 bytes. */
5263 tmp_reg = gen_reg_rtx (Pmode);
5264 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5265 plus_constant (save_area,
5266 8 * X86_64_REGPARM_MAX + 127)));
5267 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5268 MEM_NOTRAP_P (mem) = 1;
5269 set_mem_alias_set (mem, set);
5270 set_mem_align (mem, BITS_PER_WORD);
5272 /* And finally do the dirty job! */
5273 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5274 GEN_INT (cum->sse_regno), label));
5279 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5281 alias_set_type set = get_varargs_alias_set ();
5284 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
5288 mem = gen_rtx_MEM (Pmode,
5289 plus_constant (virtual_incoming_args_rtx,
5290 i * UNITS_PER_WORD));
5291 MEM_NOTRAP_P (mem) = 1;
5292 set_mem_alias_set (mem, set);
5294 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5295 emit_move_insn (mem, reg);
5300 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5301 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5304 CUMULATIVE_ARGS next_cum;
5307 /* This argument doesn't appear to be used anymore. Which is good,
5308 because the old code here didn't suppress rtl generation. */
5309 gcc_assert (!no_rtl);
5314 fntype = TREE_TYPE (current_function_decl);
5316 /* For varargs, we do not want to skip the dummy va_dcl argument.
5317 For stdargs, we do want to skip the last named argument. */
5319 if (stdarg_p (fntype))
5320 function_arg_advance (&next_cum, mode, type, 1);
5322 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5323 setup_incoming_varargs_ms_64 (&next_cum);
5325 setup_incoming_varargs_64 (&next_cum);
5328 /* Implement va_start. */
5331 ix86_va_start (tree valist, rtx nextarg)
5333 HOST_WIDE_INT words, n_gpr, n_fpr;
5334 tree f_gpr, f_fpr, f_ovf, f_sav;
5335 tree gpr, fpr, ovf, sav, t;
5338 /* Only 64bit target needs something special. */
5339 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5341 std_expand_builtin_va_start (valist, nextarg);
5345 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5346 f_fpr = TREE_CHAIN (f_gpr);
5347 f_ovf = TREE_CHAIN (f_fpr);
5348 f_sav = TREE_CHAIN (f_ovf);
5350 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5351 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5352 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5353 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5354 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5356 /* Count number of gp and fp argument registers used. */
5357 words = crtl->args.info.words;
5358 n_gpr = crtl->args.info.regno;
5359 n_fpr = crtl->args.info.sse_regno;
5361 if (cfun->va_list_gpr_size)
5363 type = TREE_TYPE (gpr);
5364 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5365 build_int_cst (type, n_gpr * 8));
5366 TREE_SIDE_EFFECTS (t) = 1;
5367 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5370 if (cfun->va_list_fpr_size)
5372 type = TREE_TYPE (fpr);
5373 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5374 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
5375 TREE_SIDE_EFFECTS (t) = 1;
5376 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5379 /* Find the overflow area. */
5380 type = TREE_TYPE (ovf);
5381 t = make_tree (type, virtual_incoming_args_rtx);
5383 t = build2 (POINTER_PLUS_EXPR, type, t,
5384 size_int (words * UNITS_PER_WORD));
5385 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5386 TREE_SIDE_EFFECTS (t) = 1;
5387 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5389 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5391 /* Find the register save area.
5392 Prologue of the function save it right above stack frame. */
5393 type = TREE_TYPE (sav);
5394 t = make_tree (type, frame_pointer_rtx);
5395 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5396 TREE_SIDE_EFFECTS (t) = 1;
5397 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5401 /* Implement va_arg. */
5404 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5406 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5407 tree f_gpr, f_fpr, f_ovf, f_sav;
5408 tree gpr, fpr, ovf, sav, t;
5410 tree lab_false, lab_over = NULL_TREE;
5415 enum machine_mode nat_mode;
5417 /* Only 64bit target needs something special. */
5418 if (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI)
5419 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
5421 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
5422 f_fpr = TREE_CHAIN (f_gpr);
5423 f_ovf = TREE_CHAIN (f_fpr);
5424 f_sav = TREE_CHAIN (f_ovf);
5426 valist = build_va_arg_indirect_ref (valist);
5427 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5428 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5429 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5430 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5432 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5434 type = build_pointer_type (type);
5435 size = int_size_in_bytes (type);
5436 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5438 nat_mode = type_natural_mode (type);
5439 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5440 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5443 /* Pull the value out of the saved registers. */
5445 addr = create_tmp_var (ptr_type_node, "addr");
5446 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5450 int needed_intregs, needed_sseregs;
5452 tree int_addr, sse_addr;
5454 lab_false = create_artificial_label ();
5455 lab_over = create_artificial_label ();
5457 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
5459 need_temp = (!REG_P (container)
5460 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5461 || TYPE_ALIGN (type) > 128));
5463 /* In case we are passing structure, verify that it is consecutive block
5464 on the register save area. If not we need to do moves. */
5465 if (!need_temp && !REG_P (container))
5467 /* Verify that all registers are strictly consecutive */
5468 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
5472 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5474 rtx slot = XVECEXP (container, 0, i);
5475 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5476 || INTVAL (XEXP (slot, 1)) != i * 16)
5484 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5486 rtx slot = XVECEXP (container, 0, i);
5487 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5488 || INTVAL (XEXP (slot, 1)) != i * 8)
5500 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5501 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5502 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5503 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5506 /* First ensure that we fit completely in registers. */
5509 t = build_int_cst (TREE_TYPE (gpr),
5510 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
5511 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5512 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5513 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5514 gimplify_and_add (t, pre_p);
5518 t = build_int_cst (TREE_TYPE (fpr),
5519 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5520 + X86_64_REGPARM_MAX * 8);
5521 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5522 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5523 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5524 gimplify_and_add (t, pre_p);
5527 /* Compute index to start of area used for integer regs. */
5530 /* int_addr = gpr + sav; */
5531 t = fold_convert (sizetype, gpr);
5532 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5533 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5534 gimplify_and_add (t, pre_p);
5538 /* sse_addr = fpr + sav; */
5539 t = fold_convert (sizetype, fpr);
5540 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5541 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5542 gimplify_and_add (t, pre_p);
5547 tree temp = create_tmp_var (type, "va_arg_tmp");
5550 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5551 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5552 gimplify_and_add (t, pre_p);
5554 for (i = 0; i < XVECLEN (container, 0); i++)
5556 rtx slot = XVECEXP (container, 0, i);
5557 rtx reg = XEXP (slot, 0);
5558 enum machine_mode mode = GET_MODE (reg);
5559 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5560 tree addr_type = build_pointer_type (piece_type);
5563 tree dest_addr, dest;
5565 if (SSE_REGNO_P (REGNO (reg)))
5567 src_addr = sse_addr;
5568 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5572 src_addr = int_addr;
5573 src_offset = REGNO (reg) * 8;
5575 src_addr = fold_convert (addr_type, src_addr);
5576 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5577 size_int (src_offset));
5578 src = build_va_arg_indirect_ref (src_addr);
5580 dest_addr = fold_convert (addr_type, addr);
5581 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5582 size_int (INTVAL (XEXP (slot, 1))));
5583 dest = build_va_arg_indirect_ref (dest_addr);
5585 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5586 gimplify_and_add (t, pre_p);
5592 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5593 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5594 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5595 gimplify_and_add (t, pre_p);
5599 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5600 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5601 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5602 gimplify_and_add (t, pre_p);
5605 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5606 gimplify_and_add (t, pre_p);
5608 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5609 append_to_statement_list (t, pre_p);
5612 /* ... otherwise out of the overflow area. */
5614 /* Care for on-stack alignment if needed. */
5615 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5616 || integer_zerop (TYPE_SIZE (type)))
5620 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5621 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5622 size_int (align - 1));
5623 t = fold_convert (sizetype, t);
5624 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5626 t = fold_convert (TREE_TYPE (ovf), t);
5628 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5630 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5631 gimplify_and_add (t2, pre_p);
5633 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5634 size_int (rsize * UNITS_PER_WORD));
5635 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5636 gimplify_and_add (t, pre_p);
5640 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5641 append_to_statement_list (t, pre_p);
5644 ptrtype = build_pointer_type (type);
5645 addr = fold_convert (ptrtype, addr);
5648 addr = build_va_arg_indirect_ref (addr);
5649 return build_va_arg_indirect_ref (addr);
5652 /* Return nonzero if OPNUM's MEM should be matched
5653 in movabs* patterns. */
5656 ix86_check_movabs (rtx insn, int opnum)
5660 set = PATTERN (insn);
5661 if (GET_CODE (set) == PARALLEL)
5662 set = XVECEXP (set, 0, 0);
5663 gcc_assert (GET_CODE (set) == SET);
5664 mem = XEXP (set, opnum);
5665 while (GET_CODE (mem) == SUBREG)
5666 mem = SUBREG_REG (mem);
5667 gcc_assert (MEM_P (mem));
5668 return (volatile_ok || !MEM_VOLATILE_P (mem));
5671 /* Initialize the table of extra 80387 mathematical constants. */
5674 init_ext_80387_constants (void)
5676 static const char * cst[5] =
5678 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5679 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5680 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5681 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5682 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5686 for (i = 0; i < 5; i++)
5688 real_from_string (&ext_80387_constants_table[i], cst[i]);
5689 /* Ensure each constant is rounded to XFmode precision. */
5690 real_convert (&ext_80387_constants_table[i],
5691 XFmode, &ext_80387_constants_table[i]);
5694 ext_80387_constants_init = 1;
5697 /* Return true if the constant is something that can be loaded with
5698 a special instruction. */
5701 standard_80387_constant_p (rtx x)
5703 enum machine_mode mode = GET_MODE (x);
5707 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5710 if (x == CONST0_RTX (mode))
5712 if (x == CONST1_RTX (mode))
5715 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5717 /* For XFmode constants, try to find a special 80387 instruction when
5718 optimizing for size or on those CPUs that benefit from them. */
5720 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5724 if (! ext_80387_constants_init)
5725 init_ext_80387_constants ();
5727 for (i = 0; i < 5; i++)
5728 if (real_identical (&r, &ext_80387_constants_table[i]))
5732 /* Load of the constant -0.0 or -1.0 will be split as
5733 fldz;fchs or fld1;fchs sequence. */
5734 if (real_isnegzero (&r))
5736 if (real_identical (&r, &dconstm1))
5742 /* Return the opcode of the special instruction to be used to load
5746 standard_80387_constant_opcode (rtx x)
5748 switch (standard_80387_constant_p (x))
5772 /* Return the CONST_DOUBLE representing the 80387 constant that is
5773 loaded by the specified special instruction. The argument IDX
5774 matches the return value from standard_80387_constant_p. */
5777 standard_80387_constant_rtx (int idx)
5781 if (! ext_80387_constants_init)
5782 init_ext_80387_constants ();
5798 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5802 /* Return 1 if mode is a valid mode for sse. */
5804 standard_sse_mode_p (enum machine_mode mode)
5821 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
5824 standard_sse_constant_p (rtx x)
5826 enum machine_mode mode = GET_MODE (x);
5828 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5830 if (vector_all_ones_operand (x, mode)
5831 && standard_sse_mode_p (mode))
5832 return TARGET_SSE2 ? 2 : -1;
5837 /* Return the opcode of the special instruction to be used to load
5841 standard_sse_constant_opcode (rtx insn, rtx x)
5843 switch (standard_sse_constant_p (x))
5846 if (get_attr_mode (insn) == MODE_V4SF)
5847 return "xorps\t%0, %0";
5848 else if (get_attr_mode (insn) == MODE_V2DF)
5849 return "xorpd\t%0, %0";
5851 return "pxor\t%0, %0";
5853 return "pcmpeqd\t%0, %0";
5858 /* Returns 1 if OP contains a symbol reference */
5861 symbolic_reference_mentioned_p (rtx op)
5866 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5869 fmt = GET_RTX_FORMAT (GET_CODE (op));
5870 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5876 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5877 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5881 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5888 /* Return 1 if it is appropriate to emit `ret' instructions in the
5889 body of a function. Do this only if the epilogue is simple, needing a
5890 couple of insns. Prior to reloading, we can't tell how many registers
5891 must be saved, so return 0 then. Return 0 if there is no frame
5892 marker to de-allocate. */
5895 ix86_can_use_return_insn_p (void)
5897 struct ix86_frame frame;
5899 if (! reload_completed || frame_pointer_needed)
5902 /* Don't allow more than 32 pop, since that's all we can do
5903 with one instruction. */
5904 if (crtl->args.pops_args
5905 && crtl->args.size >= 32768)
5908 ix86_compute_frame_layout (&frame);
5909 return frame.to_allocate == 0 && frame.nregs == 0;
5912 /* Value should be nonzero if functions must have frame pointers.
5913 Zero means the frame pointer need not be set up (and parms may
5914 be accessed via the stack pointer) in functions that seem suitable. */
5917 ix86_frame_pointer_required (void)
5919 /* If we accessed previous frames, then the generated code expects
5920 to be able to access the saved ebp value in our frame. */
5921 if (cfun->machine->accesses_prev_frame)
5924 /* Several x86 os'es need a frame pointer for other reasons,
5925 usually pertaining to setjmp. */
5926 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5929 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5930 the frame pointer by default. Turn it back on now if we've not
5931 got a leaf function. */
5932 if (TARGET_OMIT_LEAF_FRAME_POINTER
5933 && (!current_function_is_leaf
5934 || ix86_current_function_calls_tls_descriptor))
5943 /* Record that the current function accesses previous call frames. */
5946 ix86_setup_frame_addresses (void)
5948 cfun->machine->accesses_prev_frame = 1;
5951 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5952 # define USE_HIDDEN_LINKONCE 1
5954 # define USE_HIDDEN_LINKONCE 0
5957 static int pic_labels_used;
5959 /* Fills in the label name that should be used for a pc thunk for
5960 the given register. */
5963 get_pc_thunk_name (char name[32], unsigned int regno)
5965 gcc_assert (!TARGET_64BIT);
5967 if (USE_HIDDEN_LINKONCE)
5968 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5970 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5974 /* This function generates code for -fpic that loads %ebx with
5975 the return address of the caller and then returns. */
5978 ix86_file_end (void)
5983 for (regno = 0; regno < 8; ++regno)
5987 if (! ((pic_labels_used >> regno) & 1))
5990 get_pc_thunk_name (name, regno);
5995 switch_to_section (darwin_sections[text_coal_section]);
5996 fputs ("\t.weak_definition\t", asm_out_file);
5997 assemble_name (asm_out_file, name);
5998 fputs ("\n\t.private_extern\t", asm_out_file);
5999 assemble_name (asm_out_file, name);
6000 fputs ("\n", asm_out_file);
6001 ASM_OUTPUT_LABEL (asm_out_file, name);
6005 if (USE_HIDDEN_LINKONCE)
6009 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6011 TREE_PUBLIC (decl) = 1;
6012 TREE_STATIC (decl) = 1;
6013 DECL_ONE_ONLY (decl) = 1;
6015 (*targetm.asm_out.unique_section) (decl, 0);
6016 switch_to_section (get_named_section (decl, NULL, 0));
6018 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6019 fputs ("\t.hidden\t", asm_out_file);
6020 assemble_name (asm_out_file, name);
6021 fputc ('\n', asm_out_file);
6022 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6026 switch_to_section (text_section);
6027 ASM_OUTPUT_LABEL (asm_out_file, name);
6030 xops[0] = gen_rtx_REG (Pmode, regno);
6031 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6033 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
6035 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
6036 output_asm_insn ("ret", xops);
6039 if (NEED_INDICATE_EXEC_STACK)
6040 file_end_indicate_exec_stack ();
6043 /* Emit code for the SET_GOT patterns. */
6046 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
6052 if (TARGET_VXWORKS_RTP && flag_pic)
6054 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6055 xops[2] = gen_rtx_MEM (Pmode,
6056 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6057 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6059 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6060 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6061 an unadorned address. */
6062 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6063 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6064 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6068 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
6070 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
6072 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6077 output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
6079 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6082 output_asm_insn ("call\t%a2", xops);
6085 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6086 is what will be referenced by the Mach-O PIC subsystem. */
6088 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6091 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6092 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6097 output_asm_insn ("pop{q}\t%0", xops);
6099 output_asm_insn ("pop{l}\t%0", xops);
6105 get_pc_thunk_name (name, REGNO (dest));
6106 pic_labels_used |= 1 << REGNO (dest);
6108 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6109 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6110 output_asm_insn ("call\t%X2", xops);
6111 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6112 is what will be referenced by the Mach-O PIC subsystem. */
6115 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6117 targetm.asm_out.internal_label (asm_out_file, "L",
6118 CODE_LABEL_NUMBER (label));
6125 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
6128 output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
6130 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
6135 output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6137 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6143 /* Generate an "push" pattern for input ARG. */
6148 return gen_rtx_SET (VOIDmode,
6150 gen_rtx_PRE_DEC (Pmode,
6151 stack_pointer_rtx)),
6155 /* Return >= 0 if there is an unused call-clobbered register available
6156 for the entire function. */
6159 ix86_select_alt_pic_regnum (void)
6161 if (current_function_is_leaf && !crtl->profile
6162 && !ix86_current_function_calls_tls_descriptor)
6165 for (i = 2; i >= 0; --i)
6166 if (!df_regs_ever_live_p (i))
6170 return INVALID_REGNUM;
6173 /* Return 1 if we need to save REGNO. */
6175 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6177 if (pic_offset_table_rtx
6178 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6179 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6181 || crtl->calls_eh_return
6182 || crtl->uses_const_pool))
6184 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6189 if (crtl->calls_eh_return && maybe_eh_return)
6194 unsigned test = EH_RETURN_DATA_REGNO (i);
6195 if (test == INVALID_REGNUM)
6202 if (cfun->machine->force_align_arg_pointer
6203 && regno == REGNO (cfun->machine->force_align_arg_pointer))
6206 return (df_regs_ever_live_p (regno)
6207 && !call_used_regs[regno]
6208 && !fixed_regs[regno]
6209 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6212 /* Return number of registers to be saved on the stack. */
6215 ix86_nsaved_regs (void)
6220 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6221 if (ix86_save_reg (regno, true))
6226 /* Return the offset between two registers, one to be eliminated, and the other
6227 its replacement, at the start of a routine. */
6230 ix86_initial_elimination_offset (int from, int to)
6232 struct ix86_frame frame;
6233 ix86_compute_frame_layout (&frame);
6235 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6236 return frame.hard_frame_pointer_offset;
6237 else if (from == FRAME_POINTER_REGNUM
6238 && to == HARD_FRAME_POINTER_REGNUM)
6239 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6242 gcc_assert (to == STACK_POINTER_REGNUM);
6244 if (from == ARG_POINTER_REGNUM)
6245 return frame.stack_pointer_offset;
6247 gcc_assert (from == FRAME_POINTER_REGNUM);
6248 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6252 /* Fill structure ix86_frame about frame of currently computed function. */
6255 ix86_compute_frame_layout (struct ix86_frame *frame)
6257 HOST_WIDE_INT total_size;
6258 unsigned int stack_alignment_needed;
6259 HOST_WIDE_INT offset;
6260 unsigned int preferred_alignment;
6261 HOST_WIDE_INT size = get_frame_size ();
6263 frame->nregs = ix86_nsaved_regs ();
6266 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6267 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6269 /* During reload iteration the amount of registers saved can change.
6270 Recompute the value as needed. Do not recompute when amount of registers
6271 didn't change as reload does multiple calls to the function and does not
6272 expect the decision to change within single iteration. */
6274 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6276 int count = frame->nregs;
6278 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6279 /* The fast prologue uses move instead of push to save registers. This
6280 is significantly longer, but also executes faster as modern hardware
6281 can execute the moves in parallel, but can't do that for push/pop.
6283 Be careful about choosing what prologue to emit: When function takes
6284 many instructions to execute we may use slow version as well as in
6285 case function is known to be outside hot spot (this is known with
6286 feedback only). Weight the size of function by number of registers
6287 to save as it is cheap to use one or two push instructions but very
6288 slow to use many of them. */
6290 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6291 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6292 || (flag_branch_probabilities
6293 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6294 cfun->machine->use_fast_prologue_epilogue = false;
6296 cfun->machine->use_fast_prologue_epilogue
6297 = !expensive_function_p (count);
6299 if (TARGET_PROLOGUE_USING_MOVE
6300 && cfun->machine->use_fast_prologue_epilogue)
6301 frame->save_regs_using_mov = true;
6303 frame->save_regs_using_mov = false;
6306 /* Skip return address and saved base pointer. */
6307 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6309 frame->hard_frame_pointer_offset = offset;
6311 /* Do some sanity checking of stack_alignment_needed and
6312 preferred_alignment, since i386 port is the only using those features
6313 that may break easily. */
6315 gcc_assert (!size || stack_alignment_needed);
6316 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6317 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6318 gcc_assert (stack_alignment_needed
6319 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6321 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6322 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6324 /* Register save area */
6325 offset += frame->nregs * UNITS_PER_WORD;
6328 if (ix86_save_varrargs_registers)
6330 offset += X86_64_VARARGS_SIZE;
6331 frame->va_arg_size = X86_64_VARARGS_SIZE;
6334 frame->va_arg_size = 0;
6336 /* Align start of frame for local function. */
6337 frame->padding1 = ((offset + stack_alignment_needed - 1)
6338 & -stack_alignment_needed) - offset;
6340 offset += frame->padding1;
6342 /* Frame pointer points here. */
6343 frame->frame_pointer_offset = offset;
6347 /* Add outgoing arguments area. Can be skipped if we eliminated
6348 all the function calls as dead code.
6349 Skipping is however impossible when function calls alloca. Alloca
6350 expander assumes that last crtl->outgoing_args_size
6351 of stack frame are unused. */
6352 if (ACCUMULATE_OUTGOING_ARGS
6353 && (!current_function_is_leaf || cfun->calls_alloca
6354 || ix86_current_function_calls_tls_descriptor))
6356 offset += crtl->outgoing_args_size;
6357 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6360 frame->outgoing_arguments_size = 0;
6362 /* Align stack boundary. Only needed if we're calling another function
6364 if (!current_function_is_leaf || cfun->calls_alloca
6365 || ix86_current_function_calls_tls_descriptor)
6366 frame->padding2 = ((offset + preferred_alignment - 1)
6367 & -preferred_alignment) - offset;
6369 frame->padding2 = 0;
6371 offset += frame->padding2;
6373 /* We've reached end of stack frame. */
6374 frame->stack_pointer_offset = offset;
6376 /* Size prologue needs to allocate. */
6377 frame->to_allocate =
6378 (size + frame->padding1 + frame->padding2
6379 + frame->outgoing_arguments_size + frame->va_arg_size);
6381 if ((!frame->to_allocate && frame->nregs <= 1)
6382 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6383 frame->save_regs_using_mov = false;
6385 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
6386 && current_function_is_leaf
6387 && !ix86_current_function_calls_tls_descriptor)
6389 frame->red_zone_size = frame->to_allocate;
6390 if (frame->save_regs_using_mov)
6391 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6392 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6393 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6396 frame->red_zone_size = 0;
6397 frame->to_allocate -= frame->red_zone_size;
6398 frame->stack_pointer_offset -= frame->red_zone_size;
6400 fprintf (stderr, "\n");
6401 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6402 fprintf (stderr, "size: %ld\n", (long)size);
6403 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6404 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6405 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6406 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6407 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6408 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6409 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6410 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6411 (long)frame->hard_frame_pointer_offset);
6412 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6413 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6414 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6415 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6419 /* Emit code to save registers in the prologue. */
6422 ix86_emit_save_regs (void)
6427 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6428 if (ix86_save_reg (regno, true))
6430 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6431 RTX_FRAME_RELATED_P (insn) = 1;
6435 /* Emit code to save registers using MOV insns. First register
6436 is restored from POINTER + OFFSET. */
6438 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6443 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6444 if (ix86_save_reg (regno, true))
6446 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6448 gen_rtx_REG (Pmode, regno));
6449 RTX_FRAME_RELATED_P (insn) = 1;
6450 offset += UNITS_PER_WORD;
6454 /* Expand prologue or epilogue stack adjustment.
6455 The pattern exist to put a dependency on all ebp-based memory accesses.
6456 STYLE should be negative if instructions should be marked as frame related,
6457 zero if %r11 register is live and cannot be freely used and positive
6461 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6466 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6467 else if (x86_64_immediate_operand (offset, DImode))
6468 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6472 /* r11 is used by indirect sibcall return as well, set before the
6473 epilogue and used after the epilogue. ATM indirect sibcall
6474 shouldn't be used together with huge frame sizes in one
6475 function because of the frame_size check in sibcall.c. */
6477 r11 = gen_rtx_REG (DImode, R11_REG);
6478 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6480 RTX_FRAME_RELATED_P (insn) = 1;
6481 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6485 RTX_FRAME_RELATED_P (insn) = 1;
6488 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
6491 ix86_internal_arg_pointer (void)
6493 bool has_force_align_arg_pointer =
6494 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6495 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6496 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6497 && DECL_NAME (current_function_decl)
6498 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6499 && DECL_FILE_SCOPE_P (current_function_decl))
6500 || ix86_force_align_arg_pointer
6501 || has_force_align_arg_pointer)
6503 /* Nested functions can't realign the stack due to a register
6505 if (DECL_CONTEXT (current_function_decl)
6506 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
6508 if (ix86_force_align_arg_pointer)
6509 warning (0, "-mstackrealign ignored for nested functions");
6510 if (has_force_align_arg_pointer)
6511 error ("%s not supported for nested functions",
6512 ix86_force_align_arg_pointer_string);
6513 return virtual_incoming_args_rtx;
6515 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6516 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6519 return virtual_incoming_args_rtx;
6522 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6523 This is called from dwarf2out.c to emit call frame instructions
6524 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6526 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6528 rtx unspec = SET_SRC (pattern);
6529 gcc_assert (GET_CODE (unspec) == UNSPEC);
6533 case UNSPEC_REG_SAVE:
6534 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6535 SET_DEST (pattern));
6537 case UNSPEC_DEF_CFA:
6538 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6539 INTVAL (XVECEXP (unspec, 0, 0)));
6546 /* Expand the prologue into a bunch of separate insns. */
6549 ix86_expand_prologue (void)
6553 struct ix86_frame frame;
6554 HOST_WIDE_INT allocate;
6556 ix86_compute_frame_layout (&frame);
6558 if (cfun->machine->force_align_arg_pointer)
6562 /* Grab the argument pointer. */
6563 x = plus_constant (stack_pointer_rtx, 4);
6564 y = cfun->machine->force_align_arg_pointer;
6565 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6566 RTX_FRAME_RELATED_P (insn) = 1;
6568 /* The unwind info consists of two parts: install the fafp as the cfa,
6569 and record the fafp as the "save register" of the stack pointer.
6570 The later is there in order that the unwinder can see where it
6571 should restore the stack pointer across the and insn. */
6572 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6573 x = gen_rtx_SET (VOIDmode, y, x);
6574 RTX_FRAME_RELATED_P (x) = 1;
6575 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6577 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6578 RTX_FRAME_RELATED_P (y) = 1;
6579 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6580 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6581 REG_NOTES (insn) = x;
6583 /* Align the stack. */
6584 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6587 /* And here we cheat like madmen with the unwind info. We force the
6588 cfa register back to sp+4, which is exactly what it was at the
6589 start of the function. Re-pushing the return address results in
6590 the return at the same spot relative to the cfa, and thus is
6591 correct wrt the unwind info. */
6592 x = cfun->machine->force_align_arg_pointer;
6593 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6594 insn = emit_insn (gen_push (x));
6595 RTX_FRAME_RELATED_P (insn) = 1;
6598 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6599 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6600 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6601 REG_NOTES (insn) = x;
6604 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6605 slower on all targets. Also sdb doesn't like it. */
6607 if (frame_pointer_needed)
6609 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6610 RTX_FRAME_RELATED_P (insn) = 1;
6612 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6613 RTX_FRAME_RELATED_P (insn) = 1;
6616 allocate = frame.to_allocate;
6618 if (!frame.save_regs_using_mov)
6619 ix86_emit_save_regs ();
6621 allocate += frame.nregs * UNITS_PER_WORD;
6623 /* When using red zone we may start register saving before allocating
6624 the stack frame saving one cycle of the prologue. However I will
6625 avoid doing this if I am going to have to probe the stack since
6626 at least on x86_64 the stack probe can turn into a call that clobbers
6627 a red zone location */
6628 if (TARGET_RED_ZONE && frame.save_regs_using_mov
6629 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6630 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6631 : stack_pointer_rtx,
6632 -frame.nregs * UNITS_PER_WORD);
6636 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6637 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6638 GEN_INT (-allocate), -1);
6641 /* Only valid for Win32. */
6642 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6646 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
6648 if (cfun->machine->call_abi == MS_ABI)
6651 eax_live = ix86_eax_live_at_start_p ();
6655 emit_insn (gen_push (eax));
6656 allocate -= UNITS_PER_WORD;
6659 emit_move_insn (eax, GEN_INT (allocate));
6662 insn = gen_allocate_stack_worker_64 (eax);
6664 insn = gen_allocate_stack_worker_32 (eax);
6665 insn = emit_insn (insn);
6666 RTX_FRAME_RELATED_P (insn) = 1;
6667 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6668 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6669 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6670 t, REG_NOTES (insn));
6674 if (frame_pointer_needed)
6675 t = plus_constant (hard_frame_pointer_rtx,
6678 - frame.nregs * UNITS_PER_WORD);
6680 t = plus_constant (stack_pointer_rtx, allocate);
6681 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6685 if (frame.save_regs_using_mov
6686 && !(TARGET_RED_ZONE
6687 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6689 if (!frame_pointer_needed || !frame.to_allocate)
6690 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6692 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6693 -frame.nregs * UNITS_PER_WORD);
6696 pic_reg_used = false;
6697 if (pic_offset_table_rtx
6698 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6701 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6703 if (alt_pic_reg_used != INVALID_REGNUM)
6704 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6706 pic_reg_used = true;
6713 if (ix86_cmodel == CM_LARGE_PIC)
6715 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6716 rtx label = gen_label_rtx ();
6718 LABEL_PRESERVE_P (label) = 1;
6719 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6720 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6721 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6722 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6723 pic_offset_table_rtx, tmp_reg));
6726 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6729 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6732 /* Prevent function calls from being scheduled before the call to mcount.
6733 In the pic_reg_used case, make sure that the got load isn't deleted. */
6737 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6738 emit_insn (gen_blockage ());
6741 /* Emit cld instruction if stringops are used in the function. */
6742 if (TARGET_CLD && ix86_current_function_needs_cld)
6743 emit_insn (gen_cld ());
6746 /* Emit code to restore saved registers using MOV insns. First register
6747 is restored from POINTER + OFFSET. */
/* POINTER is the base register (stack or frame pointer) to address the
   save area through; OFFSET is the byte offset of the first saved
   register; MAYBE_EH_RETURN is forwarded to ix86_save_reg so that the
   eh_return register set is honored on that path.  */
6749 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6750 int maybe_eh_return)
6753 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Scan every hard register and emit a MOV reload for each one that the
   prologue saved.  */
6755 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6756 if (ix86_save_reg (regno, maybe_eh_return))
6758 /* Ensure that adjust_address won't be forced to produce pointer
6759 out of range allowed by x86-64 instruction set. */
6760 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset does not fit in a signed 32-bit displacement: materialize
   POINTER + OFFSET in r11 and address relative to r11 instead.  */
6764 r11 = gen_rtx_REG (DImode, R11_REG);
6765 emit_move_insn (r11, GEN_INT (offset));
6766 emit_insn (gen_adddi3 (r11, r11, pointer));
6767 base_address = gen_rtx_MEM (Pmode, r11);
/* Reload the register and step to the next word-sized save slot.  */
6770 emit_move_insn (gen_rtx_REG (Pmode, regno),
6771 adjust_address (base_address, Pmode, offset));
6772 offset += UNITS_PER_WORD;
6776 /* Restore function stack, frame, and registers. */
/* STYLE selects the epilogue flavor: the code below treats STYLE == 2 as
   the eh_return path (it is passed to ix86_emit_restore_regs_using_mov
   and tested against crtl->calls_eh_return); STYLE == 0 appears to be
   the sibcall variant that suppresses the final return — NOTE(review):
   confirm against the callers, which are outside this excerpt.  */
6779 ix86_expand_epilogue (int style)
6782 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6783 struct ix86_frame frame;
6784 HOST_WIDE_INT offset;
6786 ix86_compute_frame_layout (&frame);
6788 /* Calculate start of saved registers relative to ebp. Special care
6789 must be taken for the normal return case of a function using
6790 eh_return: the eax and edx registers are marked as saved, but not
6791 restored along this path. */
6792 offset = frame.nregs;
6793 if (crtl->calls_eh_return && style != 2)
/* OFFSET becomes a negative byte offset from the frame pointer.  */
6795 offset *= -UNITS_PER_WORD;
6797 /* If we're only restoring one register and sp is not valid then
6798 using a move instruction to restore the register since it's
6799 less work than reloading sp and popping the register.
6801 The default code result in stack adjustment using add/lea instruction,
6802 while this code results in LEAVE instruction (or discrete equivalent),
6803 so it is profitable in some other cases as well. Especially when there
6804 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6805 and there is exactly one register to pop. This heuristic may need some
6806 tuning in future. */
/* Strategy 1: restore registers with MOVs and tear the frame down with
   LEAVE (or its discrete equivalent).  */
6807 if ((!sp_valid && frame.nregs <= 1)
6808 || (TARGET_EPILOGUE_USING_MOVE
6809 && cfun->machine->use_fast_prologue_epilogue
6810 && (frame.nregs > 1 || frame.to_allocate))
6811 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6812 || (frame_pointer_needed && TARGET_USE_LEAVE
6813 && cfun->machine->use_fast_prologue_epilogue
6814 && frame.nregs == 1)
6815 || crtl->calls_eh_return)
6817 /* Restore registers. We can use ebp or esp to address the memory
6818 locations. If both are available, default to ebp, since offsets
6819 are known to be small. Only exception is esp pointing directly to the
6820 end of block of saved registers, where we may simplify addressing
6823 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6824 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6825 frame.to_allocate, style == 2);
6827 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6828 offset, style == 2);
6830 /* eh_return epilogues need %ecx added to the stack pointer. */
6833 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6835 if (frame_pointer_needed)
/* With a frame pointer: fold the stack adjustment into SA, reload
   the saved frame pointer, then let SA drive the sp adjustment.  */
6837 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6838 tmp = plus_constant (tmp, UNITS_PER_WORD);
6839 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6841 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6842 emit_move_insn (hard_frame_pointer_rtx, tmp);
6844 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: add SA plus the whole frame size to sp.  */
6849 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6850 tmp = plus_constant (tmp, (frame.to_allocate
6851 + frame.nregs * UNITS_PER_WORD));
6852 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6855 else if (!frame_pointer_needed)
6856 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6857 GEN_INT (frame.to_allocate
6858 + frame.nregs * UNITS_PER_WORD),
6860 /* If not an i386, mov & pop is faster than "leave". */
6861 else if (TARGET_USE_LEAVE || optimize_size
6862 || !cfun->machine->use_fast_prologue_epilogue)
6863 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
/* Discrete equivalent of LEAVE: mov %ebp,%esp then pop %ebp.  */
6866 pro_epilogue_adjust_stack (stack_pointer_rtx,
6867 hard_frame_pointer_rtx,
6870 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6872 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Strategy 2: deallocate the frame, then POP each saved register.  */
6877 /* First step is to deallocate the stack frame so that we can
6878 pop the registers. */
6881 gcc_assert (frame_pointer_needed);
6882 pro_epilogue_adjust_stack (stack_pointer_rtx,
6883 hard_frame_pointer_rtx,
6884 GEN_INT (offset), style);
6886 else if (frame.to_allocate)
6887 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6888 GEN_INT (frame.to_allocate), style);
6890 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6891 if (ix86_save_reg (regno, false))
6894 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6896 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6898 if (frame_pointer_needed)
6900 /* Leave results in shorter dependency chains on CPUs that are
6901 able to grok it fast. */
6902 if (TARGET_USE_LEAVE)
6903 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6904 else if (TARGET_64BIT)
6905 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6907 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
/* Undo the prologue's forced stack realignment, if any.  */
6911 if (cfun->machine->force_align_arg_pointer)
6913 emit_insn (gen_addsi3 (stack_pointer_rtx,
6914 cfun->machine->force_align_arg_pointer,
6918 /* Sibcall epilogues don't want a return instruction. */
/* Callee-pops ("pascal"/stdcall-style) return: pop the argument bytes.  */
6922 if (crtl->args.pops_args && crtl->args.size)
6924 rtx popc = GEN_INT (crtl->args.pops_args);
6926 /* i386 can only pop 64K bytes. If asked to pop more, pop
6927 return address, do explicit add, and jump indirectly to the
6930 if (crtl->args.pops_args >= 65536)
6932 rtx ecx = gen_rtx_REG (SImode, CX_REG);
6934 /* There is no "pascal" calling convention in any 64bit ABI. */
6935 gcc_assert (!TARGET_64BIT);
6937 emit_insn (gen_popsi1 (ecx));
6938 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6939 emit_jump_insn (gen_return_indirect_internal (ecx));
6942 emit_jump_insn (gen_return_pop_internal (popc));
6945 emit_jump_insn (gen_return_internal ());
6948 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook.  Restores the PIC register to its
   canonical hard register number (the prologue may have switched it to
   an alternate register) and works around a Mach-O limitation.  */
6951 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6952 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6954 if (pic_offset_table_rtx)
6955 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6957 /* Mach-O doesn't support labels at the end of objects, so if
6958 it looks like we might want one, insert a NOP. */
/* Walk back over trailing notes to find out whether the function ends
   in a (deleted) label; if so, pad with a NOP so the label has an
   instruction to attach to.  */
6960 rtx insn = get_last_insn ();
6963 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6964 insn = PREV_INSN (insn);
6968 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6969 fputs ("\tnop\n", file);
6975 /* Extract the parts of an RTL expression that is a valid memory address
6976 for an instruction. Return 0 if the structure of the address is
6977 grossly off. Return -1 if the address contains ASHIFT, so it is not
6978 strictly valid, but still used for computing length of lea instruction. */
/* On success the decomposition (base, index, scale, displacement and
   segment) is stored into *OUT.  */
6981 ix86_decompose_address (rtx addr, struct ix86_address *out)
6983 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6984 rtx base_reg, index_reg;
6985 HOST_WIDE_INT scale = 1;
6986 rtx scale_rtx = NULL_RTX;
6988 enum ix86_address_seg seg = SEG_DEFAULT;
/* Dispatch on the top-level code of ADDR: bare register, PLUS chain,
   MULT (index*scale), ASHIFT (lea form), or a plain displacement.  */
6990 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6992 else if (GET_CODE (addr) == PLUS)
/* Flatten the PLUS chain into the addends[] worklist first.  */
7002 addends[n++] = XEXP (op, 1);
7005 while (GET_CODE (op) == PLUS);
7010 for (i = n; i >= 0; --i)
7013 switch (GET_CODE (op))
7018 index = XEXP (op, 0);
7019 scale_rtx = XEXP (op, 1);
/* A UNSPEC_TP addend selects the TLS segment register.  */
7023 if (XINT (op, 1) == UNSPEC_TP
7024 && TARGET_TLS_DIRECT_SEG_REFS
7025 && seg == SEG_DEFAULT)
7026 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7055 else if (GET_CODE (addr) == MULT)
7057 index = XEXP (addr, 0); /* index*scale */
7058 scale_rtx = XEXP (addr, 1);
7060 else if (GET_CODE (addr) == ASHIFT)
7064 /* We're called for lea too, which implements ashift on occasion. */
7065 index = XEXP (addr, 0);
7066 tmp = XEXP (addr, 1);
7067 if (!CONST_INT_P (tmp))
/* Convert the shift count to a multiplicative scale (1 << count);
   counts above 3 cannot be encoded.  */
7069 scale = INTVAL (tmp);
7070 if ((unsigned HOST_WIDE_INT) scale > 3)
7076 disp = addr; /* displacement */
7078 /* Extract the integral value of scale. */
7081 if (!CONST_INT_P (scale_rtx))
7083 scale = INTVAL (scale_rtx);
/* Look through SUBREGs when checking register identities below.  */
7086 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
7087 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
7089 /* Allow arg pointer and stack pointer as index if there is not scaling. */
7090 if (base_reg && index_reg && scale == 1
7091 && (index_reg == arg_pointer_rtx
7092 || index_reg == frame_pointer_rtx
7093 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* %esp cannot be an index; swap base and index so it becomes base.  */
7096 tmp = base, base = index, index = tmp;
7097 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
7100 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7101 if ((base_reg == hard_frame_pointer_rtx
7102 || base_reg == frame_pointer_rtx
7103 || base_reg == arg_pointer_rtx) && !disp)
7106 /* Special case: on K6, [%esi] makes the instruction vector decoded.
7107 Avoid this by transforming to [%esi+0]. */
7108 if (TARGET_K6 && !optimize_size
7109 && base_reg && !index_reg && !disp
7111 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
7114 /* Special case: encode reg+reg instead of reg*2. */
7115 if (!base && index && scale && scale == 2)
7116 base = index, base_reg = index_reg, scale = 1;
7118 /* Special case: scaling cannot be encoded without base or displacement. */
7119 if (!base && !disp && index && scale != 1)
7131 /* Return cost of the memory address x.
7132 For i386, it is better to use a complex address than let gcc copy
7133 the address into a reg and make a new pseudo. But not if the address
7134 requires to two regs - that would mean more pseudos with longer
/* Lower cost means the address form is preferred by the RTL passes.  */
7137 ix86_address_cost (rtx x)
7139 struct ix86_address parts;
7141 int ok = ix86_decompose_address (x, &parts);
/* Strip SUBREGs so the register checks below see the real registers.  */
7145 if (parts.base && GET_CODE (parts.base) == SUBREG)
7146 parts.base = SUBREG_REG (parts.base);
7147 if (parts.index && GET_CODE (parts.index) == SUBREG)
7148 parts.index = SUBREG_REG (parts.index);
7150 /* Attempt to minimize number of registers in the address. */
/* Penalize addresses whose base/index are still pseudos (not yet hard
   registers), and especially those needing two distinct pseudos.  */
7152 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7154 && (!REG_P (parts.index)
7155 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7159 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7161 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7162 && parts.base != parts.index)
7165 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7166 since it's predecode logic can't detect the length of instructions
7167 and it degenerates to vector decoded. Increase cost of such
7168 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7169 to split such addresses or even refuse such addresses at all.
7171 Following addressing modes are affected:
7176 The first and last case may be avoidable by explicitly coding the zero in
7177 memory address, but I don't have AMD-K6 machine handy to check this
7181 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7182 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7183 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7189 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7190 this is used for to form addresses to local data when -fPIC is in
/* Recognizes the Mach-O pattern (minus SYM "<pic base>") used to
   address local data PC-relatively.  */
7194 darwin_local_data_pic (rtx disp)
7196 if (GET_CODE (disp) == MINUS)
7198 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7199 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7200 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7202 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* The Mach-O PIC base symbol is literally named "<pic base>".  */
7203 if (! strcmp (sym_name, "<pic base>"))
7211 /* Determine if a given RTX is a valid constant. We already know this
7212 satisfies CONSTANT_P. */
/* Returns nonzero when X may be used directly as an immediate/constant
   operand; TLS and dllimport symbols are rejected.  */
7215 legitimate_constant_p (rtx x)
7217 switch (GET_CODE (x))
/* CONST: look through a possible (plus sym const_int) wrapper.  */
7222 if (GET_CODE (x) == PLUS)
7224 if (!CONST_INT_P (XEXP (x, 1)))
7229 if (TARGET_MACHO && darwin_local_data_pic (x))
7232 /* Only some unspecs are valid as "constants". */
7233 if (GET_CODE (x) == UNSPEC)
7234 switch (XINT (x, 1))
7239 return TARGET_64BIT;
/* TLS unspecs are constant only for the matching TLS model of the
   wrapped symbol.  */
7242 x = XVECEXP (x, 0, 0);
7243 return (GET_CODE (x) == SYMBOL_REF
7244 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7246 x = XVECEXP (x, 0, 0);
7247 return (GET_CODE (x) == SYMBOL_REF
7248 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7253 /* We must have drilled down to a symbol. */
7254 if (GET_CODE (x) == LABEL_REF)
7256 if (GET_CODE (x) != SYMBOL_REF)
7261 /* TLS symbols are never valid. */
7262 if (SYMBOL_REF_TLS_MODEL (x))
7265 /* DLLIMPORT symbols are never valid. */
7266 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7267 && SYMBOL_REF_DLLIMPORT_P (x))
/* Vector/TImode constants: only all-zeros (and, per the elided code,
   presumably all-ones) forms are accepted — lines elided here.  */
7272 if (GET_MODE (x) == TImode
7273 && x != CONST0_RTX (TImode)
7279 if (x == CONST0_RTX (GET_MODE (x)))
7287 /* Otherwise we handle everything else in the move patterns. */
7291 /* Determine if it's legal to put X into the constant pool. This
7292 is not possible for the address of thread-local symbols, which
7293 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: nonzero means X must NOT be
   spilled to the constant pool.  */
7296 ix86_cannot_force_const_mem (rtx x)
7298 /* We can always put integral constants and vectors in memory. */
7299 switch (GET_CODE (x))
/* Anything else is poolable only if it is a legitimate constant.  */
7309 return !legitimate_constant_p (x);
7312 /* Determine if a given RTX is a valid constant address. */
/* A constant address must both satisfy CONSTANT_P and pass the strict
   address legitimacy check.  */
7315 constant_address_p (rtx x)
7317 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7320 /* Nonzero if the constant value X is a legitimate general operand
7321 when generating PIC code. It is given that flag_pic is on and
7322 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
7325 legitimate_pic_operand_p (rtx x)
7329 switch (GET_CODE (x))
/* CONST: strip an optional (plus inner const_int) wrapper before
   inspecting the inner expression.  */
7332 inner = XEXP (x, 0);
7333 if (GET_CODE (inner) == PLUS
7334 && CONST_INT_P (XEXP (inner, 1)))
7335 inner = XEXP (inner, 0);
7337 /* Only some unspecs are valid as "constants". */
7338 if (GET_CODE (inner) == UNSPEC)
7339 switch (XINT (inner, 1))
7344 return TARGET_64BIT;
/* TLS unspec: valid only for a local-exec-model symbol.  */
7346 x = XVECEXP (inner, 0, 0);
7347 return (GET_CODE (x) == SYMBOL_REF
7348 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Symbols/labels fall back to the PIC displacement check.  */
7356 return legitimate_pic_address_disp_p (x);
7363 /* Determine if a given CONST RTX is a valid memory displacement
/* Returns nonzero when DISP may appear as the displacement part of a
   PIC memory address.  */
7367 legitimate_pic_address_disp_p (rtx disp)
7371 /* In 64bit mode we can allow direct addresses of symbols and labels
7372 when they are not dynamic symbols. */
7375 rtx op0 = disp, op1;
7377 switch (GET_CODE (disp))
/* CONST: only (plus sym const_int) with the offset inside the
   +/-16MB small-model window is acceptable.  */
7383 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7385 op0 = XEXP (XEXP (disp, 0), 0);
7386 op1 = XEXP (XEXP (disp, 0), 1);
7387 if (!CONST_INT_P (op1)
7388 || INTVAL (op1) >= 16*1024*1024
7389 || INTVAL (op1) < -16*1024*1024)
7391 if (GET_CODE (op0) == LABEL_REF)
7393 if (GET_CODE (op0) != SYMBOL_REF)
7398 /* TLS references should always be enclosed in UNSPEC. */
7399 if (SYMBOL_REF_TLS_MODEL (op0))
/* Direct references are fine for local, near symbols outside the
   large PIC model.  */
7401 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7402 && ix86_cmodel != CM_LARGE_PIC)
7410 if (GET_CODE (disp) != CONST)
7412 disp = XEXP (disp, 0);
/* 64-bit: past this point only GOT-relative unspecs are allowed.  */
7416 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7417 of GOT tables. We should not need these anyway. */
7418 if (GET_CODE (disp) != UNSPEC
7419 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7420 && XINT (disp, 1) != UNSPEC_GOTOFF
7421 && XINT (disp, 1) != UNSPEC_PLTOFF))
7424 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7425 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip (plus ... const_int) then require a recognized
   PIC/TLS unspec (or the Mach-O local-data form).  */
7431 if (GET_CODE (disp) == PLUS)
7433 if (!CONST_INT_P (XEXP (disp, 1)))
7435 disp = XEXP (disp, 0);
7439 if (TARGET_MACHO && darwin_local_data_pic (disp))
7442 if (GET_CODE (disp) != UNSPEC)
7445 switch (XINT (disp, 1))
7450 /* We need to check for both symbols and labels because VxWorks loads
7451 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7453 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7454 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7456 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7457 While ABI specify also 32bit relocation but we don't produce it in
7458 small PIC model at all. */
7459 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7460 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7462 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
7464 case UNSPEC_GOTTPOFF:
7465 case UNSPEC_GOTNTPOFF:
7466 case UNSPEC_INDNTPOFF:
/* Each TLS unspec is valid only when the wrapped symbol uses the
   matching TLS access model.  */
7469 disp = XVECEXP (disp, 0, 0);
7470 return (GET_CODE (disp) == SYMBOL_REF
7471 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7473 disp = XVECEXP (disp, 0, 0);
7474 return (GET_CODE (disp) == SYMBOL_REF
7475 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7477 disp = XVECEXP (disp, 0, 0);
7478 return (GET_CODE (disp) == SYMBOL_REF
7479 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7485 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7486 memory address for an instruction. The MODE argument is the machine mode
7487 for the MEM expression that wants to use this address.
7489 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7490 convert common non-canonical forms to canonical form so that they will
/* STRICT nonzero means hard-register constraints are enforced
   (REG_OK_FOR_*_STRICT_P); the REASON string is kept for the failure
   paths (presumably for debug output in elided code).  */
7494 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7495 rtx addr, int strict)
7497 struct ix86_address parts;
7498 rtx base, index, disp;
7499 HOST_WIDE_INT scale;
7500 const char *reason = NULL;
7501 rtx reason_rtx = NULL_RTX;
7503 if (ix86_decompose_address (addr, &parts) <= 0)
7505 reason = "decomposition failed";
7510 index = parts.index;
7512 scale = parts.scale;
7514 /* Validate base register.
7516 Don't allow SUBREG's that span more than a word here. It can lead to spill
7517 failures when the base is one word out of a two word structure, which is
7518 represented internally as a DImode int. */
7527 else if (GET_CODE (base) == SUBREG
7528 && REG_P (SUBREG_REG (base))
7529 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7531 reg = SUBREG_REG (base);
7534 reason = "base is not a register";
7538 if (GET_MODE (base) != Pmode)
7540 reason = "base is not in Pmode";
7544 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7545 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7547 reason = "base is not valid";
7552 /* Validate index register.
7554 Don't allow SUBREG's that span more than a word here -- same as above. */
7563 else if (GET_CODE (index) == SUBREG
7564 && REG_P (SUBREG_REG (index))
7565 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7567 reg = SUBREG_REG (index);
7570 reason = "index is not a register";
7574 if (GET_MODE (index) != Pmode)
7576 reason = "index is not in Pmode";
7580 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7581 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7583 reason = "index is not valid";
7588 /* Validate scale factor. */
/* Hardware encodes only scales of 1, 2, 4 and 8, and a scale requires
   an index register.  */
7591 reason_rtx = GEN_INT (scale);
7594 reason = "scale without index";
7598 if (scale != 2 && scale != 4 && scale != 8)
7600 reason = "scale is not a valid multiplier";
7605 /* Validate displacement. */
7610 if (GET_CODE (disp) == CONST
7611 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7612 switch (XINT (XEXP (disp, 0), 1))
7614 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7615 used. While ABI specify also 32bit relocations, we don't produce
7616 them at all and use IP relative instead. */
7619 gcc_assert (flag_pic);
7621 goto is_legitimate_pic;
7622 reason = "64bit address unspec";
7625 case UNSPEC_GOTPCREL:
7626 gcc_assert (flag_pic);
7627 goto is_legitimate_pic;
7629 case UNSPEC_GOTTPOFF:
7630 case UNSPEC_GOTNTPOFF:
7631 case UNSPEC_INDNTPOFF:
7637 reason = "invalid address unspec";
/* Symbolic displacements under PIC (or Mach-O indirection) need the
   full PIC-displacement validation below.  */
7641 else if (SYMBOLIC_CONST (disp)
7645 && MACHOPIC_INDIRECT
7646 && !machopic_operand_p (disp)
7652 if (TARGET_64BIT && (index || base))
7654 /* foo@dtpoff(%rX) is ok. */
7655 if (GET_CODE (disp) != CONST
7656 || GET_CODE (XEXP (disp, 0)) != PLUS
7657 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7658 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7659 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7660 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7662 reason = "non-constant pic memory reference";
7666 else if (! legitimate_pic_address_disp_p (disp))
7668 reason = "displacement is an invalid pic construct";
7672 /* This code used to verify that a symbolic pic displacement
7673 includes the pic_offset_table_rtx register.
7675 While this is good idea, unfortunately these constructs may
7676 be created by "adds using lea" optimization for incorrect
7685 This code is nonsensical, but results in addressing
7686 GOT table with pic_offset_table_rtx base. We can't
7687 just refuse it easily, since it gets matched by
7688 "addsi3" pattern, that later gets split to lea in the
7689 case output register differs from input. While this
7690 can be handled by separate addsi pattern for this case
7691 that never results in lea, this seems to be easier and
7692 correct fix for crash to disable this test. */
/* Non-PIC: the displacement must be a plain constant, and on 64-bit
   must fit the sign-extended 32-bit immediate range.  */
7694 else if (GET_CODE (disp) != LABEL_REF
7695 && !CONST_INT_P (disp)
7696 && (GET_CODE (disp) != CONST
7697 || !legitimate_constant_p (disp))
7698 && (GET_CODE (disp) != SYMBOL_REF
7699 || !legitimate_constant_p (disp)))
7701 reason = "displacement is not constant";
7704 else if (TARGET_64BIT
7705 && !x86_64_immediate_operand (disp, VOIDmode))
7707 reason = "displacement is out of range";
7712 /* Everything looks valid. */
7719 /* Return a unique alias set for the GOT. */
/* The set is created lazily on first use and cached in a function-local
   static (-1 marks "not yet allocated").  */
7721 static alias_set_type
7722 ix86_GOT_alias_set (void)
7724 static alias_set_type set = -1;
7726 set = new_alias_set ();
7730 /* Return a legitimate reference for ORIG (an address) using the
7731 register REG. If REG is 0, a new pseudo is generated.
7733 There are two types of references that must be handled:
7735 1. Global data references must load the address from the GOT, via
7736 the PIC reg. An insn is emitted to do this load, and the reg is
7739 2. Static data references, constant pool addresses, and code labels
7740 compute the address as an offset from the GOT, whose base is in
7741 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7742 differentiate them from global data objects. The returned
7743 address is the PIC reg + an unspec constant.
7745 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7746 reg also appears in the address. */
7749 legitimize_pic_address (rtx orig, rtx reg)
/* Mach-O 32-bit has its own PIC machinery; delegate entirely.  */
7756 if (TARGET_MACHO && !TARGET_64BIT)
7759 reg = gen_reg_rtx (Pmode);
7760 /* Use the generic Mach-O PIC machinery. */
7761 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit, already-valid PIC displacement: nothing to do.  */
7765 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7767 else if (TARGET_64BIT
7768 && ix86_cmodel != CM_SMALL_PIC
7769 && gotoff_operand (addr, Pmode))
7772 /* This symbol may be referenced via a displacement from the PIC
7773 base address (@GOTOFF). */
/* Marking the PIC register live during reload keeps it from being
   considered dead while these insns are emitted.  */
7775 if (reload_in_progress)
7776 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7777 if (GET_CODE (addr) == CONST)
7778 addr = XEXP (addr, 0);
7779 if (GET_CODE (addr) == PLUS)
7781 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7783 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7786 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7787 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7789 tmpreg = gen_reg_rtx (Pmode);
7792 emit_move_insn (tmpreg, new_rtx);
/* Add the PIC base to the @GOTOFF offset to form the address.  */
7796 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7797 tmpreg, 1, OPTAB_DIRECT);
7800 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7802 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7804 /* This symbol may be referenced via a displacement from the PIC
7805 base address (@GOTOFF). */
7807 if (reload_in_progress)
7808 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7809 if (GET_CODE (addr) == CONST)
7810 addr = XEXP (addr, 0);
7811 if (GET_CODE (addr) == PLUS)
7813 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7815 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7818 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7819 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7820 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7824 emit_move_insn (reg, new_rtx);
/* Global symbols (and VxWorks text labels) go through the GOT.  */
7828 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7829 /* We can't use @GOTOFF for text labels on VxWorks;
7830 see gotoff_operand. */
7831 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7833 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
/* dllimport symbols get their own indirection helper.  */
7835 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7836 return legitimize_dllimport_symbol (addr, true);
7837 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7838 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7839 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7841 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7842 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: RIP-relative GOT load (@GOTPCREL).  */
7846 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7848 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7849 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7850 new_rtx = gen_const_mem (Pmode, new_rtx);
7851 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7854 reg = gen_reg_rtx (Pmode);
7855 /* Use directly gen_movsi, otherwise the address is loaded
7856 into register for CSE. We don't want to CSE this addresses,
7857 instead we CSE addresses from the GOT table, so skip this. */
7858 emit_insn (gen_movsi (reg, new_rtx));
7863 /* This symbol must be referenced via a load from the
7864 Global Offset Table (@GOT). */
7866 if (reload_in_progress)
7867 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7868 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7869 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7871 new_rtx = force_reg (Pmode, new_rtx);
7872 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7873 new_rtx = gen_const_mem (Pmode, new_rtx);
7874 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7877 reg = gen_reg_rtx (Pmode);
7878 emit_move_insn (reg, new_rtx);
/* Remaining cases: large constants, CONST wrappers and PLUS trees.  */
7884 if (CONST_INT_P (addr)
7885 && !x86_64_immediate_operand (addr, VOIDmode))
7889 emit_move_insn (reg, addr);
7893 new_rtx = force_reg (Pmode, addr);
7895 else if (GET_CODE (addr) == CONST)
7897 addr = XEXP (addr, 0);
7899 /* We must match stuff we generate before. Assume the only
7900 unspecs that can get here are ours. Not that we could do
7901 anything with them anyway.... */
7902 if (GET_CODE (addr) == UNSPEC
7903 || (GET_CODE (addr) == PLUS
7904 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7906 gcc_assert (GET_CODE (addr) == PLUS);
7908 if (GET_CODE (addr) == PLUS)
7910 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7912 /* Check first to see if this is a constant offset from a @GOTOFF
7913 symbol reference. */
7914 if (gotoff_operand (op0, Pmode)
7915 && CONST_INT_P (op1))
7919 if (reload_in_progress)
7920 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7921 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7923 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7924 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7925 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7929 emit_move_insn (reg, new_rtx);
/* 64-bit: offsets outside +/-16MB cannot stay in the displacement;
   force oversized parts into registers.  */
7935 if (INTVAL (op1) < -16*1024*1024
7936 || INTVAL (op1) >= 16*1024*1024)
7938 if (!x86_64_immediate_operand (op1, Pmode))
7939 op1 = force_reg (Pmode, op1);
7940 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both operands recursively, then
   recombine, folding a constant term when possible.  */
7946 base = legitimize_pic_address (XEXP (addr, 0), reg);
7947 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7948 base == reg ? NULL_RTX : reg);
7950 if (CONST_INT_P (new_rtx))
7951 new_rtx = plus_constant (base, INTVAL (new_rtx));
7954 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7956 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7957 new_rtx = XEXP (new_rtx, 1);
7959 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7967 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* The thread pointer is represented as (unspec [const0] UNSPEC_TP);
   when TO_REG, it is copied into a fresh pseudo via an explicit SET.  */
7970 get_thread_pointer (int to_reg)
7974 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7978 reg = gen_reg_rtx (Pmode);
7979 insn = gen_rtx_SET (VOIDmode, reg, tp);
7980 insn = emit_insn (insn);
7985 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7986 false if we expect this to be used for a memory address and true if
7987 we expect to load the address into a register. */
/* Lowers a TLS symbol reference X into an address sequence appropriate
   for MODEL (GD/LD/IE/LE).  NOTE(review): listing is elided; braces,
   some returns, and the switch header are missing between lines.  */
7990 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7992 rtx dest, base, off, pic, tp;
/* General Dynamic: a full __tls_get_addr style call per symbol.  */
7997 case TLS_MODEL_GLOBAL_DYNAMIC:
7998 dest = gen_reg_rtx (Pmode);
7999 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8001 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8003 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
/* 64-bit GD: result of the TLS call comes back in %rax.  */
8006 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8007 insns = get_insns ();
8010 RTL_CONST_CALL_P (insns) = 1;
8011 emit_libcall_block (insns, dest, rax, x);
8013 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8014 emit_insn (gen_tls_global_dynamic_64 (dest, x));
8016 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLSDESC) returns an offset; add the thread pointer here.  */
8018 if (TARGET_GNU2_TLS)
8020 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8022 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Local Dynamic: one base call for the module, then DTPOFF offsets.  */
8026 case TLS_MODEL_LOCAL_DYNAMIC:
8027 base = gen_reg_rtx (Pmode);
8028 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8030 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8032 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8035 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8036 insns = get_insns ();
/* Build an equivalence note so CSE can share the base computation.  */
8039 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8040 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8041 RTL_CONST_CALL_P (insns) = 1;
8042 emit_libcall_block (insns, base, rax, note);
8044 else if (TARGET_64BIT && TARGET_GNU2_TLS)
8045 emit_insn (gen_tls_local_dynamic_base_64 (base));
8047 emit_insn (gen_tls_local_dynamic_base_32 (base));
8049 if (TARGET_GNU2_TLS)
8051 rtx x = ix86_tls_module_base ();
8053 set_unique_reg_note (get_last_insn (), REG_EQUIV,
8054 gen_rtx_MINUS (Pmode, x, tp));
/* Symbol's offset within the module's dynamic TLS block.  */
8057 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
8058 off = gen_rtx_CONST (Pmode, off);
8060 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
8062 if (TARGET_GNU2_TLS)
8064 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
8066 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
/* Initial Exec: load the TP offset from the GOT, no call needed.  */
8071 case TLS_MODEL_INITIAL_EXEC:
8075 type = UNSPEC_GOTNTPOFF;
/* presumably the 32-bit PIC path -- surrounding condition elided.  */
8079 if (reload_in_progress)
8080 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8081 pic = pic_offset_table_rtx;
8082 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
8084 else if (!TARGET_ANY_GNU_TLS)
8086 pic = gen_reg_rtx (Pmode);
8087 emit_insn (gen_set_got (pic));
8088 type = UNSPEC_GOTTPOFF;
8093 type = UNSPEC_INDNTPOFF;
8096 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
8097 off = gen_rtx_CONST (Pmode, off);
8099 off = gen_rtx_PLUS (Pmode, pic, off);
8100 off = gen_const_mem (Pmode, off);
8101 set_mem_alias_set (off, ix86_GOT_alias_set ());
8103 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
/* May keep the thread pointer as a %fs/%gs segment ref when allowed.  */
8105 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8106 off = force_reg (Pmode, off);
8107 return gen_rtx_PLUS (Pmode, base, off);
8111 base = get_thread_pointer (true);
8112 dest = gen_reg_rtx (Pmode);
8113 emit_insn (gen_subsi3 (dest, base, off));
/* Local Exec: offset is a link-time constant from the thread pointer.  */
8117 case TLS_MODEL_LOCAL_EXEC:
8118 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
8119 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8120 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
8121 off = gen_rtx_CONST (Pmode, off);
8123 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8125 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8126 return gen_rtx_PLUS (Pmode, base, off);
8130 base = get_thread_pointer (true);
8131 dest = gen_reg_rtx (Pmode);
8132 emit_insn (gen_subsi3 (dest, base, off));
8143 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Cache of decl -> __imp_ alias decls, GC-managed; entries persist only
   while marked (if_marked).  NOTE(review): listing is elided.  */
8146 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8147 htab_t dllimport_map;
/* Look up (or lazily create) the VAR_DECL whose rtl is a load through
   the import table entry __imp_<name> for DECL.  */
8150 get_dllimport_decl (tree decl)
8152 struct tree_map *h, in;
8156 size_t namelen, prefixlen;
8162 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8164 in.hash = htab_hash_pointer (decl);
8165 in.base.from = decl;
8166 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8167 h = (struct tree_map *) *loc;
/* Cache miss: build a fresh artificial pointer decl for the import.  */
8171 *loc = h = GGC_NEW (struct tree_map);
8173 h->base.from = decl;
8174 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8175 DECL_ARTIFICIAL (to) = 1;
8176 DECL_IGNORED_P (to) = 1;
8177 DECL_EXTERNAL (to) = 1;
8178 TREE_READONLY (to) = 1;
8180 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8181 name = targetm.strip_name_encoding (name);
/* Fastcall symbols already carry a '@' marker, so they get only one
   leading underscore in the __imp_ name.  */
8182 prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8183 namelen = strlen (name);
8184 prefixlen = strlen (prefix);
8185 imp_name = (char *) alloca (namelen + prefixlen + 1);
8186 memcpy (imp_name, prefix, prefixlen);
8187 memcpy (imp_name + prefixlen, name, namelen + 1);
8189 name = ggc_alloc_string (imp_name, namelen + prefixlen);
8190 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8191 SET_SYMBOL_REF_DECL (rtl, to);
8192 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
/* The decl's value is the pointer stored at __imp_<name>; mark it as a
   GOT-like constant load for aliasing purposes.  */
8194 rtl = gen_const_mem (Pmode, rtl);
8195 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8197 SET_DECL_RTL (to, rtl);
8198 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8203 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
8204 true if we require the result be a register. */
/* NOTE(review): listing is elided; return type and braces not visible.  */
8207 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
8212 gcc_assert (SYMBOL_REF_DECL (symbol));
8213 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
/* Result is the MEM load through the import table entry.  */
8215 x = DECL_RTL (imp_decl);
8217 x = force_reg (Pmode, x);
8221 /* Try machine-dependent ways of modifying an illegitimate address
8222 to be legitimate. If we find one, return the new, valid address.
8223 This macro is used in only one place: `memory_address' in explow.c.
8225 OLDX is the address as it was before break_out_memory_refs was called.
8226 In some cases it is useful to look at this to decide what needs to be done.
8228 MODE and WIN are passed so that this macro can use
8229 GO_IF_LEGITIMATE_ADDRESS.
8231 It is always safe for this macro to do nothing. It exists to recognize
8232 opportunities to optimize the output.
8234 For the 80386, we handle X+REG by loading X into a register R and
8235 using R+REG. R will go in a general reg and indexing will be used.
8236 However, if REG is a broken-out memory address or multiplication,
8237 nothing needs to be done because REG can certainly go in a general reg.
8239 When -fpic is used, special handling is needed for symbolic references.
8240 See comments by legitimize_pic_address in i386.c for details. */
/* NOTE(review): listing is elided; several closing braces, `changed'
   bookkeeping, and some returns fall in the gaps.  */
8243 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols get their own lowering first.  */
8248 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8250 return legitimize_tls_address (x, (enum tls_model) log, false);
8251 if (GET_CODE (x) == CONST
8252 && GET_CODE (XEXP (x, 0)) == PLUS
8253 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8254 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8256 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8257 (enum tls_model) log, false);
8258 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* dllimport symbols become loads through the import table.  */
8261 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8263 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8264 return legitimize_dllimport_symbol (x, true);
8265 if (GET_CODE (x) == CONST
8266 && GET_CODE (XEXP (x, 0)) == PLUS
8267 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8268 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8270 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8271 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8275 if (flag_pic && SYMBOLIC_CONST (x))
8276 return legitimize_pic_address (x, 0);
8278 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8279 if (GET_CODE (x) == ASHIFT
8280 && CONST_INT_P (XEXP (x, 1))
8281 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8284 log = INTVAL (XEXP (x, 1));
8285 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8286 GEN_INT (1 << log));
8289 if (GET_CODE (x) == PLUS)
8291 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
8293 if (GET_CODE (XEXP (x, 0)) == ASHIFT
8294 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8295 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8298 log = INTVAL (XEXP (XEXP (x, 0), 1));
8299 XEXP (x, 0) = gen_rtx_MULT (Pmode,
8300 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8301 GEN_INT (1 << log));
8304 if (GET_CODE (XEXP (x, 1)) == ASHIFT
8305 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8306 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8309 log = INTVAL (XEXP (XEXP (x, 1), 1));
8310 XEXP (x, 1) = gen_rtx_MULT (Pmode,
8311 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8312 GEN_INT (1 << log))
8315 /* Put multiply first if it isn't already. */
8316 if (GET_CODE (XEXP (x, 1)) == MULT)
8318 rtx tmp = XEXP (x, 0);
8319 XEXP (x, 0) = XEXP (x, 1);
8324 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8325 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
8326 created by virtual register instantiation, register elimination, and
8327 similar optimizations. */
8328 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8331 x = gen_rtx_PLUS (Pmode,
8332 gen_rtx_PLUS (Pmode, XEXP (x, 0),
8333 XEXP (XEXP (x, 1), 0)),
8334 XEXP (XEXP (x, 1), 1));
8338 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8339 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
8340 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8341 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8342 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8343 && CONSTANT_P (XEXP (x, 1)))
8346 rtx other = NULL_RTX;
/* Exactly one of the two constants must be a CONST_INT we can fold.  */
8348 if (CONST_INT_P (XEXP (x, 1)))
8350 constant = XEXP (x, 1);
8351 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8353 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8355 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8356 other = XEXP (x, 1);
8364 x = gen_rtx_PLUS (Pmode,
8365 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8366 XEXP (XEXP (XEXP (x, 0), 1), 0)),
8367 plus_constant (other, INTVAL (constant)));
/* If the canonicalizations made the address valid, we're done.  */
8371 if (changed && legitimate_address_p (mode, x, FALSE))
8374 if (GET_CODE (XEXP (x, 0)) == MULT)
8377 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8380 if (GET_CODE (XEXP (x, 1)) == MULT)
8383 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8387 && REG_P (XEXP (x, 1))
8388 && REG_P (XEXP (x, 0)))
8391 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8394 x = legitimize_pic_address (x, 0);
8397 if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: force one side into a fresh register.  */
8400 if (REG_P (XEXP (x, 0)))
8402 rtx temp = gen_reg_rtx (Pmode);
8403 rtx val = force_operand (XEXP (x, 1), temp);
8405 emit_move_insn (temp, val);
8411 else if (REG_P (XEXP (x, 1)))
8413 rtx temp = gen_reg_rtx (Pmode);
8414 rtx val = force_operand (XEXP (x, 0), temp);
8416 emit_move_insn (temp, val);
8426 /* Print an integer constant expression in assembler syntax. Addition
8427 and subtraction are the only arithmetic that may appear in these
8428 expressions. FILE is the stdio stream to write to, X is the rtx, and
8429 CODE is the operand print code from the output string. */
/* NOTE(review): listing is elided; several case labels and breaks are
   missing between the numbered lines.  */
8432 output_pic_addr_const (FILE *file, rtx x, int code)
8436 switch (GET_CODE (x))
8439 gcc_assert (flag_pic);
8444 if (! TARGET_MACHO || TARGET_64BIT)
8445 output_addr_const (file, x);
8448 const char *name = XSTR (x, 0);
8450 /* Mark the decl as referenced so that cgraph will
8451 output the function. */
8452 if (SYMBOL_REF_DECL (x))
8453 mark_decl_referenced (SYMBOL_REF_DECL (x));
8456 if (MACHOPIC_INDIRECT
8457 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8458 name = machopic_indirection_name (x, /*stub_p=*/true);
8460 assemble_name (file, name);
/* 'P' requests a PLT-relative call reference for non-local symbols.  */
8462 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
8463 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8464 fputs ("@PLT", file);
8471 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8472 assemble_name (asm_out_file, buf);
8476 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8480 /* This used to output parentheses around the expression,
8481 but that does not work on the 386 (either ATT or BSD assembler). */
8482 output_pic_addr_const (file, XEXP (x, 0), code);
8486 if (GET_MODE (x) == VOIDmode)
8488 /* We can use %d if the number is <32 bits and positive. */
8489 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8490 fprintf (file, "0x%lx%08lx",
8491 (unsigned long) CONST_DOUBLE_HIGH (x),
8492 (unsigned long) CONST_DOUBLE_LOW (x));
8494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8497 /* We can't handle floating point constants;
8498 PRINT_OPERAND must handle them. */
8499 output_operand_lossage ("floating constant misused");
8503 /* Some assemblers need integer constants to appear first. */
8504 if (CONST_INT_P (XEXP (x, 0)))
8506 output_pic_addr_const (file, XEXP (x, 0), code);
8508 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: only constant - symbol is supported here.  */
8512 gcc_assert (CONST_INT_P (XEXP (x, 1)));
8513 output_pic_addr_const (file, XEXP (x, 1), code);
8515 output_pic_addr_const (file, XEXP (x, 0), code);
8521 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8522 output_pic_addr_const (file, XEXP (x, 0), code);
8524 output_pic_addr_const (file, XEXP (x, 1), code);
8526 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the operand followed by the matching relocation suffix.  */
8530 gcc_assert (XVECLEN (x, 0) == 1);
8531 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8532 switch (XINT (x, 1))
8535 fputs ("@GOT", file);
8538 fputs ("@GOTOFF", file);
8541 fputs ("@PLTOFF", file);
8543 case UNSPEC_GOTPCREL:
8544 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8545 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8547 case UNSPEC_GOTTPOFF:
8548 /* FIXME: This might be @TPOFF in Sun ld too. */
8549 fputs ("@GOTTPOFF", file);
8552 fputs ("@TPOFF", file);
8556 fputs ("@TPOFF", file);
8558 fputs ("@NTPOFF", file);
8561 fputs ("@DTPOFF", file);
8563 case UNSPEC_GOTNTPOFF:
8565 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8566 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8568 fputs ("@GOTNTPOFF", file);
8570 case UNSPEC_INDNTPOFF:
8571 fputs ("@INDNTPOFF", file);
8574 output_operand_lossage ("invalid UNSPEC as operand");
8580 output_operand_lossage ("invalid expression as operand");
8584 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8585 We need to emit DTP-relative relocations. */
/* NOTE(review): listing is elided; the switch on SIZE selecting the
   directive (and the 12-byte padding case) is partly missing.  */
8587 static void ATTRIBUTE_UNUSED
8588 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8590 fputs (ASM_LONG, file);
8591 output_addr_const (file, x);
8592 fputs ("@DTPOFF", file);
/* Pad the remaining bytes for sizes wider than the emitted long.  */
8598 fputs (", 0", file);
8605 /* In the name of slightly smaller debug output, and to cater to
8606 general assembler lossage, recognize PIC+GOTOFF and turn it back
8607 into a direct symbol reference.
8609 On Darwin, this is necessary to avoid a crash, because Darwin
8610 has a different PIC label for each routine but the DWARF debugging
8611 information is not associated with any particular routine, so it's
8612 necessary to remove references to the PIC label from RTL stored by
8613 the DWARF output code. */
/* NOTE(review): listing is elided; the initial unwrapping of ORIG_X
   into X and some early returns are missing.  */
8616 ix86_delegitimize_address (rtx orig_x)
8619 /* reg_addend is NULL or a multiple of some register. */
8620 rtx reg_addend = NULL_RTX;
8621 /* const_addend is NULL or a const_int. */
8622 rtx const_addend = NULL_RTX;
8623 /* This is the result, or NULL. */
8624 rtx result = NULL_RTX;
/* 64-bit: peel a (const (unspec [sym] GOTPCREL)) back to the symbol.  */
8631 if (GET_CODE (x) != CONST
8632 || GET_CODE (XEXP (x, 0)) != UNSPEC
8633 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8636 return XVECEXP (XEXP (x, 0), 0, 0);
8639 if (GET_CODE (x) != PLUS
8640 || GET_CODE (XEXP (x, 1)) != CONST)
8643 if (REG_P (XEXP (x, 0))
8644 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8645 /* %ebx + GOT/GOTOFF */
8647 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8649 /* %ebx + %reg * scale + GOT/GOTOFF */
8650 reg_addend = XEXP (x, 0)
8651 if (REG_P (XEXP (reg_addend, 0))
8652 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8653 reg_addend = XEXP (reg_addend, 1);
8654 else if (REG_P (XEXP (reg_addend, 1))
8655 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8656 reg_addend = XEXP (reg_addend, 0);
8659 if (!REG_P (reg_addend)
8660 && GET_CODE (reg_addend) != MULT
8661 && GET_CODE (reg_addend) != ASHIFT)
/* Strip an optional constant offset off the GOT expression.  */
8667 x = XEXP (XEXP (x, 1), 0);
8668 if (GET_CODE (x) == PLUS
8669 && CONST_INT_P (XEXP (x, 1)))
8671 const_addend = XEXP (x, 1);
8675 if (GET_CODE (x) == UNSPEC
8676 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8677 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8678 result = XVECEXP (x, 0, 0);
8680 if (TARGET_MACHO && darwin_local_data_pic (x)
8682 result = XEXP (x, 0);
/* Re-attach the stripped addends to the bare symbol.  */
8688 result = gen_rtx_PLUS (Pmode, result, const_addend);
8690 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8694 /* If X is a machine specific address (i.e. a symbol or label being
8695 referenced as a displacement from the GOT implemented using an
8696 UNSPEC), then return the base term. Otherwise return X. */
/* NOTE(review): listing is elided; the TARGET_64BIT guard and the
   returns of X itself fall in the gaps.  */
8699 ix86_find_base_term (rtx x)
/* 64-bit path: look through (const (plus (unspec GOTPCREL) offset)).  */
8705 if (GET_CODE (x) != CONST)
8708 if (GET_CODE (term) == PLUS
8709 && (CONST_INT_P (XEXP (term, 1))
8710 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8711 term = XEXP (term, 0);
8712 if (GET_CODE (term) != UNSPEC
8713 || XINT (term, 1) != UNSPEC_GOTPCREL)
8716 term = XVECEXP (term, 0, 0);
8718 if (GET_CODE (term) != SYMBOL_REF
8719 && GET_CODE (term) != LABEL_REF)
/* 32-bit path: delegitimize and accept only a symbol or label.  */
8725 term = ix86_delegitimize_address (x);
8727 if (GET_CODE (term) != SYMBOL_REF
8728 && GET_CODE (term) != LABEL_REF)
/* Emit the condition-code suffix (e.g. "e", "a", "nbe") for CODE in MODE
   to FILE; REVERSE inverts the condition.  NOTE(review): listing is
   heavily elided -- most case labels of the switch are missing.  */
8735 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first mapped onto integer condition codes.  */
8740 if (mode == CCFPmode || mode == CCFPUmode)
8742 enum rtx_code second_code, bypass_code;
8743 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8744 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8745 code = ix86_fp_compare_code_to_integer (code);
8749 code = reverse_condition (code);
8800 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8804 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8805 Those same assemblers have the same but opposite lossage on cmov. */
8807 suffix = fp ? "nbe" : "a";
8808 else if (mode == CCCmode)
8831 gcc_assert (mode == CCmode || mode == CCCmode);
8853 gcc_assert (mode == CCmode || mode == CCCmode);
8854 suffix = fp ? "nb" : "ae";
8857 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8864 else if (mode == CCCmode)
8865 suffix = fp ? "nb" : "ae";
/* Unordered / ordered FP results map onto the parity flag.  */
8870 suffix = fp ? "u" : "p";
8873 suffix = fp ? "nu" : "np";
8878 fputs (suffix, file);
8881 /* Print the name of register X to FILE based on its machine mode and number.
8882 If CODE is 'w', pretend the mode is HImode.
8883 If CODE is 'b', pretend the mode is QImode.
8884 If CODE is 'k', pretend the mode is SImode.
8885 If CODE is 'q', pretend the mode is DImode.
8886 If CODE is 'h', pretend the reg is the 'high' byte register.
8887 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
/* NOTE(review): listing is elided; several case labels and the '%'
   prefix output fall in the gaps.  */
8890 print_reg (rtx x, int code, FILE *file)
8892 gcc_assert (x == pc_rtx
8893 || (REGNO (x) != ARG_POINTER_REGNUM
8894 && REGNO (x) != FRAME_POINTER_REGNUM
8895 && REGNO (x) != FLAGS_REG
8896 && REGNO (x) != FPSR_REG
8897 && REGNO (x) != FPCR_REG));
8899 if (ASSEMBLER_DIALECT == ASM_ATT)
8904 gcc_assert (TARGET_64BIT);
8905 fputs ("rip", file);
/* Map the override code (or the operand's own mode) to a byte size.  */
8909 if (code == 'w' || MMX_REG_P (x))
8911 else if (code == 'b')
8913 else if (code == 'k')
8915 else if (code == 'q')
8917 else if (code == 'y')
8919 else if (code == 'h')
8922 code = GET_MODE_SIZE (GET_MODE (x));
8924 /* Irritatingly, AMD extended registers use different naming convention
8925 from the normal registers. */
8926 if (REX_INT_REG_P (x))
8928 gcc_assert (TARGET_64BIT);
8932 error ("extended registers have no high halves");
8935 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8938 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8941 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8944 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8947 error ("unsupported operand size for extended register");
8955 if (STACK_TOP_P (x))
8957 fputs ("st(0)", file);
/* Word-or-wider integer regs get the 'e'/'r' size prefix.  */
8964 if (! ANY_FP_REG_P (x))
8965 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8970 fputs (hi_reg_name[REGNO (x)], file);
8973 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8975 fputs (qi_reg_name[REGNO (x)], file);
8978 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8980 fputs (qi_high_reg_name[REGNO (x)], file);
8987 /* Locate some local-dynamic symbol still in use by this function
8988 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: record the first local-dynamic TLS symbol seen
   in cfun and stop the walk (return value lines are elided).  */
8992 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8996 if (GET_CODE (x) == SYMBOL_REF
8997 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8999 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return (and cache in cfun->machine) the name of some local-dynamic TLS
   symbol used by the current function, scanning insns on first call.  */
9007 get_some_local_dynamic_name (void)
9011 if (cfun->machine->some_ld_name)
9012 return cfun->machine->some_ld_name;
9014 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9016 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9017 return cfun->machine->some_ld_name;
/* Operand-code legend for print_operand (continuation of an elided
   header comment).  */
9023 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9024 C -- print opcode suffix for set/cmov insn.
9025 c -- like C, but print reversed condition
9026 F,f -- likewise, but for floating-point.
9027 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9029 R -- print the prefix for register names.
9030 z -- print the opcode suffix for the size of the current operand.
9031 * -- print a star (in certain assembler syntax)
9032 A -- print an absolute memory reference.
9033 w -- print the operand as if it's a "word" (HImode) even if it isn't.
9034 s -- print a shift double count, followed by the assemblers argument
9036 b -- print the QImode name of the register for the indicated operand.
9037 %b0 would print %al if operands[0] is reg 0.
9038 w -- likewise, print the HImode name of the register.
9039 k -- likewise, print the SImode name of the register.
9040 q -- likewise, print the DImode name of the register.
9041 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9042 y -- print "st(0)" instead of "st" as a register.
9043 D -- print condition for SSE cmp instruction.
9044 P -- if PIC, print an @PLT suffix.
9045 X -- don't print any sort of PIC '@' suffix for a symbol.
9046 & -- print some in-use local-dynamic symbol name.
9047 H -- print a memory address offset by 8; used for sse high-parts
9048 Y -- print condition for SSE5 com* instruction.
9049 + -- print a branch hint as 'cs' or 'ds' prefix
9050 ; -- print a semicolon (after prefixes due to bug in older gas).
/* NOTE(review): listing is heavily elided -- the outer switch on CODE
   and most case labels/breaks are missing between numbered lines.  */
9054 print_operand (FILE *file, rtx x, int code)
9061 if (ASSEMBLER_DIALECT == ASM_ATT)
9066 assemble_name (file, get_some_local_dynamic_name ());
9070 switch (ASSEMBLER_DIALECT)
9077 /* Intel syntax. For absolute addresses, registers should not
9078 be surrounded by braces. */
9082 PRINT_OPERAND (file, x, 0);
9092 PRINT_OPERAND (file, x, 0);
/* Size-suffix codes: AT&T dialect emits the single-letter suffix.  */
9097 if (ASSEMBLER_DIALECT == ASM_ATT)
9102 if (ASSEMBLER_DIALECT == ASM_ATT)
9107 if (ASSEMBLER_DIALECT == ASM_ATT)
9112 if (ASSEMBLER_DIALECT == ASM_ATT)
9117 if (ASSEMBLER_DIALECT == ASM_ATT)
9122 if (ASSEMBLER_DIALECT == ASM_ATT)
9127 /* 387 opcodes don't get size suffixes if the operands are
9129 if (STACK_REG_P (x))
9132 /* Likewise if using Intel opcodes. */
9133 if (ASSEMBLER_DIALECT == ASM_INTEL)
9136 /* This is the size of op from size of operand. */
9137 switch (GET_MODE_SIZE (GET_MODE (x)))
9146 #ifdef HAVE_GAS_FILDS_FISTS
9156 if (GET_MODE (x) == SFmode)
9171 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9173 #ifdef GAS_MNEMONICS
/* 's': shift-double count, printed only when the assembler needs it.  */
9199 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9201 PRINT_OPERAND (file, x, 0);
9207 /* Little bit of braindamage here. The SSE compare instructions
9208 does use completely different names for the comparisons that the
9209 fp conditional moves. */
9210 switch (GET_CODE (x))
9225 fputs ("unord", file);
9229 fputs ("neq", file);
9233 fputs ("nlt", file);
9237 fputs ("nle", file);
9240 fputs ("ord", file);
9247 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9248 if (ASSEMBLER_DIALECT == ASM_ATT)
9250 switch (GET_MODE (x))
9252 case HImode: putc ('w', file); break;
9254 case SFmode: putc ('l', file); break;
9256 case DFmode: putc ('q', file); break;
9257 default: gcc_unreachable ();
/* 'C'/'F'/'c'/'f': condition-code suffixes via put_condition_code.  */
9264 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9267 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9268 if (ASSEMBLER_DIALECT == ASM_ATT)
9271 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9274 /* Like above, but reverse condition */
9276 /* Check to see if argument to %c is really a constant
9277 and not a condition code which needs to be reversed. */
9278 if (!COMPARISON_P (x))
9280 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9283 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9286 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9287 if (ASSEMBLER_DIALECT == ASM_ATT)
9290 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9294 /* It doesn't actually matter what mode we use here, as we're
9295 only going to use this for printing. */
9296 x = adjust_address_nv (x, DImode, 8);
/* '+': branch-prediction hint prefixes based on REG_BR_PROB.  */
9303 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9306 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9309 int pred_val = INTVAL (XEXP (x, 0));
9311 if (pred_val < REG_BR_PROB_BASE * 45 / 100
9312 || pred_val > REG_BR_PROB_BASE * 55 / 100)
9314 int taken = pred_val > REG_BR_PROB_BASE / 2;
9315 int cputaken = final_forward_branch_p (current_output_insn) == 0;
9317 /* Emit hints only in the case default branch prediction
9318 heuristics would fail. */
9319 if (taken != cputaken)
9321 /* We use 3e (DS) prefix for taken branches and
9322 2e (CS) prefix for not taken branches. */
9324 fputs ("ds ; ", file);
9326 fputs ("cs ; ", file);
/* 'Y': SSE5 com* comparison names.  */
9334 switch (GET_CODE (x))
9337 fputs ("neq", file);
9344 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9348 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9359 fputs ("unord", file);
9362 fputs ("ord", file);
9365 fputs ("ueq", file);
9368 fputs ("nlt", file);
9371 fputs ("nle", file);
9374 fputs ("ule", file);
9377 fputs ("ult", file);
9380 fputs ("une", file);
9389 fputs (" ; ", file);
9396 output_operand_lossage ("invalid operand code '%c'", code);
/* Fallthrough to plain operand printing: registers, memory, constants.  */
9401 print_reg (x, code, file);
9405 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
9406 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9407 && GET_MODE (x) != BLKmode)
9410 switch (GET_MODE_SIZE (GET_MODE (x)))
9412 case 1: size = "BYTE"; break;
9413 case 2: size = "WORD"; break;
9414 case 4: size = "DWORD"; break;
9415 case 8: size = "QWORD"; break;
9416 case 12: size = "XWORD"; break;
9418 if (GET_MODE (x) == XFmode)
9427 /* Check for explicit size override (codes 'b', 'w' and 'k') */
9430 else if (code == 'w')
9432 else if (code == 'k')
9436 fputs (" PTR ", file);
9440 /* Avoid (%rip) for call operands. */
9441 if (CONSTANT_ADDRESS_P (x) && code == 'P'
9442 && !CONST_INT_P (x))
9443 output_addr_const (file, x);
9444 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9445 output_operand_lossage ("invalid constraints for operand");
/* SFmode immediates are emitted as their 32-bit bit pattern.  */
9450 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
9455 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9456 REAL_VALUE_TO_TARGET_SINGLE (r, l);
9458 if (ASSEMBLER_DIALECT == ASM_ATT)
9460 fprintf (file, "0x%08lx", (long unsigned int) l);
9463 /* These float cases don't actually occur as immediate operands. */
9464 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9468 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9469 fprintf (file, "%s", dstr);
9472 else if (GET_CODE (x) == CONST_DOUBLE
9473 && GET_MODE (x) == XFmode)
9477 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9478 fprintf (file, "%s", dstr);
9483 /* We have patterns that allow zero sets of memory, for instance.
9484 In 64-bit mode, we should probably support all 8-byte vectors,
9485 since we can in fact encode that into an immediate. */
9486 if (GET_CODE (x) == CONST_VECTOR)
9488 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9494 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9496 if (ASSEMBLER_DIALECT == ASM_ATT)
9499 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9500 || GET_CODE (x) == LABEL_REF)
9502 if (ASSEMBLER_DIALECT == ASM_ATT)
9505 fputs ("OFFSET FLAT:", file);
9508 if (CONST_INT_P (x))
9509 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9511 output_pic_addr_const (file, x, code);
9513 output_addr_const (file, x);
9517 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): listing is elided; the dialect split between the AT&T
   and Intel halves loses some braces and else-arms.  */
9520 print_operand_address (FILE *file, rtx addr)
9522 struct ix86_address parts;
9523 rtx base, index, disp;
9525 int ok = ix86_decompose_address (addr, &parts);
9530 index = parts.index;
9532 scale = parts.scale;
/* Segment override (%fs:/%gs:) comes first in AT&T syntax.  */
9540 if (ASSEMBLER_DIALECT == ASM_ATT)
9542 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9548 /* Use one byte shorter RIP relative addressing for 64bit mode. */
9549 if (TARGET_64BIT && !base && !index)
9553 if (GET_CODE (disp) == CONST
9554 && GET_CODE (XEXP (disp, 0)) == PLUS
9555 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9556 symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS symbols/labels are eligible for %rip addressing.  */
9558 if (GET_CODE (symbol) == LABEL_REF
9559 || (GET_CODE (symbol) == SYMBOL_REF
9560 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9563 if (!base && !index)
9565 /* Displacement only requires special attention. */
9567 if (CONST_INT_P (disp))
9569 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9570 fputs ("ds:", file);
9571 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9574 output_pic_addr_const (file, disp, 0);
9576 output_addr_const (file, disp);
/* AT&T form: disp(base,index,scale).  */
9580 if (ASSEMBLER_DIALECT == ASM_ATT)
9585 output_pic_addr_const (file, disp, 0);
9586 else if (GET_CODE (disp) == LABEL_REF)
9587 output_asm_label (disp);
9589 output_addr_const (file, disp);
9594 print_reg (base, 0, file);
9598 print_reg (index, 0, file);
9600 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp].  */
9606 rtx offset = NULL_RTX;
9610 /* Pull out the offset of a symbol; print any symbol itself. */
9611 if (GET_CODE (disp) == CONST
9612 && GET_CODE (XEXP (disp, 0)) == PLUS
9613 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9615 offset = XEXP (XEXP (disp, 0), 1);
9616 disp = gen_rtx_CONST (VOIDmode,
9617 XEXP (XEXP (disp, 0), 0));
9621 output_pic_addr_const (file, disp, 0);
9622 else if (GET_CODE (disp) == LABEL_REF)
9623 output_asm_label (disp);
9624 else if (CONST_INT_P (disp))
9627 output_addr_const (file, disp);
9633 print_reg (base, 0, file);
9636 if (INTVAL (offset) >= 0)
9638 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9642 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9649 print_reg (index, 0, file);
9651 fprintf (file, "*%d", scale);
/* Target hook: print the relocation suffix for a TLS UNSPEC address;
   returns false for anything it does not recognize (returns elided).  */
9659 output_addr_const_extra (FILE *file, rtx x)
9663 if (GET_CODE (x) != UNSPEC)
9666 op = XVECEXP (x, 0, 0);
9667 switch (XINT (x, 1))
9669 case UNSPEC_GOTTPOFF:
9670 output_addr_const (file, op);
9671 /* FIXME: This might be @TPOFF in Sun ld. */
9672 fputs ("@GOTTPOFF", file);
9675 output_addr_const (file, op);
9676 fputs ("@TPOFF", file);
9679 output_addr_const (file, op);
/* presumably split on TARGET_64BIT -- the condition line is elided.  */
9681 fputs ("@TPOFF", file);
9683 fputs ("@NTPOFF", file);
9686 output_addr_const (file, op);
9687 fputs ("@DTPOFF", file);
9689 case UNSPEC_GOTNTPOFF:
9690 output_addr_const (file, op);
9692 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9693 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9695 fputs ("@GOTNTPOFF", file);
9697 case UNSPEC_INDNTPOFF:
9698 output_addr_const (file, op);
9699 fputs ("@INDNTPOFF", file);
9709 /* Split one or more DImode RTL references into pairs of SImode
9710 references. The RTL can be REG, offsettable MEM, integer constant, or
9711 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9712 split and "num" is its length. lo_half and hi_half are output arrays
9713 that parallel "operands". */
/* NOTE(review): loop header over NUM is elided from this listing.  */
9716 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9720 rtx op = operands[num];
9722 /* simplify_subreg refuse to split volatile memory addresses,
9723 but we still have to handle it. */
9726 lo_half[num] = adjust_address (op, SImode, 0);
9727 hi_half[num] = adjust_address (op, SImode, 4);
/* Non-MEM: take SImode subregs at byte offsets 0 and 4; VOIDmode
   constants are treated as DImode.  */
9731 lo_half[num] = simplify_gen_subreg (SImode, op,
9732 GET_MODE (op) == VOIDmode
9733 ? DImode : GET_MODE (op), 0);
9734 hi_half[num] = simplify_gen_subreg (SImode, op,
9735 GET_MODE (op) == VOIDmode
9736 ? DImode : GET_MODE (op), 4);
9740 /* Split one or more TImode RTL references into pairs of DImode
9741 references. The RTL can be REG, offsettable MEM, integer constant, or
9742 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9743 split and "num" is its length. lo_half and hi_half are output arrays
9744 that parallel "operands". */
/* NOTE(review): loop header over NUM is elided; mirrors split_di with
   DImode halves at byte offsets 0 and 8.  */
9747 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9751 rtx op = operands[num];
9753 /* simplify_subreg refuse to split volatile memory addresses, but we
9754 still have to handle it. */
9757 lo_half[num] = adjust_address (op, DImode, 0);
9758 hi_half[num] = adjust_address (op, DImode, 8);
9762 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9763 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9768 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9769 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9770 is the expression of the binary operation. The output may either be
9771 emitted here, or returned to the caller, like all output_* functions.
9773 There is no guarantee that the operands are the same mode, as they
9774 might be within FLOAT or FLOAT_EXTEND expressions. */
9776 #ifndef SYSV386_COMPAT
9777 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9778 wants to fix the assemblers because that causes incompatibility
9779 with gcc. No-one wants to fix gcc because that causes
9780 incompatibility with assemblers... You can use the option of
9781 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9782 #define SYSV386_COMPAT 1
/* NOTE(review): several original lines (case labels, breaks, strcat calls
   building the mnemonic into `buf`) are missing from this chunk; the
   annotations below cover only the visible statements.  */
9786 output_387_binary_op (rtx insn, rtx *operands)
9788 static char buf[30];
/* The operation is handled as SSE if any operand lives in an SSE reg.  */
9791 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9793 #ifdef ENABLE_CHECKING
9794 /* Even if we do not want to check the inputs, this documents input
9795 constraints. Which helps in understanding the following code. */
9796 if (STACK_REG_P (operands[0])
9797 && ((REG_P (operands[1])
9798 && REGNO (operands[0]) == REGNO (operands[1])
9799 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9800 || (REG_P (operands[2])
9801 && REGNO (operands[0]) == REGNO (operands[2])
9802 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9803 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
/* If the x87 constraints above do not hold, the insn must be SSE.  */
9806 gcc_assert (is_sse);
/* First switch: pick the base mnemonic from the operation code; the
   MODE_INT tests select the integer-operand (fi*) forms.  */
9809 switch (GET_CODE (operands[3]))
9812 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9813 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9821 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9822 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9830 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9831 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9839 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9840 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE form: append the scalar-single or scalar-double suffix plus the
   AT&T/Intel dual operand template.  */
9854 if (GET_MODE (operands[0]) == SFmode)
9855 strcat (buf, "ss\t{%2, %0|%0, %2}");
9857 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* Second switch: choose the operand/suffix template `p` for the x87 form,
   depending on which operand matches the destination and whether the
   stack top dies.  */
9862 switch (GET_CODE (operands[3]))
9866 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
/* Commutative op with dst == op2: swap so dst matches operands[1].  */
9868 rtx temp = operands[2];
9869 operands[2] = operands[1];
9873 /* know operands[0] == operands[1]. */
9875 if (MEM_P (operands[2]))
9881 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9883 if (STACK_TOP_P (operands[0]))
9884 /* How is it that we are storing to a dead operand[2]?
9885 Well, presumably operands[1] is dead too. We can't
9886 store the result to st(0) as st(0) gets popped on this
9887 instruction. Instead store to operands[2] (which I
9888 think has to be st(1)). st(1) will be popped later.
9889 gcc <= 2.8.1 didn't have this check and generated
9890 assembly code that the Unixware assembler rejected. */
9891 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9893 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9897 if (STACK_TOP_P (operands[0]))
9898 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9900 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative (MINUS/DIV) cases follow.  */
9905 if (MEM_P (operands[1]))
9911 if (MEM_P (operands[2]))
9917 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9920 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9921 derived assemblers, confusingly reverse the direction of
9922 the operation for fsub{r} and fdiv{r} when the
9923 destination register is not st(0). The Intel assembler
9924 doesn't have this brain damage. Read !SYSV386_COMPAT to
9925 figure out what the hardware really does. */
9926 if (STACK_TOP_P (operands[0]))
9927 p = "{p\t%0, %2|rp\t%2, %0}";
9929 p = "{rp\t%2, %0|p\t%0, %2}";
9931 if (STACK_TOP_P (operands[0]))
9932 /* As above for fmul/fadd, we can't store to st(0). */
9933 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9935 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9940 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9943 if (STACK_TOP_P (operands[0]))
9944 p = "{rp\t%0, %1|p\t%1, %0}";
9946 p = "{p\t%1, %0|rp\t%0, %1}";
9948 if (STACK_TOP_P (operands[0]))
9949 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9951 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9956 if (STACK_TOP_P (operands[0]))
9958 if (STACK_TOP_P (operands[1]))
9959 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9961 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9964 else if (STACK_TOP_P (operands[1]))
9967 p = "{\t%1, %0|r\t%0, %1}";
9969 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9975 p = "{r\t%2, %0|\t%0, %2}";
9977 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9990 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Determine which i387 control-word mode INSN requires.  Calls and asms
   force I387_CW_UNINITIALIZED; unrecognized insns need nothing (ANY).
   NOTE(review): some original lines (call-test, TARGET_80387 guards,
   returns inside the mode tests) are missing from this chunk.  */
9993 ix86_mode_needed (int entity, rtx insn)
9995 enum attr_i387_cw mode;
9997 /* The mode UNINITIALIZED is used to store control word after a
9998 function call or ASM pattern. The mode ANY specify that function
9999 has no requirements on the control word and make no changes in the
10000 bits we are interested in. */
10003 || (NONJUMP_INSN_P (insn)
10004 && (asm_noperands (PATTERN (insn)) >= 0
10005 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10006 return I387_CW_UNINITIALIZED;
10008 if (recog_memoized (insn) < 0)
10009 return I387_CW_ANY;
/* Read the insn's i387_cw attribute and map it to the required mode.  */
10011 mode = get_attr_i387_cw (insn);
10016 if (mode == I387_CW_TRUNC)
10021 if (mode == I387_CW_FLOOR)
10026 if (mode == I387_CW_CEIL)
10031 if (mode == I387_CW_MASK_PM)
10036 gcc_unreachable ();
10039 return I387_CW_ANY;
10042 /* Output code to initialize control word copies used by trunc?f?i and
10043 rounding patterns. CURRENT_MODE is set to current control word,
10044 while NEW_MODE is set to new control word. */
/* Stores the live control word (fnstcw), derives a modified copy in a
   pseudo, and saves it in a dedicated stack slot so fldcw can load it.
   NOTE(review): some original lines (switch headers, breaks, else branch)
   are missing from this chunk.  */
10047 emit_i387_cw_initialization (int mode)
10049 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
10052 enum ix86_stack_slot slot;
10054 rtx reg = gen_reg_rtx (HImode);
/* Capture the current control word and copy it into REG for editing.  */
10056 emit_insn (gen_x86_fnstcw_1 (stored_mode));
10057 emit_move_insn (reg, copy_rtx (stored_mode));
/* On these targets, edit the rounding-control bits with full HImode
   and/or logic; otherwise (below) use the insv-based byte insert.  */
10059 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
10063 case I387_CW_TRUNC:
10064 /* round toward zero (truncate) */
10065 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
10066 slot = SLOT_CW_TRUNC;
10069 case I387_CW_FLOOR:
10070 /* round down toward -oo */
10071 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10072 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
10073 slot = SLOT_CW_FLOOR;
10077 /* round up toward +oo */
10078 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10079 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
10080 slot = SLOT_CW_CEIL;
10083 case I387_CW_MASK_PM:
10084 /* mask precision exception for nearbyint() */
10085 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10086 slot = SLOT_CW_MASK_PM;
10090 gcc_unreachable ();
/* Alternate path: write the 2-bit rounding field via movsi_insv_1.  */
10097 case I387_CW_TRUNC:
10098 /* round toward zero (truncate) */
10099 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
10100 slot = SLOT_CW_TRUNC;
10103 case I387_CW_FLOOR:
10104 /* round down toward -oo */
10105 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
10106 slot = SLOT_CW_FLOOR;
10110 /* round up toward +oo */
10111 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
10112 slot = SLOT_CW_CEIL;
10115 case I387_CW_MASK_PM:
10116 /* mask precision exception for nearbyint() */
10117 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10118 slot = SLOT_CW_MASK_PM;
10122 gcc_unreachable ();
/* Spill the edited word to its per-mode stack slot.  */
10126 gcc_assert (slot < MAX_386_STACK_LOCALS);
10128 new_mode = assign_386_stack_local (HImode, slot);
10129 emit_move_insn (new_mode, reg);
10132 /* Output code for INSN to convert a float to a signed int. OPERANDS
10133 are the insn operands. The output may be [HSD]Imode and the input
10134 operand may be [SDX]Fmode. */
/* NOTE(review): some original lines (the fisttp/else split, final return)
   are missing from this chunk.  */
10137 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
10139 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10140 int dimode_p = GET_MODE (operands[0]) == DImode;
10141 int round_mode = get_attr_i387_cw (insn);
10143 /* Jump through a hoop or two for DImode, since the hardware has no
10144 non-popping instruction. We used to do this a different way, but
10145 that was somewhat fragile and broke with post-reload splitters. */
/* Duplicate st(0) so the mandatory pop leaves the value live.  */
10146 if ((dimode_p || fisttp) && !stack_top_dies)
10147 output_asm_insn ("fld\t%y1", operands);
10149 gcc_assert (STACK_TOP_P (operands[1]));
10150 gcc_assert (MEM_P (operands[0]));
10151 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp truncates regardless of the control word.  */
10154 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic path: temporarily load the truncating control word,
   store, then restore the original control word.  */
10157 if (round_mode != I387_CW_ANY)
10158 output_asm_insn ("fldcw\t%3", operands);
10159 if (stack_top_dies || dimode_p)
10160 output_asm_insn ("fistp%z0\t%0", operands);
10162 output_asm_insn ("fist%z0\t%0", operands);
10163 if (round_mode != I387_CW_ANY)
10164 output_asm_insn ("fldcw\t%2", operands);
10170 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10171 have the values zero or one, indicates the ffreep insn's operand
10172 from the OPERANDS array. */
10174 static const char *
10175 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10177 if (TARGET_USE_FFREEP)
10178 #if HAVE_AS_IX86_FFREEP
/* Assembler knows the mnemonic: emit it directly.  */
10179 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
/* Assembler lacks ffreep: hand-encode it.  The '_' placeholder at
   retval[9] is patched with the stack-relative register digit.  */
10182 static char retval[] = ".word\t0xc_df";
10183 int regno = REGNO (operands[opno]);
10185 gcc_assert (FP_REGNO_P (regno));
10187 retval[9] = '0' + (regno - FIRST_STACK_REG);
/* Fallback when ffreep is not profitable: plain fstp.  */
10192 return opno ? "fstp\t%y1" : "fstp\t%y0";
10196 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10197 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): some original lines (if/else scaffolding, mask table
   entries, final return) are missing from this chunk.  */
10200 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10202 int stack_top_dies;
10203 rtx cmp_op0, cmp_op1;
10204 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* Operand layout differs between the eflags and fnstsw patterns.  */
10208 cmp_op0 = operands[0];
10209 cmp_op1 = operands[1];
10213 cmp_op0 = operands[1];
10214 cmp_op1 = operands[2];
/* SSE compares: [u]comiss / [u]comisd by mode and orderedness.  */
10219 if (GET_MODE (operands[0]) == SFmode)
10221 return "ucomiss\t{%1, %0|%0, %1}";
10223 return "comiss\t{%1, %0|%0, %1}";
10226 return "ucomisd\t{%1, %0|%0, %1}";
10228 return "comisd\t{%1, %0|%0, %1}";
10231 gcc_assert (STACK_TOP_P (cmp_op0));
10233 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero: ftst, popping st(0) afterwards if it dies.  */
10235 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10237 if (stack_top_dies)
10239 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10240 return output_387_ffreep (operands, 1);
10243 return "ftst\n\tfnstsw\t%0";
10246 if (STACK_REG_P (cmp_op1)
10248 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10249 && REGNO (cmp_op1) != FIRST_STACK_REG)
10251 /* If both the top of the 387 stack dies, and the other operand
10252 is also a stack register that dies, then this must be a
10253 `fcompp' float compare */
10257 /* There is no double popping fcomi variant. Fortunately,
10258 eflags is immune from the fstp's cc clobbering. */
10260 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10262 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10263 return output_387_ffreep (operands, 0);
10268 return "fucompp\n\tfnstsw\t%0";
10270 return "fcompp\n\tfnstsw\t%0";
10275 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10277 static const char * const alt[16] =
10279 "fcom%z2\t%y2\n\tfnstsw\t%0",
10280 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10281 "fucom%z2\t%y2\n\tfnstsw\t%0",
10282 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10284 "ficom%z2\t%y2\n\tfnstsw\t%0",
10285 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10289 "fcomi\t{%y1, %0|%0, %y1}",
10290 "fcomip\t{%y1, %0|%0, %y1}",
10291 "fucomi\t{%y1, %0|%0, %y1}",
10292 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into alt[] from the flag bits above.  */
10303 mask = eflags_p << 3;
10304 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10305 mask |= unordered_p << 1;
10306 mask |= stack_top_dies;
10308 gcc_assert (mask < 16);
/* Emit one element of an address vector (jump table): a .long (or .quad
   on 64-bit targets, per the missing conditional) naming local label
   LPREFIX<value>.  NOTE(review): the 64-bit branch around ASM_QUAD is
   partially missing from this chunk.  */
10317 ix86_output_addr_vec_elt (FILE *file, int value)
10319 const char *directive = ASM_LONG;
10323 directive = ASM_QUAD;
10325 gcc_assert (!TARGET_64BIT);
10328 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of an address-difference vector (PIC jump table).
   Chooses between label-difference, @GOTOFF, Macho function-base, and
   GOT-relative forms.  NOTE(review): some scaffolding lines are missing
   from this chunk.  */
10332 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10334 const char *directive = ASM_LONG;
10337 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10338 directive = ASM_QUAD;
10340 gcc_assert (!TARGET_64BIT);
10342 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10343 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10344 fprintf (file, "%s%s%d-%s%d\n",
10345 directive, LPREFIX, value, LPREFIX, rel);
10346 else if (HAVE_AS_GOTOFF_IN_DATA)
10347 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10349 else if (TARGET_MACHO)
10351 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10352 machopic_output_function_base_name (file);
10353 fprintf(file, "\n");
/* Fallback: express the entry relative to the GOT symbol.  */
10357 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10358 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10361 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* Zero DEST, preferring xor (which clobbers flags) unless TARGET_USE_MOV0
   says a mov is better.  Only valid after reload.  */
10365 ix86_expand_clear (rtx dest)
10369 /* We play register width games, which are only valid after reload. */
10370 gcc_assert (reload_completed);
10372 /* Avoid HImode and its attendant prefix byte. */
10373 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10374 dest = gen_rtx_REG (SImode, REGNO (dest));
10375 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10377 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10378 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
/* xor form: attach the FLAGS_REG clobber the xor patterns require.  */
10380 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10381 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10387 /* X is an unchanging MEM. If it is a constant pool reference, return
10388 the constant pool rtx, else NULL. */
10391 maybe_get_pool_constant (rtx x)
/* Undo PIC/GOT wrapping of the address before testing it.  */
10393 x = ix86_delegitimize_address (XEXP (x, 0));
10395 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10396 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0], fixing up
   TLS symbols, dllimport references, PIC addresses, pushes, and constants
   that need registers or memory.  NOTE(review): several scaffolding lines
   (operand setup, braces, returns) are missing from this chunk.  */
10402 ix86_expand_move (enum machine_mode mode, rtx operands[])
10405 enum tls_model model;
/* Rewrite a bare TLS or dllimport SYMBOL_REF source.  */
10410 if (GET_CODE (op1) == SYMBOL_REF)
10412 model = SYMBOL_REF_TLS_MODEL (op1);
10415 op1 = legitimize_tls_address (op1, model, true);
10416 op1 = force_operand (op1, op0);
10420 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10421 && SYMBOL_REF_DLLIMPORT_P (op1))
10422 op1 = legitimize_dllimport_symbol (op1, false);
/* Same handling for (const (plus (symbol_ref) (const_int))).  */
10424 else if (GET_CODE (op1) == CONST
10425 && GET_CODE (XEXP (op1, 0)) == PLUS
10426 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10428 rtx addend = XEXP (XEXP (op1, 0), 1);
10429 rtx symbol = XEXP (XEXP (op1, 0), 0);
10432 model = SYMBOL_REF_TLS_MODEL (symbol);
10434 tmp = legitimize_tls_address (symbol, model, true);
10435 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10436 && SYMBOL_REF_DLLIMPORT_P (symbol))
10437 tmp = legitimize_dllimport_symbol (symbol, true);
/* Re-add the constant offset to the legitimized symbol.  */
10441 tmp = force_operand (tmp, NULL);
10442 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10443 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands must go through the PIC machinery.  */
10449 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10451 if (TARGET_MACHO && !TARGET_64BIT)
/* Darwin path: indirect through the machopic helpers, reusing op0
   as scratch when it is a register.  */
10456 rtx temp = ((reload_in_progress
10457 || ((op0 && REG_P (op0))
10459 ? op0 : gen_reg_rtx (Pmode));
10460 op1 = machopic_indirect_data_reference (op1, temp);
10461 op1 = machopic_legitimize_pic_address (op1, mode,
10462 temp == op1 ? 0 : temp);
10464 else if (MACHOPIC_INDIRECT)
10465 op1 = machopic_indirect_data_reference (op1, 0);
10473 op1 = force_reg (Pmode, op1);
10474 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10476 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10477 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: a MEM source whose destination cannot take it directly
   must be loaded into a register first.  */
10486 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10487 || !push_operand (op0, mode))
10489 op1 = force_reg (mode, op1);
10491 if (push_operand (op0, mode)
10492 && ! general_no_elim_operand (op1, mode))
10493 op1 = copy_to_mode_reg (mode, op1);
10495 /* Force large constants in 64bit compilation into register
10496 to get them CSEed. */
10497 if (can_create_pseudo_p ()
10498 && (mode == DImode) && TARGET_64BIT
10499 && immediate_operand (op1, mode)
10500 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10501 && !register_operand (op0, mode)
10503 op1 = copy_to_mode_reg (mode, op1);
10505 if (can_create_pseudo_p ()
10506 && FLOAT_MODE_P (mode)
10507 && GET_CODE (op1) == CONST_DOUBLE)
10509 /* If we are loading a floating point constant to a register,
10510 force the value to memory now, since we'll get better code
10511 out the back end. */
10513 op1 = validize_mem (force_const_mem (mode, op1));
10514 if (!register_operand (op0, mode))
10516 rtx temp = gen_reg_rtx (mode);
10517 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10518 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
10524 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move, forcing awkward constants to memory and
   routing under-aligned SSE operands through the misaligned-move path.
   NOTE(review): some scaffolding lines (tmp[] declaration, returns) are
   missing from this chunk.  */
10528 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10530 rtx op0 = operands[0], op1 = operands[1];
10531 unsigned int align = GET_MODE_ALIGNMENT (mode);
10533 /* Force constants other than zero into memory. We do not know how
10534 the instructions used to build constants modify the upper 64 bits
10535 of the register, once we have that information we may be able
10536 to handle some of them more efficiently. */
10537 if (can_create_pseudo_p ()
10538 && register_operand (op0, mode)
10539 && (CONSTANT_P (op1)
10540 || (GET_CODE (op1) == SUBREG
10541 && CONSTANT_P (SUBREG_REG (op1))))
10542 && standard_sse_constant_p (op1) <= 0)
10543 op1 = validize_mem (force_const_mem (mode, op1));
10545 /* We need to check memory alignment for SSE mode since attribute
10546 can make operands unaligned. */
10547 if (can_create_pseudo_p ()
10548 && SSE_REG_MODE_P (mode)
10549 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10550 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10554 /* ix86_expand_vector_move_misalign() does not like constants ... */
10555 if (CONSTANT_P (op1)
10556 || (GET_CODE (op1) == SUBREG
10557 && CONSTANT_P (SUBREG_REG (op1))))
10558 op1 = validize_mem (force_const_mem (mode, op1));
10560 /* ... nor both arguments in memory. */
10561 if (!register_operand (op0, mode)
10562 && !register_operand (op1, mode))
10563 op1 = force_reg (mode, op1);
/* Delegate the actual unaligned move.  */
10565 tmp[0] = op0; tmp[1] = op1;
10566 ix86_expand_vector_move_misalign (mode, tmp);
10570 /* Make operand1 a register if it isn't already. */
10571 if (can_create_pseudo_p ()
10572 && !register_operand (op0, mode)
10573 && !register_operand (op1, mode))
10575 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10579 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10582 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10583 straight to ix86_expand_vector_move. */
10584 /* Code generation for scalar reg-reg moves of single and double precision data:
10585 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10589 if (x86_sse_partial_reg_dependency == true)
10594 Code generation for scalar loads of double precision data:
10595 if (x86_sse_split_regs == true)
10596 movlpd mem, reg (gas syntax)
10600 Code generation for unaligned packed loads of single precision data
10601 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10602 if (x86_sse_unaligned_move_optimal)
10605 if (x86_sse_partial_reg_dependency == true)
10617 Code generation for unaligned packed loads of double precision data
10618 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10619 if (x86_sse_unaligned_move_optimal)
10622 if (x86_sse_split_regs == true)
/* NOTE(review): several scaffolding lines (operand/local declarations,
   if/else structure, returns) are missing from this chunk; comments below
   apply to the visible statements only.  */
10635 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10644 /* If we're optimizing for size, movups is the smallest. */
10647 op0 = gen_lowpart (V4SFmode, op0);
10648 op1 = gen_lowpart (V4SFmode, op1);
10649 emit_insn (gen_sse_movups (op0, op1));
/* Unaligned LOAD paths follow.  */
10653 /* ??? If we have typed data, then it would appear that using
10654 movdqu is the only way to get unaligned data loaded with
10656 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10658 op0 = gen_lowpart (V16QImode, op0);
10659 op1 = gen_lowpart (V16QImode, op1);
10660 emit_insn (gen_sse2_movdqu (op0, op1));
10664 if (TARGET_SSE2 && mode == V2DFmode)
10668 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10670 op0 = gen_lowpart (V2DFmode, op0);
10671 op1 = gen_lowpart (V2DFmode, op1);
10672 emit_insn (gen_sse2_movupd (op0, op1));
10676 /* When SSE registers are split into halves, we can avoid
10677 writing to the top half twice. */
10678 if (TARGET_SSE_SPLIT_REGS)
10680 emit_clobber (op0);
10685 /* ??? Not sure about the best option for the Intel chips.
10686 The following would seem to satisfy; the register is
10687 entirely cleared, breaking the dependency chain. We
10688 then store to the upper half, with a dependency depth
10689 of one. A rumor has it that Intel recommends two movsd
10690 followed by an unpacklpd, but this is unconfirmed. And
10691 given that the dependency depth of the unpacklpd would
10692 still be one, I'm not sure why this would be better. */
10693 zero = CONST0_RTX (V2DFmode);
/* Load the two DFmode halves with loadlpd/loadhpd.  */
10696 m = adjust_address (op1, DFmode, 0);
10697 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10698 m = adjust_address (op1, DFmode, 8);
10699 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10703 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10705 op0 = gen_lowpart (V4SFmode, op0);
10706 op1 = gen_lowpart (V4SFmode, op1);
10707 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on op0's previous contents first.  */
10711 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10712 emit_move_insn (op0, CONST0_RTX (mode));
10714 emit_clobber (op0);
10716 if (mode != V4SFmode)
10717 op0 = gen_lowpart (V4SFmode, op0);
10718 m = adjust_address (op1, V2SFmode, 0);
10719 emit_insn (gen_sse_loadlps (op0, op0, m));
10720 m = adjust_address (op1, V2SFmode, 8);
10721 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Unaligned STORE paths.  */
10724 else if (MEM_P (op0))
10726 /* If we're optimizing for size, movups is the smallest. */
10729 op0 = gen_lowpart (V4SFmode, op0);
10730 op1 = gen_lowpart (V4SFmode, op1);
10731 emit_insn (gen_sse_movups (op0, op1));
10735 /* ??? Similar to above, only less clear because of quote
10736 typeless stores unquote. */
10737 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10738 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10740 op0 = gen_lowpart (V16QImode, op0);
10741 op1 = gen_lowpart (V16QImode, op1);
10742 emit_insn (gen_sse2_movdqu (op0, op1));
10746 if (TARGET_SSE2 && mode == V2DFmode)
10748 m = adjust_address (op0, DFmode, 0);
10749 emit_insn (gen_sse2_storelpd (m, op1));
10750 m = adjust_address (op0, DFmode, 8);
10751 emit_insn (gen_sse2_storehpd (m, op1));
10755 if (mode != V4SFmode)
10756 op1 = gen_lowpart (V4SFmode, op1);
10757 m = adjust_address (op0, V2SFmode, 0);
10758 emit_insn (gen_sse_storelps (m, op1));
10759 m = adjust_address (op0, V2SFmode, 8);
10760 emit_insn (gen_sse_storehps (m, op1));
10764 gcc_unreachable ();
10767 /* Expand a push in MODE. This is some mode for which we do not support
10768 proper push instructions, at least from the registers that we expect
10769 the value to live in. */
10772 ix86_expand_push (enum machine_mode mode, rtx x)
/* Decrement the stack pointer by the mode's size...  */
10776 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10777 GEN_INT (-GET_MODE_SIZE (mode)),
10778 stack_pointer_rtx, 1, OPTAB_DIRECT);
10779 if (tmp != stack_pointer_rtx)
10780 emit_move_insn (stack_pointer_rtx, tmp);
/* ...then store X at the new top of stack.  */
10782 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10783 emit_move_insn (tmp, x);
10786 /* Helper function of ix86_fixup_binary_operands to canonicalize
10787 operand order. Returns true if the operands should be swapped. */
10790 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10793 rtx dst = operands[0];
10794 rtx src1 = operands[1];
10795 rtx src2 = operands[2];
10797 /* If the operation is not commutative, we can't do anything. */
10798 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10801 /* Highest priority is that src1 should match dst. */
10802 if (rtx_equal_p (dst, src1))
10804 if (rtx_equal_p (dst, src2))
10807 /* Next highest priority is that immediate constants come second. */
10808 if (immediate_operand (src2, mode))
10810 if (immediate_operand (src1, mode))
10813 /* Lowest priority is that memory references should come second. */
10823 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10824 destination to use for the operation. If different from the true
10825 destination in operands[0], a copy operation will be required. */
/* NOTE(review): some scaffolding lines (swap body, return) are missing
   from this chunk.  */
10828 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10831 rtx dst = operands[0];
10832 rtx src1 = operands[1];
10833 rtx src2 = operands[2];
10835 /* Canonicalize operand order. */
10836 if (ix86_swap_binary_operands_p (code, mode, operands))
10840 /* It is invalid to swap operands of different modes. */
10841 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10848 /* Both source operands cannot be in memory. */
10849 if (MEM_P (src1) && MEM_P (src2))
10851 /* Optimization: Only read from memory once. */
10852 if (rtx_equal_p (src1, src2))
10854 src2 = force_reg (mode, src2);
10858 src2 = force_reg (mode, src2);
10861 /* If the destination is memory, and we do not have matching source
10862 operands, do things in registers. */
10863 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10864 dst = gen_reg_rtx (mode);
10866 /* Source 1 cannot be a constant. */
10867 if (CONSTANT_P (src1))
10868 src1 = force_reg (mode, src1);
10870 /* Source 1 cannot be a non-matching memory. */
10871 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10872 src1 = force_reg (mode, src1);
/* Write back the (possibly reloaded) sources; DST is returned.  */
10874 operands[1] = src1;
10875 operands[2] = src2;
10879 /* Similarly, but assume that the destination has already been
10880 set up properly. */
10883 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10884 enum machine_mode mode, rtx operands[])
/* The fixup must not have needed a temporary destination.  */
10886 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10887 gcc_assert (dst == operands[0]);
10890 /* Attempt to expand a binary operator. Make the expansion closer to the
10891 actual machine, then just general_operand, which will allow 3 separate
10892 memory references (one output, two input) in a single insn. */
10895 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10898 rtx src1, src2, dst, op, clob;
10900 dst = ix86_fixup_binary_operands (code, mode, operands);
10901 src1 = operands[1];
10902 src2 = operands[2];
10904 /* Emit the instruction. */
10906 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10907 if (reload_in_progress)
10909 /* Reload doesn't know about the flags register, and doesn't know that
10910 it doesn't want to clobber it. We can only do this with PLUS. */
10911 gcc_assert (code == PLUS);
/* Normal case: pair the SET with the FLAGS_REG clobber the x86
   arithmetic patterns require.  */
10916 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10917 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10920 /* Fix up the destination if needed. */
10921 if (dst != operands[0])
10922 emit_move_insn (operands[0], dst);
10925 /* Return TRUE or FALSE depending on whether the binary operator meets the
10926 appropriate constraints. */
/* NOTE(review): the early-return/swap bodies between the checks are
   missing from this chunk.  */
10929 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10932 rtx dst = operands[0];
10933 rtx src1 = operands[1];
10934 rtx src2 = operands[2];
10936 /* Both source operands cannot be in memory. */
10937 if (MEM_P (src1) && MEM_P (src2))
10940 /* Canonicalize operand order for commutative operators. */
10941 if (ix86_swap_binary_operands_p (code, mode, operands))
10948 /* If the destination is memory, we must have a matching source operand. */
10949 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10952 /* Source 1 cannot be a constant. */
10953 if (CONSTANT_P (src1))
10956 /* Source 1 cannot be a non-matching memory. */
10957 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10963 /* Attempt to expand a unary operator. Make the expansion closer to the
10964 actual machine, then just general_operand, which will allow 2 separate
10965 memory references (one output, one input) in a single insn. */
10968 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10971 int matching_memory;
10972 rtx src, dst, op, clob;
10977 /* If the destination is memory, and we do not have matching source
10978 operands, do things in registers. */
10979 matching_memory = 0;
10982 if (rtx_equal_p (dst, src))
10983 matching_memory = 1;
10985 dst = gen_reg_rtx (mode);
10988 /* When source operand is memory, destination must match. */
10989 if (MEM_P (src) && !matching_memory)
10990 src = force_reg (mode, src);
10992 /* Emit the instruction. */
10994 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10995 if (reload_in_progress || code == NOT)
10997 /* Reload doesn't know about the flags register, and doesn't know that
10998 it doesn't want to clobber it. */
10999 gcc_assert (code == NOT);
/* Other codes clobber flags: emit the SET with a FLAGS_REG clobber.  */
11004 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11005 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11008 /* Fix up the destination if needed. */
11009 if (dst != operands[0])
11010 emit_move_insn (operands[0], dst);
11013 /* Return TRUE or FALSE depending on whether the unary operator meets the
11014 appropriate constraints. */
11017 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11018 enum machine_mode mode ATTRIBUTE_UNUSED,
11019 rtx operands[2] ATTRIBUTE_UNUSED)
11021 /* If one of operands is memory, source and destination must match. */
11022 if ((MEM_P (operands[0])
11023 || MEM_P (operands[1]))
11024 && ! rtx_equal_p (operands[0], operands[1]))
11029 /* Post-reload splitter for converting an SF or DFmode value in an
11030 SSE register into an unsigned SImode. */
/* NOTE(review): some scaffolding lines (braces, MEM_P test on input) are
   missing from this chunk.  */
11033 ix86_split_convert_uns_si_sse (rtx operands[])
11035 enum machine_mode vecmode;
11036 rtx value, large, zero_or_two31, input, two31, x;
11038 large = operands[1];
11039 zero_or_two31 = operands[2];
11040 input = operands[3];
11041 two31 = operands[4];
11042 vecmode = GET_MODE (large);
11043 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
11045 /* Load up the value into the low element. We must ensure that the other
11046 elements are valid floats -- zero is the easiest such value. */
11049 if (vecmode == V4SFmode)
11050 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
11052 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Register input: zero the vector, then move the scalar in.  */
11056 input = gen_rtx_REG (vecmode, REGNO (input));
11057 emit_move_insn (value, CONST0_RTX (vecmode));
11058 if (vecmode == V4SFmode)
11059 emit_insn (gen_sse_movss (value, value, input));
11061 emit_insn (gen_sse2_movsd (value, value, input));
11064 emit_move_insn (large, two31)
11065 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2^31 <= value) mask; then subtract 2^31 only where set.  */
11067 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
11068 emit_insn (gen_rtx_SET (VOIDmode, large, x));
11070 x = gen_rtx_AND (vecmode, zero_or_two31, large);
11071 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
11073 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
11074 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Turn the mask into the 0x80000000 bit to re-add after conversion.  */
11076 large = gen_rtx_REG (V4SImode, REGNO (large));
11077 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
11079 x = gen_rtx_REG (V4SImode, REGNO (value));
11080 if (vecmode == V4SFmode)
11081 emit_insn (gen_sse2_cvttps2dq (x, value));
11083 emit_insn (gen_sse2_cvttpd2dq (x, value));
/* XOR the sign bit back in for values that were >= 2^31.  */
11086 emit_insn (gen_xorv4si3 (value, value, large));
11089 /* Convert an unsigned DImode value into a DFmode, using only SSE.
11090 Expects the 64-bit DImode to be supplied in a pair of integral
11091 registers. Requires SSE2; will use SSE3 if available. For x86_32,
11092 -mfpmath=sse, !optimize_size only. */
/* NOTE(review): some scaffolding lines (local `x`/`zero` declarations,
   SSE3 if/else) are missing from this chunk.  */
11095 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
11097 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
11098 rtx int_xmm, fp_xmm;
11099 rtx biases, exponents;
/* Get the 64-bit integer into an XMM register by whichever transfer
   the target prefers.  */
11102 int_xmm = gen_reg_rtx (V4SImode);
11103 if (TARGET_INTER_UNIT_MOVES)
11104 emit_insn (gen_movdi_to_sse (int_xmm, input));
11105 else if (TARGET_SSE_SPLIT_REGS)
11107 emit_clobber (int_xmm);
11108 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
11112 x = gen_reg_rtx (V2DImode);
11113 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
11114 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent constants 0x1.0p52 / 0x1.0p84 to pair with the two halves.  */
11117 x = gen_rtx_CONST_VECTOR (V4SImode,
11118 gen_rtvec (4, GEN_INT (0x43300000UL),
11119 GEN_INT (0x45300000UL),
11120 const0_rtx, const0_rtx));
11121 exponents = validize_mem (force_const_mem (V4SImode, x));
11123 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
11124 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
11126 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
11127 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
11128 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
11129 (0x1.0p84 + double(fp_value_hi_xmm)).
11130 Note these exponents differ by 32. */
11132 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11134 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11135 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
11136 real_ldexp (&bias_lo_rvt, &dconst1, 52);
11137 real_ldexp (&bias_hi_rvt, &dconst1, 84);
11138 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11139 x = const_double_from_real_value (bias_hi_rvt, DFmode);
11140 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11141 biases = validize_mem (force_const_mem (V2DFmode, biases));
11142 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11144 /* Add the upper and lower DFmode values together. */
11146 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* Without SSE3 haddpd: duplicate, unpack the high half, and add.  */
11149 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11150 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11151 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract element 0 into the scalar target.  */
11154 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11157 /* Not used, but eases macroization of patterns. */
/* Stub: this combination is never expanded; calling it is an ICE.  */
11159 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11160 rtx input ATTRIBUTE_UNUSED)
11162 gcc_unreachable ();
11165 /* Convert an unsigned SImode value into a DFmode. Only currently used
11166 for SSE, but applicable anywhere. */
11169 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11171 REAL_VALUE_TYPE TWO31r;
/* Bias the unsigned input by -2^31 so it fits in a signed SImode,
   convert with the ordinary signed int->double insn, then add 2^31
   back in DFmode (exact, since DFmode has >31 mantissa bits).  */
11174 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11175 NULL, 1, OPTAB_DIRECT);
11177 fp = gen_reg_rtx (DFmode);
11178 emit_insn (gen_floatsidf2 (fp, x));
11180 real_ldexp (&TWO31r, &dconst1, 31);
11181 x = const_double_from_real_value (TWO31r, DFmode);
11183 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* expand_simple_binop may not have used TARGET; copy if needed.
   NOTE(review): the guarding if-line appears elided in this listing.  */
11185 emit_move_insn (target, x);
11188 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11189 32-bit mode; otherwise we have a direct convert instruction. */
11192 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11194 REAL_VALUE_TYPE TWO32r;
11195 rtx fp_lo, fp_hi, x;
11197 fp_lo = gen_reg_rtx (DFmode);
11198 fp_hi = gen_reg_rtx (DFmode);
/* result = (double) hi(input) * 2^32 + (unsigned double) lo(input).
   The high word carries the sign; the low word is treated unsigned.  */
11200 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11202 real_ldexp (&TWO32r, &dconst1, 32);
11203 x = const_double_from_real_value (TWO32r, DFmode);
11204 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11206 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11208 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
/* Copy into TARGET if the binop result landed elsewhere.
   NOTE(review): the guarding if-line appears elided in this listing.  */
11211 emit_move_insn (target, x);
11214 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11215 For x86_32, -mfpmath=sse, !optimize_size only. */
11217 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11219 REAL_VALUE_TYPE ONE16r;
11220 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split the 32-bit input into two 16-bit halves, convert each with
   the signed insn (both halves are nonnegative), and recombine as
   hi * 2^16 + lo in SFmode.  */
11222 real_ldexp (&ONE16r, &dconst1, 16);
11223 x = const_double_from_real_value (ONE16r, SFmode);
11224 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11225 NULL, 0, OPTAB_DIRECT);
11226 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11227 NULL, 0, OPTAB_DIRECT);
11228 fp_hi = gen_reg_rtx (SFmode);
11229 fp_lo = gen_reg_rtx (SFmode);
11230 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11231 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11232 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11234 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11236 if (!rtx_equal_p (target, fp_hi))
11237 emit_move_insn (target, fp_hi);
11240 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11241 then replicate the value for all elements of the vector
/* Build a CONST_VECTOR in MODE whose elements are VALUE (all elements
   when VECT, otherwise only element 0, remainder zero).
   NOTE(review): this listing elides the switch/case structure that
   dispatches on MODE; only the per-mode bodies are visible.  */
11245 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* V4SImode: integer masks are always replicated.  */
11252 v = gen_rtvec (4, value, value, value, value);
11253 return gen_rtx_CONST_VECTOR (V4SImode, v);
/* V2DImode: likewise.  */
11257 v = gen_rtvec (2, value, value);
11258 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* V4SFmode: replicate when VECT, else VALUE in element 0 only.  */
11262 v = gen_rtvec (4, value, value, value, value);
11264 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11265 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11266 return gen_rtx_CONST_VECTOR (V4SFmode, v);
/* V2DFmode: same scheme for doubles.  */
11270 v = gen_rtvec (2, value, value);
11272 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11273 return gen_rtx_CONST_VECTOR (V2DFmode, v);
/* Any other mode is a caller bug.  */
11276 gcc_unreachable ();
11280 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11281 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11282 for an SSE register. If VECT is true, then replicate the mask for
11283 all elements of the vector register. If INVERT is true, then create
11284 a mask excluding the sign bit. */
/* NOTE(review): the switch on MODE and several case labels are elided
   from this listing; code lines kept byte-identical.  */
11287 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11289 enum machine_mode vec_mode, imode;
11290 HOST_WIDE_INT hi, lo;
11295 /* Find the sign bit, sign extended to 2*HWI. */
/* SFmode/SImode: bit 31.  */
11301 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11302 lo = 0x80000000, hi = lo < 0;
/* DFmode/DImode: bit 63; placement depends on HOST_WIDE_INT width.  */
11308 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11309 if (HOST_BITS_PER_WIDE_INT >= 64)
11310 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11312 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* Scalar (non-vector) case: mask returned in MODE directly.  */
11318 vec_mode = VOIDmode;
11319 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
11320 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11324 gcc_unreachable ();
/* INVERT: complement so the mask selects everything but the sign.
   NOTE(review): the guarding if-line appears elided here.  */
11328 lo = ~lo, hi = ~hi;
11330 /* Force this value into the low part of a fp vector constant. */
11331 mask = immed_double_const (lo, hi, imode);
11332 mask = gen_lowpart (mode, mask);
11334 if (vec_mode == VOIDmode)
11335 return force_reg (mode, mask);
11337 v = ix86_build_const_vector (mode, vect, mask);
11338 return force_reg (vec_mode, v);
11341 /* Generate code for floating point ABS or NEG. */
/* Expand NEG/ABS in MODE.  With SSE these become bit operations with
   a sign-bit mask (XOR for NEG, AND-with-inverted-mask for ABS);
   otherwise a unary rtx is emitted with a mask USE and flags CLOBBER
   so later splitting can pick either implementation.  */
11344 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11347 rtx mask, set, use, clob, dst, src;
11348 bool use_sse = false;
11349 bool vector_mode = VECTOR_MODE_P (mode);
11350 enum machine_mode elt_mode = mode;
/* Vector modes always use SSE; scalar SSE_FLOAT modes only when
   -mfpmath=sse; TFmode always.  */
11354 elt_mode = GET_MODE_INNER (mode);
11357 else if (mode == TFmode)
11359 else if (TARGET_SSE_MATH)
11360 use_sse = SSE_FLOAT_MODE_P (mode);
11362 /* NEG and ABS performed with SSE use bitwise mask operations.
11363 Create the appropriate mask now. */
11365 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
11374 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11375 set = gen_rtx_SET (VOIDmode, dst, set);
/* Non-SSE path: plain unary NEG/ABS rtx.  */
11380 set = gen_rtx_fmt_e (code, mode, src);
11381 set = gen_rtx_SET (VOIDmode, dst, set);
/* Attach the mask use and the flags clobber in a PARALLEL.  */
11384 use = gen_rtx_USE (VOIDmode, mask);
11385 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11386 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11387 gen_rtvec (3, set, use, clob)));
11394 /* Expand a copysign operation. Special case operand 0 being a constant. */
11397 ix86_expand_copysign (rtx operands[])
11399 enum machine_mode mode;
11400 rtx dest, op0, op1, mask, nmask;
11402 dest = operands[0];
11406 mode = GET_MODE (dest);
/* Constant magnitude: fold ABS now and emit the cheaper *_const
   pattern, which needs only one mask.  */
11408 if (GET_CODE (op0) == CONST_DOUBLE)
11410 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11412 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11413 op0 = simplify_unary_operation (ABS, mode, op0, mode);
/* SF/DF constants are widened into a vector with the value in
   element 0 so the SSE patterns can consume them.  */
11415 if (mode == SFmode || mode == DFmode)
11417 enum machine_mode vmode;
11419 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11421 if (op0 == CONST0_RTX (mode))
11422 op0 = CONST0_RTX (vmode);
11427 if (mode == SFmode)
11428 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11429 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11431 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11433 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11436 else if (op0 != CONST0_RTX (mode))
11437 op0 = force_reg (mode, op0);
11439 mask = ix86_build_signbit_mask (mode, 0, 0);
11441 if (mode == SFmode)
11442 copysign_insn = gen_copysignsf3_const;
11443 else if (mode == DFmode)
11444 copysign_insn = gen_copysigndf3_const;
11446 copysign_insn = gen_copysigntf3_const;
11448 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: needs both the sign mask and its complement;
   the *_var pattern also carries a scratch operand (NULL here).  */
11452 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11454 nmask = ix86_build_signbit_mask (mode, 0, 1);
11455 mask = ix86_build_signbit_mask (mode, 0, 0);
11457 if (mode == SFmode)
11458 copysign_insn = gen_copysignsf3_var;
11459 else if (mode == DFmode)
11460 copysign_insn = gen_copysigndf3_var;
11462 copysign_insn = gen_copysigntf3_var;
11464 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11468 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11469 be a constant, and so has already been expanded into a vector constant. */
11472 ix86_split_copysign_const (rtx operands[])
11474 enum machine_mode mode, vmode;
11475 rtx dest, op0, op1, mask, x;
11477 dest = operands[0];
11480 mask = operands[3];
11482 mode = GET_MODE (dest);
11483 vmode = GET_MODE (mask);
/* dest = (op1 & signmask) | abs_constant, working in the vector
   mode of the mask.  */
11485 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11486 x = gen_rtx_AND (vmode, dest, mask);
11487 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Skip the IOR entirely when the magnitude constant is zero.  */
11489 if (op0 != CONST0_RTX (vmode))
11491 x = gen_rtx_IOR (vmode, dest, op0);
11492 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11496 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11497 so we have to do two masks. */
/* Emits: scratch = op1 & mask (sign bits), dest = op0 & nmask
   (magnitude bits), dest |= scratch.  Which register plays which role
   depends on the alternative the RA chose, hence the REGNO checks.  */
11500 ix86_split_copysign_var (rtx operands[])
11502 enum machine_mode mode, vmode;
11503 rtx dest, scratch, op0, op1, mask, nmask, x;
11505 dest = operands[0];
11506 scratch = operands[1];
11509 nmask = operands[4];
11510 mask = operands[5];
11512 mode = GET_MODE (dest);
11513 vmode = GET_MODE (mask);
11515 if (rtx_equal_p (op0, op1))
11517 /* Shouldn't happen often (it's useless, obviously), but when it does
11518 we'd generate incorrect code if we continue below. */
11519 emit_move_insn (dest, op0);
11523 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11525 gcc_assert (REGNO (op1) == REGNO (scratch));
11527 x = gen_rtx_AND (vmode, scratch, mask);
11528 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* dest aliases mask: use ANDN-style (~dest) & op0.  */
11531 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11532 x = gen_rtx_NOT (vmode, dest);
11533 x = gen_rtx_AND (vmode, x, op0);
11534 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Remaining alternatives: scratch collects the sign bits ...  */
11538 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11540 x = gen_rtx_AND (vmode, scratch, mask);
11542 else /* alternative 2,4 */
11544 gcc_assert (REGNO (mask) == REGNO (scratch));
11545 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11546 x = gen_rtx_AND (vmode, scratch, op1);
11548 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
/* ... and dest collects the magnitude bits via nmask.  */
11550 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11552 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11553 x = gen_rtx_AND (vmode, dest, nmask);
11555 else /* alternative 3,4 */
11557 gcc_assert (REGNO (nmask) == REGNO (dest));
11559 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11560 x = gen_rtx_AND (vmode, dest, op0);
11562 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign and magnitude.  */
11565 x = gen_rtx_IOR (vmode, dest, scratch);
11566 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11569 /* Return TRUE or FALSE depending on whether the first SET in INSN
11570 has source and destination with matching CC modes, and that the
11571 CC mode is at least as constrained as REQ_MODE. */
/* NOTE(review): the switch on SET_MODE and its case labels are elided
   from this listing; only the per-case tests are visible.  */
11574 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11577 enum machine_mode set_mode;
11579 set = PATTERN (insn);
11580 if (GET_CODE (set) == PARALLEL)
11581 set = XVECEXP (set, 0, 0);
11582 gcc_assert (GET_CODE (set) == SET);
11583 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11585 set_mode = GET_MODE (SET_DEST (set));
/* CCmode satisfies the request only for comparisons against zero
   unless CCmode (or CCNOmode) itself was asked for.  */
11589 if (req_mode != CCNOmode
11590 && (req_mode != CCmode
11591 || XEXP (SET_SRC (set), 1) != const0_rtx))
11595 if (req_mode == CCGCmode)
11599 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11603 if (req_mode == CCZmode)
11610 gcc_unreachable ();
11613 return (GET_MODE (SET_SRC (set)) == set_mode);
11616 /* Generate insn patterns to do an integer compare of OPERANDS. */
/* Emits flags = COMPARE (op0, op1) in the CC mode selected for CODE
   and returns the comparison rtx the flags user should test.  */
11619 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11621 enum machine_mode cmpmode;
11624 cmpmode = SELECT_CC_MODE (code, op0, op1);
11625 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11627 /* This is very simple, but making the interface the same as in the
11628 FP case makes the rest of the code easier. */
11629 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11630 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11632 /* Return the test that should be put into the flags user, i.e.
11633 the bcc, scc, or cmov instruction. */
11634 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11637 /* Figure out whether to use ordered or unordered fp comparisons.
11638 Return the appropriate mode to use. */
11641 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11643 /* ??? In order to make all comparisons reversible, we do all comparisons
11644 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11645 all forms trapping and nontrapping comparisons, we can make inequality
11646 comparisons trapping again, since it results in better code when using
11647 FCOM based compares. */
11648 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode that comparison CODE of OP0/OP1 needs, i.e. the
   smallest set of flags the flags user will look at.
   NOTE(review): the switch header, returned mode lines and several
   fall-through paths are elided from this listing.  */
11652 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11654 enum machine_mode mode = GET_MODE (op0);
11656 if (SCALAR_FLOAT_MODE_P (mode))
11658 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11659 return ix86_fp_compare_mode (code);
11664 /* Only zero flag is needed. */
11665 case EQ: /* ZF=0 */
11666 case NE: /* ZF!=0 */
11668 /* Codes needing carry flag. */
11669 case GEU: /* CF=0 */
11670 case LTU: /* CF=1 */
11671 /* Detect overflow checks. They need just the carry flag. */
11672 if (GET_CODE (op0) == PLUS
11673 && rtx_equal_p (op1, XEXP (op0, 0)))
11677 case GTU: /* CF=0 & ZF=0 */
11678 case LEU: /* CF=1 | ZF=1 */
11679 /* Detect overflow checks. They need just the carry flag. */
11680 if (GET_CODE (op0) == MINUS
11681 && rtx_equal_p (op1, XEXP (op0, 0)))
11685 /* Codes possibly doable only with sign flag when
11686 comparing against zero. */
11687 case GE: /* SF=OF or SF=0 */
11688 case LT: /* SF<>OF or SF=1 */
11689 if (op1 == const0_rtx)
11692 /* For other cases Carry flag is not required. */
11694 /* Codes doable only with sign flag when comparing
11695 against zero, but we miss jump instruction for it
11696 so we need to use relational tests against overflow
11697 that thus needs to be zero. */
11698 case GT: /* ZF=0 & SF=OF */
11699 case LE: /* ZF=1 | SF<>OF */
11700 if (op1 == const0_rtx)
11704 /* strcmp pattern do (use flags) and combine may ask us for proper
11709 gcc_unreachable ();
11713 /* Return the fixed registers used for condition codes. */
/* NOTE(review): the body of this function is elided from this listing.  */
11716 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11723 /* If two condition code modes are compatible, return a condition code
11724 mode which is compatible with both. Otherwise, return
/* NOTE(review): several case arms of this function (including the
   returned modes) are elided from this listing.  */
11727 static enum machine_mode
11728 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Identical modes are trivially compatible (elided guard above).  */
11733 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC merge to the stricter CCGC.  */
11736 if ((m1 == CCGCmode && m2 == CCGOCmode)
11737 || (m1 == CCGOCmode && m2 == CCGCmode))
11743 gcc_unreachable ();
11773 /* These are only compatible with themselves, which we already
11779 /* Split comparison code CODE into comparisons we can do using branch
11780 instructions. BYPASS_CODE is comparison code for branch that will
11781 branch around FIRST_CODE and SECOND_CODE. If some of branches
11782 is not required, set value to UNKNOWN.
11783 We never require more than two branches. */
11786 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11787 enum rtx_code *first_code,
11788 enum rtx_code *second_code)
11790 *first_code = code;
11791 *bypass_code = UNKNOWN;
11792 *second_code = UNKNOWN;
11794 /* The fcomi comparison sets flags as follows:
/* These codes map directly onto one fcomi-style flags test.  */
11804 case GT: /* GTU - CF=0 & ZF=0 */
11805 case GE: /* GEU - CF=0 */
11806 case ORDERED: /* PF=0 */
11807 case UNORDERED: /* PF=1 */
11808 case UNEQ: /* EQ - ZF=1 */
11809 case UNLT: /* LTU - CF=1 */
11810 case UNLE: /* LEU - CF=1 | ZF=1 */
11811 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that the hardware test gets wrong on NaN need an
   UNORDERED bypass branch around the main test ...  */
11813 case LT: /* LTU - CF=1 - fails on unordered */
11814 *first_code = UNLT;
11815 *bypass_code = UNORDERED;
11817 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11818 *first_code = UNLE;
11819 *bypass_code = UNORDERED;
11821 case EQ: /* EQ - ZF=1 - fails on unordered */
11822 *first_code = UNEQ;
11823 *bypass_code = UNORDERED;
/* ... while these need a second UNORDERED branch taken as well.  */
11825 case NE: /* NE - ZF=0 - fails on unordered */
11826 *first_code = LTGT;
11827 *second_code = UNORDERED;
11829 case UNGE: /* GEU - CF=0 - fails on unordered */
11831 *second_code = UNORDERED;
11833 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11835 *second_code = UNORDERED;
11838 gcc_unreachable ();
/* Without IEEE conformance NaNs need not be handled: one branch.  */
11840 if (!TARGET_IEEE_FP)
11842 *second_code = UNKNOWN;
11843 *bypass_code = UNKNOWN;
11847 /* Return cost of comparison done fcom + arithmetics operations on AX.
11848 All following functions do use number of instructions as a cost metrics.
11849 In future this should be tweaked to compute bytes for optimize_size and
11850 take into account performance of various instructions on various CPUs. */
/* NOTE(review): the switch over CODE with the per-code cost returns is
   elided from this listing.  */
11852 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11854 if (!TARGET_IEEE_FP)
11856 /* The cost of code output by ix86_expand_fp_compare. */
11880 gcc_unreachable ();
11884 /* Return cost of comparison done using fcomi operation.
11885 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11887 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11889 enum rtx_code bypass_code, first_code, second_code;
11890 /* Return arbitrarily high cost when instruction is not supported - this
11891 prevents gcc from using it. */
/* Base cost 2 (fcomi + branch), +1 if an extra branch is needed.  */
11894 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11895 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11898 /* Return cost of comparison done using sahf operation.
11899 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11901 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11903 enum rtx_code bypass_code, first_code, second_code;
11904 /* Return arbitrarily high cost when instruction is not preferred - this
11905 avoids gcc from using it. */
11906 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
/* Base cost 3 (fnstsw + sahf + branch), +1 for an extra branch.  */
11908 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11909 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11912 /* Compute cost of the comparison done using any method.
11913 See ix86_fp_comparison_arithmetics_cost for the metrics. */
/* Minimum of the arithmetics, sahf and fcomi strategies.  */
11915 ix86_fp_comparison_cost (enum rtx_code code)
11917 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11920 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11921 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11923 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11924 if (min > sahf_cost)
11926 if (min > fcomi_cost)
/* NOTE(review): the assignments to `min` and the return are elided
   from this listing.  */
11931 /* Return true if we should use an FCOMI instruction for this
/* True when fcomi is the cheapest strategy for CODE in either
   operand order.  */
11935 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11937 enum rtx_code swapped_code = swap_condition (code);
11939 return ((ix86_fp_comparison_cost (code)
11940 == ix86_fp_comparison_fcomi_cost (code))
11941 || (ix86_fp_comparison_cost (swapped_code)
11942 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11945 /* Swap, force into registers, or otherwise massage the two operands
11946 to a fp comparison. The operands are updated in place; the new
11947 comparison code is returned. */
11949 static enum rtx_code
11950 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11952 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11953 rtx op0 = *pop0, op1 = *pop1;
11954 enum machine_mode op_mode = GET_MODE (op0);
11955 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11957 /* All of the unordered compare instructions only work on registers.
11958 The same is true of the fcomi compare instructions. The XFmode
11959 compare instructions require registers except when comparing
11960 against zero or when converting operand 1 from fixed point to
/* NOTE(review): the condition prefix of this if (the !is_sse test)
   appears elided from this listing.  */
11964 && (fpcmp_mode == CCFPUmode
11965 || (op_mode == XFmode
11966 && ! (standard_80387_constant_p (op0) == 1
11967 || standard_80387_constant_p (op1) == 1)
11968 && GET_CODE (op1) != FLOAT)
11969 || ix86_use_fcomi_compare (code)))
11971 op0 = force_reg (op_mode, op0);
11972 op1 = force_reg (op_mode, op1);
11976 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11977 things around if they appear profitable, otherwise force op0
11978 into a register. */
11980 if (standard_80387_constant_p (op0) == 0
11982 && ! (standard_80387_constant_p (op1) == 0
11986 tmp = op0, op0 = op1, op1 = tmp;
11987 code = swap_condition (code);
/* NOTE(review): the guarding !REG_P (op0) line appears elided.  */
11991 op0 = force_reg (op_mode, op0);
11993 if (CONSTANT_P (op1))
11995 int tmp = standard_80387_constant_p (op1);
/* Non-loadable constants go to the constant pool; loadable 387
   constants may stay, others are forced to a register.  */
11997 op1 = validize_mem (force_const_mem (op_mode, op1));
12001 op1 = force_reg (op_mode, op1);
12004 op1 = force_reg (op_mode, op1);
12008 /* Try to rearrange the comparison to make it cheaper. */
12009 if (ix86_fp_comparison_cost (code)
12010 > ix86_fp_comparison_cost (swap_condition (code))
12011 && (REG_P (op1) || can_create_pseudo_p ()))
12014 tmp = op0, op0 = op1, op1 = tmp;
12015 code = swap_condition (code);
12017 op0 = force_reg (op_mode, op0);
/* NOTE(review): the stores back through *pop0/*pop1 and the return of
   CODE are elided from this listing.  */
12025 /* Convert comparison codes we use to represent FP comparison to integer
12026 code that will result in proper branch. Return UNKNOWN if no such code
/* NOTE(review): the body (a switch over CODE) is elided from this
   listing.  */
12030 ix86_fp_compare_code_to_integer (enum rtx_code code)
12059 /* Generate insn patterns to do a floating point compare of OPERANDS. */
/* Returns the comparison rtx for the flags user.  SECOND_TEST and
   BYPASS_TEST, when non-NULL, receive extra tests needed for IEEE
   NaN handling (see ix86_fp_comparison_codes).
   NOTE(review): this listing elides many lines (switch headers, case
   labels, else branches); code lines are kept byte-identical.  */
12062 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
12063 rtx *second_test, rtx *bypass_test)
12065 enum machine_mode fpcmp_mode, intcmp_mode;
12067 int cost = ix86_fp_comparison_cost (code);
12068 enum rtx_code bypass_code, first_code, second_code;
12070 fpcmp_mode = ix86_fp_compare_mode (code);
12071 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
12074 *second_test = NULL_RTX;
12076 *bypass_test = NULL_RTX;
12078 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12080 /* Do fcomi/sahf based test when profitable. */
12081 if (ix86_fp_comparison_arithmetics_cost (code) > cost
12082 && (bypass_code == UNKNOWN || bypass_test)
12083 && (second_code == UNKNOWN || second_test))
12085 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12086 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path: fnstsw result goes through a scratch HImode reg.  */
12092 gcc_assert (TARGET_SAHF);
12095 scratch = gen_reg_rtx (HImode);
12096 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
12098 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
12101 /* The FP codes work out to act like unsigned. */
12102 intcmp_mode = fpcmp_mode;
12104 if (bypass_code != UNKNOWN)
12105 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
12106 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12108 if (second_code != UNKNOWN)
12109 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
12110 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetics path: fnstsw into AH, then test/and/cmp on AH.  */
12115 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
12116 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12117 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
12119 scratch = gen_reg_rtx (HImode);
12120 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
12122 /* In the unordered case, we have to check C2 for NaN's, which
12123 doesn't happen to work out to anything nice combination-wise.
12124 So do some bit twiddling on the value we've got in AH to come
12125 up with an appropriate set of condition codes. */
12127 intcmp_mode = CCNOmode;
/* GT/UNGT: test C0|C3 (0x45).  */
12132 if (code == GT || !TARGET_IEEE_FP)
12134 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12139 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12140 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12141 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12142 intcmp_mode = CCmode;
/* LT/UNLT: C0 is bit 0x01.  */
12148 if (code == LT && TARGET_IEEE_FP)
12150 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12151 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12152 intcmp_mode = CCmode;
12157 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
/* GE/UNGE: test C0|C2 (0x05).  */
12163 if (code == GE || !TARGET_IEEE_FP)
12165 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12170 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12171 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
/* LE/UNLE.  */
12178 if (code == LE && TARGET_IEEE_FP)
12180 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12181 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12182 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12183 intcmp_mode = CCmode;
12188 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
/* EQ/UNEQ: C3 is bit 0x40.  */
12194 if (code == EQ && TARGET_IEEE_FP)
12196 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12197 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12198 intcmp_mode = CCmode;
12203 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* NE/LTGT.  */
12210 if (code == NE && TARGET_IEEE_FP)
12212 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12213 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12219 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED/UNORDERED: C2 is bit 0x04.  */
12225 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12229 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12234 gcc_unreachable ();
12238 /* Return the test that should be put into the flags user, i.e.
12239 the bcc, scc, or cmov instruction. */
12240 return gen_rtx_fmt_ee (code, VOIDmode,
12241 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand a comparison of the global ix86_compare_op0/op1, honoring a
   pre-emitted compare (ix86_compare_emitted) if one exists, and
   dispatching to the FP or integer expander otherwise.  */
12246 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12249 op0 = ix86_compare_op0;
12250 op1 = ix86_compare_op1;
12253 *second_test = NULL_RTX;
12255 *bypass_test = NULL_RTX;
/* A compare insn was already emitted: just wrap its flags result
   and consume the marker.  */
12257 if (ix86_compare_emitted)
12259 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12260 ix86_compare_emitted = NULL_RTX;
12262 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12264 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12265 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12266 second_test, bypass_test);
12269 ret = ix86_expand_int_compare (code, op0, op1);
12274 /* Return true if the CODE will result in nontrivial jump sequence. */
/* True when the FP comparison needs a bypass or second branch.  */
12276 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12278 enum rtx_code bypass_code, first_code, second_code;
12281 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12282 return bypass_code != UNKNOWN || second_code != UNKNOWN;
/* Expand a conditional branch on (ix86_compare_op0 CODE
   ix86_compare_op1) to LABEL.  Dispatches on the operand mode:
   simple flag-register modes, FP modes, and DImode/TImode which are
   split into word-sized compare+branch sequences.
   NOTE(review): the switch header, case labels and several closing
   lines are elided from this listing; code kept byte-identical.  */
12286 ix86_expand_branch (enum rtx_code code, rtx label)
12290 /* If we have emitted a compare insn, go straight to simple.
12291 ix86_expand_compare won't emit anything if ix86_compare_emitted
12293 if (ix86_compare_emitted)
12296 switch (GET_MODE (ix86_compare_op0))
/* Simple case: one compare, one conditional jump.  */
12302 tmp = ix86_expand_compare (code, NULL, NULL);
12303 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12304 gen_rtx_LABEL_REF (VOIDmode, label),
12306 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* FP modes.  */
12315 enum rtx_code bypass_code, first_code, second_code;
12317 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12318 &ix86_compare_op1);
12320 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12322 /* Check whether we will use the natural sequence with one jump. If
12323 so, we can expand jump early. Otherwise delay expansion by
12324 creating compound insn to not confuse optimizers. */
12325 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12327 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12328 gen_rtx_LABEL_REF (VOIDmode, label),
12329 pc_rtx, NULL_RTX, NULL_RTX);
/* Otherwise emit one compound jump insn carrying the clobbers
   reg-stack will need when it is split later.  */
12333 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12334 ix86_compare_op0, ix86_compare_op1);
12335 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12336 gen_rtx_LABEL_REF (VOIDmode, label),
12338 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12340 use_fcomi = ix86_use_fcomi_compare (code);
12341 vec = rtvec_alloc (3 + !use_fcomi);
12342 RTVEC_ELT (vec, 0) = tmp;
12344 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12346 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
/* Non-fcomi needs an extra HImode scratch for fnstsw.  */
12349 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12351 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12360 /* Expand DImode branch into multiple compare+branch. */
12362 rtx lo[2], hi[2], label2;
12363 enum rtx_code code1, code2, code3;
12364 enum machine_mode submode;
/* Canonicalize: constant operand goes second.  */
12366 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12368 tmp = ix86_compare_op0;
12369 ix86_compare_op0 = ix86_compare_op1;
12370 ix86_compare_op1 = tmp;
12371 code = swap_condition (code);
12373 if (GET_MODE (ix86_compare_op0) == DImode)
12375 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12376 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
/* TImode (64-bit target) splits into DImode halves.  */
12381 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12382 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12386 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12387 avoid two branches. This costs one extra insn, so disable when
12388 optimizing for size. */
12390 if ((code == EQ || code == NE)
12392 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12397 if (hi[1] != const0_rtx)
12398 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12399 NULL_RTX, 0, OPTAB_WIDEN);
12402 if (lo[1] != const0_rtx)
12403 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12404 NULL_RTX, 0, OPTAB_WIDEN);
12406 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12407 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse on the word-mode OR-of-XORs against zero.  */
12409 ix86_compare_op0 = tmp;
12410 ix86_compare_op1 = const0_rtx;
12411 ix86_expand_branch (code, label);
12415 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12416 op1 is a constant and the low word is zero, then we can just
12417 examine the high word. Similarly for low word -1 and
12418 less-or-equal-than or greater-than. */
12420 if (CONST_INT_P (hi[1]))
12423 case LT: case LTU: case GE: case GEU:
12424 if (lo[1] == const0_rtx)
12426 ix86_compare_op0 = hi[0];
12427 ix86_compare_op1 = hi[1];
12428 ix86_expand_branch (code, label);
12432 case LE: case LEU: case GT: case GTU:
12433 if (lo[1] == constm1_rtx)
12435 ix86_compare_op0 = hi[0];
12436 ix86_compare_op1 = hi[1];
12437 ix86_expand_branch (code, label);
12445 /* Otherwise, we need two or three jumps. */
12447 label2 = gen_label_rtx ();
12450 code2 = swap_condition (code);
12451 code3 = unsigned_condition (code);
/* Pick the high-word comparison pair for strict/non-strict codes.  */
12455 case LT: case GT: case LTU: case GTU:
12458 case LE: code1 = LT; code2 = GT; break;
12459 case GE: code1 = GT; code2 = LT; break;
12460 case LEU: code1 = LTU; code2 = GTU; break;
12461 case GEU: code1 = GTU; code2 = LTU; break;
12463 case EQ: code1 = UNKNOWN; code2 = NE; break;
12464 case NE: code2 = UNKNOWN; break;
12467 gcc_unreachable ();
12472 * if (hi(a) < hi(b)) goto true;
12473 * if (hi(a) > hi(b)) goto false;
12474 * if (lo(a) < lo(b)) goto true;
12478 ix86_compare_op0 = hi[0];
12479 ix86_compare_op1 = hi[1];
12481 if (code1 != UNKNOWN)
12482 ix86_expand_branch (code1, label);
12483 if (code2 != UNKNOWN)
12484 ix86_expand_branch (code2, label2);
/* Low word compared unsigned regardless of CODE's signedness.  */
12486 ix86_compare_op0 = lo[0];
12487 ix86_compare_op1 = lo[1];
12488 ix86_expand_branch (code3, label);
12490 if (code2 != UNKNOWN)
12491 emit_label (label2);
12496 gcc_unreachable ();
12500 /* Split branch based on floating point condition. */
/* Emits the jump sequence for a floating-point branch: an optional
   "bypass" jump (taken on unordered operands, targeting a fresh label),
   the primary conditional jump, and an optional "second" jump, attaching
   REG_BR_PROB notes when split_branch_probability is known.
   NOTE(review): this excerpt is line-sampled -- the embedded original
   line numbers are non-contiguous, so interior statements (braces,
   operands of the gen_rtx_* calls) are missing from this view.  */
12502 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12503 rtx target1, rtx target2, rtx tmp, rtx pushed)
12505 rtx second, bypass;
12506 rtx label = NULL_RTX;
/* -1 means "no probability note attached".  */
12508 int bypass_probability = -1, second_probability = -1, probability = -1;
/* If the fall-through target is not pc, reverse the condition so the
   jump targets can be normalized (reversal must preserve unordered).  */
12511 if (target2 != pc_rtx)
12514 code = reverse_condition_maybe_unordered (code);
12519 condition = ix86_expand_fp_compare (code, op1, op2,
12520 tmp, &second, &bypass);
12522 /* Remove pushed operand from stack. */
12524 ix86_free_from_memory (GET_MODE (pushed));
12526 if (split_branch_probability >= 0)
12528 /* Distribute the probabilities across the jumps.
12529 Assume the BYPASS and SECOND to be always test
12531 probability = split_branch_probability;
12533 /* Value of 1 is low enough to make no need for probability
12534 to be updated. Later we may run some experiments and see
12535 if unordered values are more frequent in practice. */
12537 bypass_probability = 1;
12539 second_probability = 1;
/* Bypass jump: skips the main test when operands compared unordered.  */
12541 if (bypass != NULL_RTX)
12543 label = gen_label_rtx ();
12544 i = emit_jump_insn (gen_rtx_SET
12546 gen_rtx_IF_THEN_ELSE (VOIDmode,
12548 gen_rtx_LABEL_REF (VOIDmode,
12551 if (bypass_probability >= 0)
12553 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12554 GEN_INT (bypass_probability),
/* Primary conditional jump on CONDITION.  */
12557 i = emit_jump_insn (gen_rtx_SET
12559 gen_rtx_IF_THEN_ELSE (VOIDmode,
12560 condition, target1, target2)));
12561 if (probability >= 0)
12563 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12564 GEN_INT (probability),
/* Optional secondary test produced by ix86_expand_fp_compare.  */
12566 if (second != NULL_RTX)
12568 i = emit_jump_insn (gen_rtx_SET
12570 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12572 if (second_probability >= 0)
12574 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12575 GEN_INT (second_probability),
/* Land here after the bypass jump, if one was emitted.  */
12578 if (label != NULL_RTX)
12579 emit_label (label);
/* Expand a setcc of comparison CODE into QImode register DEST.
   Returns 1 on success (DONE), 0 on failure (FAIL) -- the caller falls
   back to another expansion strategy on 0.  When the FP compare needs a
   secondary/bypass test, the two QImode flag results are combined with
   AND (bypass) or OR (second).
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
12583 ix86_expand_setcc (enum rtx_code code, rtx dest)
12585 rtx ret, tmp, tmpreg, equiv;
12586 rtx second_test, bypass_test;
/* Double-word compares go through a different path; refuse them.  */
12588 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12589 return 0; /* FAIL */
12591 gcc_assert (GET_MODE (dest) == QImode);
12593 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12594 PUT_MODE (ret, QImode);
12599 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12600 if (bypass_test || second_test)
12602 rtx test = second_test;
12604 rtx tmp2 = gen_reg_rtx (QImode);
/* bypass_test and second_test are mutually exclusive here.  */
12607 gcc_assert (!second_test);
12608 test = bypass_test;
/* The bypass test is combined with AND, so reverse it first.  */
12610 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12612 PUT_MODE (test, QImode);
12613 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12616 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12618 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12621 /* Attach a REG_EQUAL note describing the comparison result. */
12622 if (ix86_compare_op0 && ix86_compare_op1)
12624 equiv = simplify_gen_relational (code, QImode,
12625 GET_MODE (ix86_compare_op0),
12626 ix86_compare_op0, ix86_compare_op1);
12627 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12630 return 1; /* DONE */
12633 /* Expand comparison setting or clearing carry flag. Return true when
12634 successful and set pop for the operation. */
/* Rewrites CODE/op0/op1 so the comparison can be expressed purely as a
   carry-flag test (LTU/GEU) -- the form usable by sbb/adc tricks in the
   callers.  On success *POP receives the LTU/GEU comparison rtx.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
12636 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12638 enum machine_mode mode =
12639 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12641 /* Do not handle DImode compares that go through special path. */
12642 if (mode == (TARGET_64BIT ? TImode : DImode))
12645 if (SCALAR_FLOAT_MODE_P (mode))
12647 rtx second_test = NULL, bypass_test = NULL;
12648 rtx compare_op, compare_seq;
12650 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12652 /* Shortcut: following common codes never translate
12653 into carry flag compares. */
12654 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12655 || code == ORDERED || code == UNORDERED)
12658 /* These comparisons require zero flag; swap operands so they won't. */
12659 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12660 && !TARGET_IEEE_FP)
12665 code = swap_condition (code);
12668 /* Try to expand the comparison and verify that we end up with
12669 carry flag based comparison. This fails to be true only when
12670 we decide to expand comparison using arithmetic that is not
12671 too common scenario. */
12673 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12674 &second_test, &bypass_test);
12675 compare_seq = get_insns ();
/* Multi-test FP compares cannot be reduced to a single carry test.  */
12678 if (second_test || bypass_test)
12681 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12682 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12683 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12685 code = GET_CODE (compare_op);
12687 if (code != LTU && code != GEU)
12690 emit_insn (compare_seq);
12695 if (!INTEGRAL_MODE_P (mode))
12704 /* Convert a==0 into (unsigned)a<1. */
12707 if (op1 != const0_rtx)
12710 code = (code == EQ ? LTU : GEU);
12713 /* Convert a>b into b<a or a>=b-1. */
12716 if (CONST_INT_P (op1))
12718 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12719 /* Bail out on overflow. We still can swap operands but that
12720 would force loading of the constant into register. */
12721 if (op1 == const0_rtx
12722 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12724 code = (code == GTU ? GEU : LTU);
12731 code = (code == GTU ? LTU : GEU);
12735 /* Convert a>=0 into (unsigned)a<0x80000000. */
12738 if (mode == DImode || op1 != const0_rtx)
/* NOTE(review): `1 << (GET_MODE_BITSIZE (mode) - 1)` shifts a plain int
   into its sign bit for 32-bit modes -- undefined behavior in ISO C.
   Should be `(HOST_WIDE_INT) 1 << ...`; confirm against newer GCC.  */
12740 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12741 code = (code == LT ? GEU : LTU);
12745 if (mode == DImode || op1 != constm1_rtx)
12747 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12748 code = (code == LE ? GEU : LTU);
12754 /* Swapping operands may cause constant to appear as first operand. */
12755 if (!nonimmediate_operand (op0, VOIDmode))
12757 if (!can_create_pseudo_p ())
12759 op0 = force_reg (mode, op0);
12761 ix86_compare_op0 = op0;
12762 ix86_compare_op1 = op1;
12763 *pop = ix86_expand_compare (code, NULL, NULL);
12764 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
/* Expand an integer conditional move: operands[0] = operands[1](cmp)
   ? operands[2] : operands[3].  Returns 1 (DONE) when a branchless or
   cmov sequence was emitted, 0 (FAIL) otherwise.  Strategies, in order:
   sbb/carry-flag arithmetic for constant arms, setcc+lea for small
   constant differences, setcc+and/plus, masking one arm with a constant,
   and finally a plain cmov.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
12769 ix86_expand_int_movcc (rtx operands[])
12771 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12772 rtx compare_seq, compare_op;
12773 rtx second_test, bypass_test;
12774 enum machine_mode mode = GET_MODE (operands[0]);
/* NOTE(review): stray double semicolon below -- harmless but sloppy.  */
12775 bool sign_bit_compare_p = false;;
12778 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12779 compare_seq = get_insns ();
12782 compare_code = GET_CODE (compare_op);
/* x < 0 / x >= 0 (and the -1 variants) only test the sign bit.  */
12784 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12785 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12786 sign_bit_compare_p = true;
12788 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12789 HImode insns, we'd be swallowed in word prefix ops. */
12791 if ((mode != HImode || TARGET_FAST_PREFIX)
12792 && (mode != (TARGET_64BIT ? TImode : DImode))
12793 && CONST_INT_P (operands[2])
12794 && CONST_INT_P (operands[3]))
12796 rtx out = operands[0];
12797 HOST_WIDE_INT ct = INTVAL (operands[2]);
12798 HOST_WIDE_INT cf = INTVAL (operands[3]);
12799 HOST_WIDE_INT diff;
12802 /* Sign bit compares are better done using shifts than we do by using
12804 if (sign_bit_compare_p
12805 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12806 ix86_compare_op1, &compare_op))
12808 /* Detect overlap between destination and compare sources. */
12811 if (!sign_bit_compare_p)
12813 bool fpcmp = false;
12815 compare_code = GET_CODE (compare_op);
12817 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12818 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12821 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12824 /* To simplify rest of code, restrict to the GEU case. */
12825 if (compare_code == LTU)
12827 HOST_WIDE_INT tmp = ct;
12830 compare_code = reverse_condition (compare_code);
12831 code = reverse_condition (code);
12836 PUT_CODE (compare_op,
12837 reverse_condition_maybe_unordered
12838 (GET_CODE (compare_op)));
12840 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
/* Avoid clobbering the compare inputs by writing into a fresh reg.  */
12844 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12845 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12846 tmp = gen_reg_rtx (mode);
/* sbb-style: materialize 0/-1 from the carry flag.  */
12848 if (mode == DImode)
12849 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12851 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12855 if (code == GT || code == GE)
12856 code = reverse_condition (code);
12859 HOST_WIDE_INT tmp = ct;
12864 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12865 ix86_compare_op1, VOIDmode, 0, -1);
12878 tmp = expand_simple_binop (mode, PLUS,
12880 copy_rtx (tmp), 1, OPTAB_DIRECT);
12891 tmp = expand_simple_binop (mode, IOR,
12893 copy_rtx (tmp), 1, OPTAB_DIRECT);
12895 else if (diff == -1 && ct)
12905 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12907 tmp = expand_simple_binop (mode, PLUS,
12908 copy_rtx (tmp), GEN_INT (cf),
12909 copy_rtx (tmp), 1, OPTAB_DIRECT);
12917 * andl cf - ct, dest
12927 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12930 tmp = expand_simple_binop (mode, AND,
12932 gen_int_mode (cf - ct, mode),
12933 copy_rtx (tmp), 1, OPTAB_DIRECT);
12935 tmp = expand_simple_binop (mode, PLUS,
12936 copy_rtx (tmp), GEN_INT (ct),
12937 copy_rtx (tmp), 1, OPTAB_DIRECT);
12940 if (!rtx_equal_p (tmp, out))
12941 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12943 return 1; /* DONE */
12948 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
/* Swap the constant arms (reversing the condition below).  */
12951 tmp = ct, ct = cf, cf = tmp;
12954 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12956 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12958 /* We may be reversing unordered compare to normal compare, that
12959 is not valid in general (we may convert non-trapping condition
12960 to trapping one), however on i386 we currently emit all
12961 comparisons unordered. */
12962 compare_code = reverse_condition_maybe_unordered (compare_code);
12963 code = reverse_condition_maybe_unordered (code);
12967 compare_code = reverse_condition (compare_code);
12968 code = reverse_condition (code);
12972 compare_code = UNKNOWN;
12973 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12974 && CONST_INT_P (ix86_compare_op1))
12976 if (ix86_compare_op1 == const0_rtx
12977 && (code == LT || code == GE))
12978 compare_code = code;
12979 else if (ix86_compare_op1 == constm1_rtx)
12983 else if (code == GT)
12988 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12989 if (compare_code != UNKNOWN
12990 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12991 && (cf == -1 || ct == -1))
12993 /* If lea code below could be used, only optimize
12994 if it results in a 2 insn sequence. */
12996 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12997 || diff == 3 || diff == 5 || diff == 9)
12998 || (compare_code == LT && ct == -1)
12999 || (compare_code == GE && cf == -1))
13002 * notl op1 (if necessary)
13010 code = reverse_condition (code);
13013 out = emit_store_flag (out, code, ix86_compare_op0,
13014 ix86_compare_op1, VOIDmode, 0, -1);
13016 out = expand_simple_binop (mode, IOR,
13018 out, 1, OPTAB_DIRECT);
13019 if (out != operands[0])
13020 emit_move_insn (operands[0], out);
13022 return 1; /* DONE */
/* setcc + lea path: diff must be an lea-encodable scale/sum.  */
13027 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
13028 || diff == 3 || diff == 5 || diff == 9)
13029 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
13031 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
13037 * lea cf(dest*(ct-cf)),dest
13041 * This also catches the degenerate setcc-only case.
13047 out = emit_store_flag (out, code, ix86_compare_op0,
13048 ix86_compare_op1, VOIDmode, 0, 1);
13051 /* On x86_64 the lea instruction operates on Pmode, so we need
13052 to get arithmetics done in proper mode to match. */
13054 tmp = copy_rtx (out);
13058 out1 = copy_rtx (out);
13059 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
13063 tmp = gen_rtx_PLUS (mode, tmp, out1);
13069 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
13072 if (!rtx_equal_p (tmp, out))
13075 out = force_operand (tmp, copy_rtx (out));
13077 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
13079 if (!rtx_equal_p (out, operands[0]))
13080 emit_move_insn (operands[0], copy_rtx (out));
13082 return 1; /* DONE */
13086 * General case: Jumpful:
13087 * xorl dest,dest cmpl op1, op2
13088 * cmpl op1, op2 movl ct, dest
13089 * setcc dest jcc 1f
13090 * decl dest movl cf, dest
13091 * andl (cf-ct),dest 1:
13094 * Size 20. Size 14.
13096 * This is reasonably steep, but branch mispredict costs are
13097 * high on modern cpus, so consider failing only if optimizing
13101 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13102 && BRANCH_COST >= 2)
13106 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13111 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13113 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13115 /* We may be reversing unordered compare to normal compare,
13116 that is not valid in general (we may convert non-trapping
13117 condition to trapping one), however on i386 we currently
13118 emit all comparisons unordered. */
13119 code = reverse_condition_maybe_unordered (code);
13123 code = reverse_condition (code);
13124 if (compare_code != UNKNOWN)
13125 compare_code = reverse_condition (compare_code);
13129 if (compare_code != UNKNOWN)
13131 /* notl op1 (if needed)
13136 For x < 0 (resp. x <= -1) there will be no notl,
13137 so if possible swap the constants to get rid of the
13139 True/false will be -1/0 while code below (store flag
13140 followed by decrement) is 0/-1, so the constants need
13141 to be exchanged once more. */
13143 if (compare_code == GE || !cf)
13145 code = reverse_condition (code);
13150 HOST_WIDE_INT tmp = cf;
13155 out = emit_store_flag (out, code, ix86_compare_op0,
13156 ix86_compare_op1, VOIDmode, 0, -1);
13160 out = emit_store_flag (out, code, ix86_compare_op0,
13161 ix86_compare_op1, VOIDmode, 0, 1);
13163 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13164 copy_rtx (out), 1, OPTAB_DIRECT);
13167 out = expand_simple_binop (mode, AND, copy_rtx (out),
13168 gen_int_mode (cf - ct, mode),
13169 copy_rtx (out), 1, OPTAB_DIRECT);
13171 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13172 copy_rtx (out), 1, OPTAB_DIRECT);
13173 if (!rtx_equal_p (out, operands[0]))
13174 emit_move_insn (operands[0], copy_rtx (out));
13176 return 1; /* DONE */
13180 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13182 /* Try a few things more with specific constants and a variable. */
13185 rtx var, orig_out, out, tmp;
13187 if (BRANCH_COST <= 2)
13188 return 0; /* FAIL */
13190 /* If one of the two operands is an interesting constant, load a
13191 constant with the above and mask it in with a logical operation. */
13193 if (CONST_INT_P (operands[2]))
13196 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13197 operands[3] = constm1_rtx, op = and_optab;
13198 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13199 operands[3] = const0_rtx, op = ior_optab;
13201 return 0; /* FAIL */
13203 else if (CONST_INT_P (operands[3]))
13206 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13207 operands[2] = constm1_rtx, op = and_optab;
/* NOTE(review): the `operands[3] != const0_rtx` below is redundant
   (INTVAL == -1 already excludes const0); the symmetric branch above
   tests the OTHER operand, so this likely meant operands[2].  Verify
   against upstream GCC history before changing.  */
13208 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13209 operands[2] = const0_rtx, op = ior_optab;
13211 return 0; /* FAIL */
13214 return 0; /* FAIL */
13216 orig_out = operands[0];
13217 tmp = gen_reg_rtx (mode);
13220 /* Recurse to get the constant loaded. */
13221 if (ix86_expand_int_movcc (operands) == 0)
13222 return 0; /* FAIL */
13224 /* Mask in the interesting variable. */
13225 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13227 if (!rtx_equal_p (out, orig_out))
13228 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13230 return 1; /* DONE */
13234 * For comparison with above,
/* Plain cmov path: force both arms into registers as required.  */
13244 if (! nonimmediate_operand (operands[2], mode))
13245 operands[2] = force_reg (mode, operands[2]);
13246 if (! nonimmediate_operand (operands[3], mode))
13247 operands[3] = force_reg (mode, operands[3]);
13249 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13251 rtx tmp = gen_reg_rtx (mode);
13252 emit_move_insn (tmp, operands[3]);
13255 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13257 rtx tmp = gen_reg_rtx (mode);
13258 emit_move_insn (tmp, operands[2]);
13262 if (! register_operand (operands[2], VOIDmode)
13264 || ! register_operand (operands[3], VOIDmode)))
13265 operands[2] = force_reg (mode, operands[2]);
13268 && ! register_operand (operands[3], VOIDmode))
13269 operands[3] = force_reg (mode, operands[3]);
13271 emit_insn (compare_seq);
13272 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13273 gen_rtx_IF_THEN_ELSE (mode,
13274 compare_op, operands[2],
/* Extra cmovs for the bypass/second FP tests, overwriting dest.  */
13277 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13278 gen_rtx_IF_THEN_ELSE (mode,
13280 copy_rtx (operands[3]),
13281 copy_rtx (operands[0]))));
13283 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13284 gen_rtx_IF_THEN_ELSE (mode,
13286 copy_rtx (operands[2]),
13287 copy_rtx (operands[0]))));
13289 return 1; /* DONE */
13292 /* Swap, force into registers, or otherwise massage the two operands
13293 to an sse comparison with a mask result. Thus we differ a bit from
13294 ix86_prepare_fp_compare_args which expects to produce a flags result.
13296 The DEST operand exists to help determine whether to commute commutative
13297 operators. The POP0/POP1 operands are updated in place. The new
13298 comparison code is returned, or UNKNOWN if not implementable. */
/* NOTE(review): line-sampled excerpt; the switch skeleton dispatching on
   CODE (case labels, braces) is missing from this view.  */
13300 static enum rtx_code
13301 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13302 rtx *pop0, rtx *pop1)
13310 /* We have no LTGT as an operator. We could implement it with
13311 NE & ORDERED, but this requires an extra temporary. It's
13312 not clear that it's worth it. */
13319 /* These are supported directly. */
13326 /* For commutative operators, try to canonicalize the destination
13327 operand to be first in the comparison - this helps reload to
13328 avoid extra moves. */
13329 if (!dest || !rtx_equal_p (dest, *pop1))
13337 /* These are not supported directly. Swap the comparison operands
13338 to transform into something that is supported. */
13342 code = swap_condition (code);
13346 gcc_unreachable ();
13352 /* Detect conditional moves that exactly match min/max operational
13353 semantics. Note that this is IEEE safe, as long as we don't
13354 interchange the operands.
13356 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13357 and TRUE if the operation is successful and instructions are emitted. */
/* NOTE(review): line-sampled excerpt; interior lines are missing.  */
13360 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13361 rtx cmp_op1, rtx if_true, rtx if_false)
13363 enum machine_mode mode;
13369 else if (code == UNGE)
13372 if_true = if_false;
/* Match (a OP b) ? a : b  or the swapped form to detect min/max.  */
13378 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13380 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13385 mode = GET_MODE (dest);
13387 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13388 but MODE may be a vector mode and thus not appropriate. */
13389 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* Strict IEEE semantics: wrap in UNSPEC so operand order survives.  */
13391 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13394 if_true = force_reg (mode, if_true);
13395 v = gen_rtvec (2, if_true, if_false);
13396 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Relaxed math: a plain SMIN/SMAX rtx suffices.  */
13400 code = is_min ? SMIN : SMAX;
13401 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13404 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13408 /* Expand an sse vector comparison. Return the register with the result. */
/* Emits DEST = (CODE cmp_op0 cmp_op1) as a mask-producing SSE compare.
   A fresh register replaces DEST when DEST overlaps any input.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13411 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13412 rtx op_true, rtx op_false)
13414 enum machine_mode mode = GET_MODE (dest);
13417 cmp_op0 = force_reg (mode, cmp_op0);
13418 if (!nonimmediate_operand (cmp_op1, mode))
13419 cmp_op1 = force_reg (mode, cmp_op1);
/* Don't clobber an input still needed by the caller's select.  */
13422 || reg_overlap_mentioned_p (dest, op_true)
13423 || reg_overlap_mentioned_p (dest, op_false))
13424 dest = gen_reg_rtx (mode);
13426 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13427 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13432 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13433 operations. This is used for both scalar and vector conditional moves. */
/* CMP is a full-width mask.  Special cases: one arm zero -> single
   AND/ANDN; SSE5 -> a native pcmov; otherwise the classic
   (OP_TRUE & CMP) | (OP_FALSE & ~CMP) three-op select.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13436 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13438 enum machine_mode mode = GET_MODE (dest);
13441 if (op_false == CONST0_RTX (mode))
13443 op_true = force_reg (mode, op_true);
13444 x = gen_rtx_AND (mode, cmp, op_true);
13445 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13447 else if (op_true == CONST0_RTX (mode))
13449 op_false = force_reg (mode, op_false);
13450 x = gen_rtx_NOT (mode, cmp);
13451 x = gen_rtx_AND (mode, x, op_false);
13452 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13454 else if (TARGET_SSE5)
13456 rtx pcmov = gen_rtx_SET (mode, dest,
13457 gen_rtx_IF_THEN_ELSE (mode, cmp,
13464 op_true = force_reg (mode, op_true);
13465 op_false = force_reg (mode, op_false);
13467 t2 = gen_reg_rtx (mode);
13469 t3 = gen_reg_rtx (mode);
13473 x = gen_rtx_AND (mode, op_true, cmp);
13474 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13476 x = gen_rtx_NOT (mode, cmp);
13477 x = gen_rtx_AND (mode, x, op_false);
13478 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13480 x = gen_rtx_IOR (mode, t3, t2);
13481 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13485 /* Expand a floating-point conditional move. Return true if successful. */
/* operands[0] = operands[1](cmp) ? operands[2] : operands[3].  Uses the
   SSE mask/select path for SSE-math scalar modes, otherwise fcmov; a
   signed-integer comparison is first reduced via setcc because fcmov
   only handles flag conditions it supports.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13488 ix86_expand_fp_movcc (rtx operands[])
13490 enum machine_mode mode = GET_MODE (operands[0]);
13491 enum rtx_code code = GET_CODE (operands[1]);
13492 rtx tmp, compare_op, second_test, bypass_test;
13494 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13496 enum machine_mode cmode;
13498 /* Since we've no cmove for sse registers, don't force bad register
13499 allocation just to gain access to it. Deny movcc when the
13500 comparison mode doesn't match the move mode. */
13501 cmode = GET_MODE (ix86_compare_op0);
13502 if (cmode == VOIDmode)
13503 cmode = GET_MODE (ix86_compare_op1);
13507 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13509 &ix86_compare_op1);
13510 if (code == UNKNOWN)
/* Try the min/max shortcut first, then the general mask select.  */
13513 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13514 ix86_compare_op1, operands[2],
13518 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13519 ix86_compare_op1, operands[2], operands[3]);
13520 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13524 /* The floating point conditional move instructions don't directly
13525 support conditions resulting from a signed integer comparison. */
13527 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13529 /* The floating point conditional move instructions don't directly
13530 support signed integer comparisons. */
13532 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13534 gcc_assert (!second_test && !bypass_test);
/* Reduce to a QImode flag value, then re-compare against zero.  */
13535 tmp = gen_reg_rtx (QImode);
13536 ix86_expand_setcc (code, tmp);
13538 ix86_compare_op0 = tmp;
13539 ix86_compare_op1 = const0_rtx;
13540 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13542 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13544 tmp = gen_reg_rtx (mode);
13545 emit_move_insn (tmp, operands[3]);
13548 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13550 tmp = gen_reg_rtx (mode);
13551 emit_move_insn (tmp, operands[2]);
13555 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13556 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13557 operands[2], operands[3])));
13559 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13560 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13561 operands[3], operands[0])));
13563 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13564 gen_rtx_IF_THEN_ELSE (mode, second_test,
13565 operands[2], operands[0])));
13570 /* Expand a floating-point vector conditional move; a vcond operation
13571 rather than a movcc operation. */
/* operands: [0]=dest, [1]/[2]=select arms, [3]=comparison rtx,
   [4]/[5]=compare operands.  Prepares the SSE compare, tries the
   min/max shortcut, else compare + mask select.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13574 ix86_expand_fp_vcond (rtx operands[])
13576 enum rtx_code code = GET_CODE (operands[3]);
13579 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13580 &operands[4], &operands[5]);
13581 if (code == UNKNOWN)
13584 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13585 operands[5], operands[1], operands[2]))
13588 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13589 operands[1], operands[2]);
13590 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13594 /* Expand a signed/unsigned integral vector conditional move. */
/* Canonicalizes the comparison to EQ/GT/GTU, emulates unsigned compares
   (hardware lacks them) via subtract + sign-bit trickery or saturating
   subtraction, then emits compare + mask select.  NEGATE swaps the two
   select arms when the canonicalization reversed the condition.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13597 ix86_expand_int_vcond (rtx operands[])
13599 enum machine_mode mode = GET_MODE (operands[0]);
13600 enum rtx_code code = GET_CODE (operands[3]);
13601 bool negate = false;
13604 cop0 = operands[4];
13605 cop1 = operands[5];
13607 /* SSE5 supports all of the comparisons on all vector int types. */
13610 /* Canonicalize the comparison to EQ, GT, GTU. */
13621 code = reverse_condition (code);
13627 code = reverse_condition (code);
13633 code = swap_condition (code);
13634 x = cop0, cop0 = cop1, cop1 = x;
13638 gcc_unreachable ();
13641 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13642 if (mode == V2DImode)
13647 /* SSE4.1 supports EQ. */
13648 if (!TARGET_SSE4_1)
13654 /* SSE4.2 supports GT/GTU. */
13655 if (!TARGET_SSE4_2)
13660 gcc_unreachable ();
13664 /* Unsigned parallel compare is not supported by the hardware. Play some
13665 tricks to turn this into a signed comparison against 0. */
13668 cop0 = force_reg (mode, cop0);
13677 /* Perform a parallel modulo subtraction. */
13678 t1 = gen_reg_rtx (mode);
13679 emit_insn ((mode == V4SImode
13681 : gen_subv2di3) (t1, cop0, cop1));
13683 /* Extract the original sign bit of op0. */
13684 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13686 t2 = gen_reg_rtx (mode);
13687 emit_insn ((mode == V4SImode
13689 : gen_andv2di3) (t2, cop0, mask));
13691 /* XOR it back into the result of the subtraction. This results
13692 in the sign bit set iff we saw unsigned underflow. */
13693 x = gen_reg_rtx (mode);
13694 emit_insn ((mode == V4SImode
13696 : gen_xorv2di3) (x, t1, t2));
13704 /* Perform a parallel unsigned saturating subtraction. */
13705 x = gen_reg_rtx (mode);
13706 emit_insn (gen_rtx_SET (VOIDmode, x,
13707 gen_rtx_US_MINUS (mode, cop0, cop1)));
13714 gcc_unreachable ();
13718 cop1 = CONST0_RTX (mode);
/* negate==true flips which arm is "true" via the 1+negate/2-negate
   index arithmetic below.  */
13722 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13723 operands[1+negate], operands[2-negate]);
13725 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13726 operands[2-negate]);
13730 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13731 true if we should do zero extension, else sign extension. HIGH_P is
13732 true if we want the N/2 high elements, else the low elements. */
/* Pre-SSE4.1 strategy: interleave the source with either zeros
   (zero-extend) or a computed per-element sign mask (sign-extend,
   built with a GT-against-zero compare).
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13735 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13737 enum machine_mode imode = GET_MODE (operands[1]);
13738 rtx (*unpack)(rtx, rtx, rtx);
13745 unpack = gen_vec_interleave_highv16qi;
13747 unpack = gen_vec_interleave_lowv16qi;
13751 unpack = gen_vec_interleave_highv8hi;
13753 unpack = gen_vec_interleave_lowv8hi;
13757 unpack = gen_vec_interleave_highv4si;
13759 unpack = gen_vec_interleave_lowv4si;
13762 gcc_unreachable ();
13765 dest = gen_lowpart (imode, operands[0]);
/* Zero-extend interleaves with zero; sign-extend interleaves with
   0 > x, which yields all-ones lanes exactly where x is negative.  */
13768 se = force_reg (imode, CONST0_RTX (imode));
13770 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13771 operands[1], pc_rtx, pc_rtx);
13773 emit_insn (unpack (dest, operands[1], se));
13776 /* This function performs the same task as ix86_expand_sse_unpack,
13777 but with SSE4.1 instructions. */
/* Uses the native pmovzx/pmovsx extension insns; for the high half the
   upper 8 bytes are first shifted down since the extend insns read
   only the low elements.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13780 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13782 enum machine_mode imode = GET_MODE (operands[1]);
13783 rtx (*unpack)(rtx, rtx);
13790 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13792 unpack = gen_sse4_1_extendv8qiv8hi2;
13796 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13798 unpack = gen_sse4_1_extendv4hiv4si2;
13802 unpack = gen_sse4_1_zero_extendv2siv2di2;
13804 unpack = gen_sse4_1_extendv2siv2di2;
13807 gcc_unreachable ();
13810 dest = operands[0];
13813 /* Shift higher 8 bytes to lower 8 bytes. */
13814 src = gen_reg_rtx (imode);
13815 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13816 gen_lowpart (TImode, operands[1]),
13822 emit_insn (unpack (dest, src));
13825 /* This function performs the same task as ix86_expand_sse_unpack,
13826 but with sse5 instructions. */
/* Builds a 16-entry PPERM byte-selector constant vector (and a smaller
   lane-selector PARALLEL) for each source width, then emits the
   pperm_zero (zero-extend) or pperm_sign (sign-extend) pattern.
   H selects the high or low half of the source bytes.
   NOTE(review): line-sampled excerpt; interior lines are missing.  */
13829 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13831 enum machine_mode imode = GET_MODE (operands[1]);
13832 int pperm_bytes[16];
13834 int h = (high_p) ? 8 : 0;
13837 rtvec v = rtvec_alloc (16);
13840 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: one source byte + one zero/sign byte per element.  */
13845 vs = rtvec_alloc (8);
13846 h2 = (high_p) ? 8 : 0;
13847 for (i = 0; i < 8; i++)
13849 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13850 pperm_bytes[2*i+1] = ((unsigned_p)
13852 : PPERM_SIGN | PPERM_SRC2 | i | h);
13855 for (i = 0; i < 16; i++)
13856 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13858 for (i = 0; i < 8; i++)
13859 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13861 p = gen_rtx_PARALLEL (VOIDmode, vs);
13862 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13864 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13866 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two source bytes + two extension bytes per element.  */
13870 vs = rtvec_alloc (4);
13871 h2 = (high_p) ? 4 : 0;
13872 for (i = 0; i < 4; i++)
13874 sign_extend = ((unsigned_p)
13876 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13877 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13878 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13879 pperm_bytes[4*i+2] = sign_extend;
13880 pperm_bytes[4*i+3] = sign_extend;
13883 for (i = 0; i < 16; i++)
13884 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13886 for (i = 0; i < 4; i++)
13887 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13889 p = gen_rtx_PARALLEL (VOIDmode, vs);
13890 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13892 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
13894 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four source bytes + four extension bytes per element.  */
13898 vs = rtvec_alloc (2);
13899 h2 = (high_p) ? 2 : 0;
13900 for (i = 0; i < 2; i++)
13902 sign_extend = ((unsigned_p)
13904 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
13905 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
13906 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
13907 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
13908 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
13909 pperm_bytes[8*i+4] = sign_extend;
13910 pperm_bytes[8*i+5] = sign_extend;
13911 pperm_bytes[8*i+6] = sign_extend;
13912 pperm_bytes[8*i+7] = sign_extend;
13915 for (i = 0; i < 16; i++)
13916 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13918 for (i = 0; i < 2; i++)
13919 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13921 p = gen_rtx_PARALLEL (VOIDmode, vs);
13922 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13924 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
13926 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
13930 gcc_unreachable ();
13936 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
13937 next narrower integer vector type */
/* SSE5 pperm-based pack: builds a 16-byte selector vector choosing the
   low bytes of each element, first from SRC1 (low result half) then
   SRC2 (high result half), and emits the matching pperm_pack pattern.
   NOTE(review): line-sampled excerpt; the switch skeleton on IMODE is
   missing from this view.  */
13939 ix86_expand_sse5_pack (rtx operands[3])
13941 enum machine_mode imode = GET_MODE (operands[0]);
13942 int pperm_bytes[16];
13944 rtvec v = rtvec_alloc (16);
13946 rtx op0 = operands[0];
13947 rtx op1 = operands[1];
13948 rtx op2 = operands[2];
/* V8HI sources -> V16QI result: keep byte 0 of each 2-byte element.  */
13953 for (i = 0; i < 8; i++)
13955 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
13956 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
13959 for (i = 0; i < 16; i++)
13960 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13962 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13963 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI sources -> V8HI result: keep bytes 0-1 of each 4-byte element.  */
13967 for (i = 0; i < 4; i++)
13969 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
13970 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
13971 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
13972 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
13975 for (i = 0; i < 16; i++)
13976 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13978 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13979 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI sources -> V4SI result: keep bytes 0-3 of each 8-byte element.  */
13983 for (i = 0; i < 2; i++)
13985 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
13986 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
13987 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
13988 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
13989 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
13990 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
13991 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
13992 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
13995 for (i = 0; i < 16; i++)
13996 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13998 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13999 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14003 gcc_unreachable ();
14009 /* Expand conditional increment or decrement using adc/sbb instructions.
14010 The default case using setcc followed by the conditional move can be
14011 done by generic code. */
/* Returns nonzero (DONE) when it emitted the sequence itself; the visible
   early-exit paths (operands[3] not +/-1, or the compare not expressible
   via the carry flag) presumably return 0 -- those return lines were lost
   in extraction.  NOTE(review): lossy extraction; braces/case labels are
   missing, code lines kept byte-identical.  */
14013 ix86_expand_int_addcc (rtx operands[])
14015 enum rtx_code code = GET_CODE (operands[1]);
14017 rtx val = const0_rtx;
14018 bool fpcmp = false;
14019 enum machine_mode mode = GET_MODE (operands[0]);
/* Only increments/decrements by exactly 1 are handled here.  */
14021 if (operands[3] != const1_rtx
14022 && operands[3] != constm1_rtx)
14024 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14025 ix86_compare_op1, &compare_op))
14027 code = GET_CODE (compare_op);
14029 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14030 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14033 code = ix86_fp_compare_code_to_integer (code);
/* Reverse the condition; FP compares must preserve unordered handling.  */
14040 PUT_CODE (compare_op,
14041 reverse_condition_maybe_unordered
14042 (GET_CODE (compare_op)));
14044 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14046 PUT_MODE (compare_op, mode);
14048 /* Construct either adc or sbb insn. */
14049 if ((code == LTU) == (operands[3] == constm1_rtx))
/* sbb path: dest = op2 - 0 - CF, dispatched on the operand mode.  */
14051 switch (GET_MODE (operands[0]))
14054 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
14057 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
14060 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
14063 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14066 gcc_unreachable ();
/* adc path: dest = op2 + 0 + CF.  */
14071 switch (GET_MODE (operands[0]))
14074 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
14077 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
14080 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
14083 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14086 gcc_unreachable ();
14089 return 1; /* DONE */
14093 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
14094 works for floating-point parameters and nonoffsetable memories.
14095 For pushes, it returns just stack offsets; the values will be saved
14096 in the right order. Maximally three parts are generated. */
/* Returns the number of parts (2 or 3) written into PARTS.  NOTE(review):
   lossy extraction -- braces, the TARGET_64BIT split, and several return
   statements are missing; code lines kept byte-identical.  */
14099 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* 32-bit: 4-byte parts (XFmode is 12 bytes => 3 parts).
   64-bit branch below: 8-byte parts.  */
14104 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
14106 size = (GET_MODE_SIZE (mode) + 4) / 8;
14108 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
14109 gcc_assert (size >= 2 && size <= 3);
14111 /* Optimize constant pool reference to immediates. This is used by fp
14112 moves, that force all constants to memory to allow combining. */
14113 if (MEM_P (operand) && MEM_READONLY_P (operand))
14115 rtx tmp = maybe_get_pool_constant (operand);
14120 if (MEM_P (operand) && !offsettable_memref_p (operand))
14122 /* The only non-offsetable memories we handle are pushes. */
14123 int ok = push_operand (operand, VOIDmode);
14127 operand = copy_rtx (operand);
14128 PUT_MODE (operand, Pmode);
/* For a push all parts are the same auto-modified stack reference.  */
14129 parts[0] = parts[1] = parts[2] = operand;
14133 if (GET_CODE (operand) == CONST_VECTOR)
14135 enum machine_mode imode = int_mode_for_mode (mode);
14136 /* Caution: if we looked through a constant pool memory above,
14137 the operand may actually have a different mode now. That's
14138 ok, since we want to pun this all the way back to an integer. */
14139 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14140 gcc_assert (operand != NULL);
/* 32-bit target: split into SImode pieces.  */
14146 if (mode == DImode)
14147 split_di (&operand, 1, &parts[0], &parts[1]);
14150 if (REG_P (operand))
/* Hard registers are consecutive only after reload.  */
14152 gcc_assert (reload_completed);
14153 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
14154 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
14156 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
14158 else if (offsettable_memref_p (operand))
14160 operand = adjust_address (operand, SImode, 0);
14161 parts[0] = operand;
14162 parts[1] = adjust_address (operand, SImode, 4);
14164 parts[2] = adjust_address (operand, SImode, 8);
14166 else if (GET_CODE (operand) == CONST_DOUBLE)
14171 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert the FP constant to its target representation words.  */
14175 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14176 parts[2] = gen_int_mode (l[2], SImode);
14179 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14182 gcc_unreachable ();
14184 parts[1] = gen_int_mode (l[1], SImode);
14185 parts[0] = gen_int_mode (l[0], SImode);
14188 gcc_unreachable ();
/* 64-bit target: split into DImode pieces (upper part of XFmode
   is SImode, of TFmode is DImode).  */
14193 if (mode == TImode)
14194 split_ti (&operand, 1, &parts[0], &parts[1]);
14195 if (mode == XFmode || mode == TFmode)
14197 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14198 if (REG_P (operand))
14200 gcc_assert (reload_completed);
14201 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14202 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14204 else if (offsettable_memref_p (operand))
14206 operand = adjust_address (operand, DImode, 0);
14207 parts[0] = operand;
14208 parts[1] = adjust_address (operand, upper_mode, 8);
14210 else if (GET_CODE (operand) == CONST_DOUBLE)
14215 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14216 real_to_target (l, &r, mode);
14218 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14219 if (HOST_BITS_PER_WIDE_INT >= 64)
/* Glue two 32-bit target words into one 64-bit HOST_WIDE_INT.  */
14222 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14223 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14226 parts[0] = immed_double_const (l[0], l[1], DImode);
14228 if (upper_mode == SImode)
14229 parts[1] = gen_int_mode (l[2], SImode);
14230 else if (HOST_BITS_PER_WIDE_INT >= 64)
14233 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14234 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14237 parts[1] = immed_double_const (l[2], l[3], DImode);
14240 gcc_unreachable ();
14247 /* Emit insns to perform a move or push of DI, DF, and XF values.
14248 Return false when normal moves are needed; true when all required
14249 insns have been emitted. Operands 2-4 contain the input values
14250 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): lossy extraction -- braces, some returns and the PART
   array declaration are missing; code lines kept byte-identical.  */
14253 ix86_split_long_move (rtx operands[])
14258 int collisions = 0;
14259 enum machine_mode mode = GET_MODE (operands[0]);
14261 /* The DFmode expanders may ask us to move double.
14262 For 64bit target this is single move. By hiding the fact
14263 here we simplify i386.md splitters. */
14264 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14266 /* Optimize constant pool reference to immediates. This is used by
14267 fp moves, that force all constants to memory to allow combining. */
14269 if (MEM_P (operands[1])
14270 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14271 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14272 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14273 if (push_operand (operands[0], VOIDmode))
14275 operands[0] = copy_rtx (operands[0]);
14276 PUT_MODE (operands[0], Pmode);
/* 8-byte value on 64-bit: emit it as a single DImode move.  */
14279 operands[0] = gen_lowpart (DImode, operands[0]);
14280 operands[1] = gen_lowpart (DImode, operands[1]);
14281 emit_move_insn (operands[0], operands[1]);
14285 /* The only non-offsettable memory we handle is push. */
14286 if (push_operand (operands[0], VOIDmode))
14289 gcc_assert (!MEM_P (operands[0])
14290 || offsettable_memref_p (operands[0]));
/* Split source and destination into word-sized parts.  */
14292 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14293 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14295 /* When emitting push, take care for source operands on the stack. */
14296 if (push && MEM_P (operands[1])
14297 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
/* Re-anchor the lower source parts off the higher ones, since pushes
   move the stack pointer between part stores.  */
14300 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
14301 XEXP (part[1][2], 0));
14302 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
14303 XEXP (part[1][1], 0));
14306 /* We need to do copy in the right order in case an address register
14307 of the source overlaps the destination. */
14308 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Count how many destination registers appear in the source address.  */
14310 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
14312 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14315 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
14318 /* Collision in the middle part can be handled by reordering. */
14319 if (collisions == 1 && nparts == 3
14320 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
14323 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14324 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14327 /* If there are more collisions, we can't handle it by reordering.
14328 Do an lea to the last part and use only one colliding move. */
14329 else if (collisions > 1)
14335 base = part[0][nparts - 1];
14337 /* Handle the case when the last part isn't valid for lea.
14338 Happens in 64-bit mode storing the 12-byte XFmode. */
14339 if (GET_MODE (base) != Pmode)
14340 base = gen_rtx_REG (Pmode, REGNO (base));
14342 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14343 part[1][0] = replace_equiv_address (part[1][0], base);
14344 part[1][1] = replace_equiv_address (part[1][1],
14345 plus_constant (base, UNITS_PER_WORD));
14347 part[1][2] = replace_equiv_address (part[1][2],
14348 plus_constant (base, 8));
/* Push path: XFmode occupies 12 bytes but a 16-byte slot when
   TARGET_128BIT_LONG_DOUBLE, so pre-adjust %esp by the 4-byte pad.  */
14358 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14359 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14360 emit_move_insn (part[0][2], part[1][2]);
14365 /* In 64bit mode we don't have 32bit push available. In case this is
14366 register, it is OK - we will just use larger counterpart. We also
14367 retype memory - these comes from attempt to avoid REX prefix on
14368 moving of second half of TFmode value. */
14369 if (GET_MODE (part[1][1]) == SImode)
14371 switch (GET_CODE (part[1][1]))
14374 part[1][1] = adjust_address (part[1][1], DImode, 0);
14378 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14382 gcc_unreachable ();
14385 if (GET_MODE (part[1][0]) == SImode)
14386 part[1][0] = part[1][1];
14389 emit_move_insn (part[0][1], part[1][1]);
14390 emit_move_insn (part[0][0], part[1][0]);
14394 /* Choose correct order to not overwrite the source before it is copied. */
14395 if ((REG_P (part[0][0])
14396 && REG_P (part[1][1])
14397 && (REGNO (part[0][0]) == REGNO (part[1][1])
14399 && REGNO (part[0][0]) == REGNO (part[1][2]))))
14401 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed (high-to-low) copy order.  */
14405 operands[2] = part[0][2];
14406 operands[3] = part[0][1];
14407 operands[4] = part[0][0];
14408 operands[5] = part[1][2];
14409 operands[6] = part[1][1];
14410 operands[7] = part[1][0];
14414 operands[2] = part[0][1];
14415 operands[3] = part[0][0];
14416 operands[5] = part[1][1];
14417 operands[6] = part[1][0];
/* Natural (low-to-high) copy order.  */
14424 operands[2] = part[0][0];
14425 operands[3] = part[0][1];
14426 operands[4] = part[0][2];
14427 operands[5] = part[1][0];
14428 operands[6] = part[1][1];
14429 operands[7] = part[1][2];
14433 operands[2] = part[0][0];
14434 operands[3] = part[0][1];
14435 operands[5] = part[1][0];
14436 operands[6] = part[1][1];
14440 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14443 if (CONST_INT_P (operands[5])
14444 && operands[5] != const0_rtx
14445 && REG_P (operands[2]))
14447 if (CONST_INT_P (operands[6])
14448 && INTVAL (operands[6]) == INTVAL (operands[5]))
/* Reuse the register that already holds the same constant.  */
14449 operands[6] = operands[2];
14452 && CONST_INT_P (operands[7])
14453 && INTVAL (operands[7]) == INTVAL (operands[5]))
14454 operands[7] = operands[2];
14458 && CONST_INT_P (operands[6])
14459 && operands[6] != const0_rtx
14460 && REG_P (operands[3])
14461 && CONST_INT_P (operands[7])
14462 && INTVAL (operands[7]) == INTVAL (operands[6]))
14463 operands[7] = operands[3];
/* Finally emit the part-wise moves in the chosen order.  */
14466 emit_move_insn (operands[2], operands[5]);
14467 emit_move_insn (operands[3], operands[6]);
14469 emit_move_insn (operands[4], operands[7]);
14474 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14475 left shift by a constant, either using a single shift or
14476 a sequence of add instructions. */
/* NOTE(review): lossy extraction -- the COUNT==1 condition and braces are
   missing; code lines kept byte-identical.  The three emit groups are:
   a single self-add (shift by 1), COUNT self-adds when adds are cheaper
   than a constant shift and not optimizing for size, else one shift.  */
14479 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14483 emit_insn ((mode == DImode
14485 : gen_adddi3) (operand, operand, operand));
14487 else if (!optimize_size
14488 && count * ix86_cost->add <= ix86_cost->shift_const)
14491 for (i=0; i<count; i++)
14493 emit_insn ((mode == DImode
14495 : gen_adddi3) (operand, operand, operand));
14499 emit_insn ((mode == DImode
14501 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into single-word operations.  OPERANDS are dest/src/count; SCRATCH is
   an optional spare register for the variable-count adjustment.
   NOTE(review): lossy extraction -- braces and several else lines are
   missing; code lines kept byte-identical.  */
14505 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14507 rtx low[2], high[2];
14509 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: fully resolve at expand time.  */
14511 if (CONST_INT_P (operands[2]))
14513 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14514 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low word becomes 0, old low goes to high.  */
14516 if (count >= single_width)
14518 emit_move_insn (high[0], low[1]);
14519 emit_move_insn (low[0], const0_rtx);
14521 if (count > single_width)
14522 ix86_expand_ashl_const (high[0], count - single_width, mode);
/* Count < word size: shld to shuffle bits into high, plain shift low.  */
14526 if (!rtx_equal_p (operands[0], operands[1]))
14527 emit_move_insn (operands[0], operands[1]);
14528 emit_insn ((mode == DImode
14530 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14531 ix86_expand_ashl_const (low[0], count, mode);
14536 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special case 1 << N with a variable N.  */
14538 if (operands[1] == const1_rtx)
14540 /* Assuming we've chosen QImode-capable registers, then 1 << N
14541 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14542 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14544 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14546 ix86_expand_clear (low[0]);
14547 ix86_expand_clear (high[0]);
/* Test the "upper half" bit of the count (bit 5 or 6).  */
14548 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* setcc 0/1 into the low byte of each half: low = (bit clear),
   high = (bit set).  */
14550 d = gen_lowpart (QImode, low[0]);
14551 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14552 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14553 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14555 d = gen_lowpart (QImode, high[0]);
14556 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14557 s = gen_rtx_NE (QImode, flags, const0_rtx);
14558 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14561 /* Otherwise, we can get the same results by manually performing
14562 a bit extract operation on bit 5/6, and then performing the two
14563 shifts. The two methods of getting 0/1 into low/high are exactly
14564 the same size. Avoiding the shift in the bit extract case helps
14565 pentium4 a bit; no one else seems to care much either way. */
14570 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14571 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14573 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14574 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
/* high = (count >> log2(word)) & 1; low = high ^ 1.  */
14576 emit_insn ((mode == DImode
14578 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14579 emit_insn ((mode == DImode
14581 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14582 emit_move_insn (low[0], high[0]);
14583 emit_insn ((mode == DImode
14585 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
/* Shift both halves; hardware masks the count to the word width.  */
14588 emit_insn ((mode == DImode
14590 : gen_ashldi3) (low[0], low[0], operands[2]));
14591 emit_insn ((mode == DImode
14593 : gen_ashldi3) (high[0], high[0], operands[2]));
14597 if (operands[1] == constm1_rtx)
14599 /* For -1 << N, we can avoid the shld instruction, because we
14600 know that we're shifting 0...31/63 ones into a -1. */
14601 emit_move_insn (low[0], constm1_rtx)
14603 emit_move_insn (high[0], low[0]);
14605 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld + shift, then fix up when the
   count crosses the word boundary (cmov if scratch available, else
   a conditional-jump helper pattern).  */
14609 if (!rtx_equal_p (operands[0], operands[1]))
14610 emit_move_insn (operands[0], operands[1]);
14612 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14613 emit_insn ((mode == DImode
14615 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14618 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14620 if (TARGET_CMOVE && scratch)
14622 ix86_expand_clear (scratch);
14623 emit_insn ((mode == DImode
14624 ? gen_x86_shift_adj_1
14625 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
14628 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into single-word ops.
   Mirror image of ix86_split_ashl; sign bits are replicated into the
   vacated high word.  NOTE(review): lossy extraction -- braces and else
   lines are missing; code lines kept byte-identical.  */
14632 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14634 rtx low[2], high[2];
14636 const int single_width = mode == DImode ? 32 : 64;
14638 if (CONST_INT_P (operands[2]))
14640 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14641 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Maximum count: both words become the sign mask.  */
14643 if (count == single_width * 2 - 1)
14645 emit_move_insn (high[0], high[1]);
14646 emit_insn ((mode == DImode
14648 : gen_ashrdi3) (high[0], high[0],
14649 GEN_INT (single_width - 1)));
14650 emit_move_insn (low[0], high[0]);
/* Count >= word size: low = old high >> (count - width),
   high = sign mask of old high.  */
14653 else if (count >= single_width)
14655 emit_move_insn (low[0], high[1]);
14656 emit_move_insn (high[0], low[0]);
14657 emit_insn ((mode == DImode
14659 : gen_ashrdi3) (high[0], high[0],
14660 GEN_INT (single_width - 1)));
14661 if (count > single_width)
14662 emit_insn ((mode == DImode
14664 : gen_ashrdi3) (low[0], low[0],
14665 GEN_INT (count - single_width)));
/* Count < word size: shrd into low, arithmetic shift of high.  */
14669 if (!rtx_equal_p (operands[0], operands[1]))
14670 emit_move_insn (operands[0], operands[1]);
14671 emit_insn ((mode == DImode
14673 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14674 emit_insn ((mode == DImode
14676 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then boundary fix-up (cmov with the
   sign mask in SCRATCH, else the conditional-jump helper).  */
14681 if (!rtx_equal_p (operands[0], operands[1]))
14682 emit_move_insn (operands[0], operands[1]);
14684 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14686 emit_insn ((mode == DImode
14688 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14689 emit_insn ((mode == DImode
14691 : gen_ashrdi3) (high[0], high[0], operands[2]));
14693 if (TARGET_CMOVE && scratch)
14695 emit_move_insn (scratch, high[0]);
14696 emit_insn ((mode == DImode
14698 : gen_ashrdi3) (scratch, scratch,
14699 GEN_INT (single_width - 1)));
14700 emit_insn ((mode == DImode
14701 ? gen_x86_shift_adj_1
14702 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14706 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into single-word ops.
   Like ix86_split_ashr but the vacated high word is zeroed.
   NOTE(review): lossy extraction -- braces and else lines are missing;
   code lines kept byte-identical.  */
14711 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14713 rtx low[2], high[2];
14715 const int single_width = mode == DImode ? 32 : 64;
14717 if (CONST_INT_P (operands[2]))
14719 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14720 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Count >= word size: low = old high >> (count - width), high = 0.  */
14722 if (count >= single_width)
14724 emit_move_insn (low[0], high[1]);
14725 ix86_expand_clear (high[0]);
14727 if (count > single_width)
14728 emit_insn ((mode == DImode
14730 : gen_lshrdi3) (low[0], low[0],
14731 GEN_INT (count - single_width)));
/* Count < word size: shrd into low, logical shift of high.  */
14735 if (!rtx_equal_p (operands[0], operands[1]))
14736 emit_move_insn (operands[0], operands[1]);
14737 emit_insn ((mode == DImode
14739 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14740 emit_insn ((mode == DImode
14742 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then boundary fix-up.  */
14747 if (!rtx_equal_p (operands[0], operands[1]))
14748 emit_move_insn (operands[0], operands[1]);
14750 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14752 emit_insn ((mode == DImode
14754 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14755 emit_insn ((mode == DImode
14757 : gen_lshrdi3) (high[0], high[0], operands[2]));
14759 /* Heh. By reversing the arguments, we can reuse this pattern. */
14760 if (TARGET_CMOVE && scratch)
14762 ix86_expand_clear (scratch);
14763 emit_insn ((mode == DImode
14764 ? gen_x86_shift_adj_1
14765 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
14769 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
14773 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* Attaches a REG_BR_PROB note (value PROB, out of REG_BR_PROB_BASE) to the
   last emitted insn, which must be a jump.  NOTE(review): lossy extraction
   -- the REG_NOTES assignment target line is missing; code kept verbatim.  */
14775 predict_jump (int prob)
14777 rtx insn = get_last_insn ();
14778 gcc_assert (JUMP_P (insn));
14780 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14785 /* Helper function for the string operations below. Test VARIABLE for whether
14786 it is aligned to VALUE bytes. If true, jump to the label. */
/* Emits tmp = VARIABLE & VALUE; if (tmp == 0) goto label -- i.e. the label
   is reached when the tested bits are clear.  Returns the label (return
   statement lost in extraction).  EPILOGUE selects the branch-probability
   hint: 50% in epilogues, 90% otherwise.  */
14788 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14790 rtx label = gen_label_rtx ();
14791 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
14792 if (GET_MODE (variable) == DImode)
14793 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14795 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14796 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14799 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14801 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14805 /* Adjust COUNTER by the VALUE. */
/* Concretely: emits COUNTREG -= VALUE (an add of -VALUE) in the counter's
   own mode.  */
14807 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14809 if (GET_MODE (countreg) == DImode)
14810 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14812 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14815 /* Zero extend possibly SImode EXP to Pmode register. */
/* Three cases: a VOIDmode constant is simply forced into a Pmode register;
   an already-Pmode value is copied to a fresh pseudo; otherwise EXP is
   assumed SImode and zero-extended to DImode (so Pmode == DImode here).
   The trailing "return r;" was lost in extraction.  */
14817 ix86_zero_extend_to_Pmode (rtx exp)
14820 if (GET_MODE (exp) == VOIDmode)
14821 return force_reg (Pmode, exp);
14822 if (GET_MODE (exp) == Pmode)
14823 return copy_to_mode_reg (Pmode, exp);
14824 r = gen_reg_rtx (Pmode);
14825 emit_insn (gen_zero_extendsidi2 (r, exp));
14829 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (exact_log2 below).  Constant counts are
   folded at compile time; register counts get a logical right shift.
   NOTE(review): lossy extraction -- the return of SC and the masking with
   PIECE_SIZE_MASK (declared but not visibly used) are missing.  */
14831 scale_counter (rtx countreg, int scale)
14834 rtx piece_size_mask;
14838 if (CONST_INT_P (countreg))
14839 return GEN_INT (INTVAL (countreg) / scale);
14840 gcc_assert (REG_P (countreg));
14842 piece_size_mask = GEN_INT (scale - 1);
14843 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14844 GEN_INT (exact_log2 (scale)),
14845 NULL, 1, OPTAB_DIRECT);
14849 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14850 DImode for constant loop counts. */
/* A non-VOIDmode expression keeps its own mode; otherwise (constant) the
   mode depends on whether the value fits in 32 bits.  The return lines
   for the CONST_INT cases were lost in extraction.  */
14852 static enum machine_mode
14853 counter_mode (rtx count_exp)
14855 if (GET_MODE (count_exp) != VOIDmode)
14856 return GET_MODE (count_exp);
14857 if (GET_CODE (count_exp) != CONST_INT)
14859 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14864 /* When SRCPTR is non-NULL, output simple loop to move memory
14865 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14866 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
14867 equivalent loop to set memory by VALUE (supposed to be in MODE).
14869 The size is rounded down to whole number of chunk size moved at once.
14870 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): lossy extraction -- braces, the EXPECTED_SIZE parameter
   line and several else lines are missing; code kept byte-identical.  */
14874 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14875 rtx destptr, rtx srcptr, rtx value,
14876 rtx count, enum machine_mode mode, int unroll,
14879 rtx out_label, top_label, iter, tmp;
14880 enum machine_mode iter_mode = counter_mode (count);
14881 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14882 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14888 top_label = gen_label_rtx ();
14889 out_label = gen_label_rtx ();
14890 iter = gen_reg_rtx (iter_mode);
/* SIZE = COUNT rounded down to a multiple of the unrolled chunk.  */
14892 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14893 NULL, 1, OPTAB_DIRECT);
14894 /* Those two should combine. */
14895 if (piece_size == const1_rtx)
/* Skip the whole loop when there is nothing to copy/set.  */
14897 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14899 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14901 emit_move_insn (iter, const0_rtx);
14903 emit_label (top_label);
/* Address each chunk as ptr + iter.  */
14905 tmp = convert_modes (Pmode, iter_mode, iter, true);
14906 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14907 destmem = change_address (destmem, mode, x_addr);
14911 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14912 srcmem = change_address (srcmem, mode, y_addr);
14914 /* When unrolling for chips that reorder memory reads and writes,
14915 we can save registers by using single temporary.
14916 Also using 4 temporaries is overkill in 32bit mode. */
/* This branch is deliberately disabled ("&& 0").  */
14917 if (!TARGET_64BIT && 0)
14919 for (i = 0; i < unroll; i++)
14924 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14926 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14928 emit_move_insn (destmem, srcmem);
/* Active copy path: read all UNROLL chunks into temporaries first,
   then store them, to decouple loads from stores.  */
14934 gcc_assert (unroll <= 4);
14935 for (i = 0; i < unroll; i++)
14937 tmpreg[i] = gen_reg_rtx (mode);
14941 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14943 emit_move_insn (tmpreg[i], srcmem);
14945 for (i = 0; i < unroll; i++)
14950 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14952 emit_move_insn (destmem, tmpreg[i]);
/* Set path (SRCPTR == NULL): store VALUE into each chunk.  */
14957 for (i = 0; i < unroll; i++)
14961 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14962 emit_move_insn (destmem, value);
/* iter += chunk size; loop while iter < size.  */
14965 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14966 true, OPTAB_LIB_WIDEN);
14968 emit_move_insn (iter, tmp);
14970 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the back-edge probability from the expected trip count.  */
14972 if (expected_size != -1)
14974 expected_size /= GET_MODE_SIZE (mode) * unroll;
14975 if (expected_size == 0)
14977 else if (expected_size > REG_BR_PROB_BASE)
14978 predict_jump (REG_BR_PROB_BASE - 1);
14980 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14983 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the copied/set region.  */
14984 iter = ix86_zero_extend_to_Pmode (iter);
14985 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14986 true, OPTAB_LIB_WIDEN);
14987 if (tmp != destptr)
14988 emit_move_insn (destptr, tmp);
14991 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14992 true, OPTAB_LIB_WIDEN);
14994 emit_move_insn (srcptr, tmp);
14996 emit_label (out_label);
14999 /* Output "rep; mov" instruction.
15000 Arguments have same meaning as for previous function */
/* Emits a rep movs of COUNT bytes (COUNT parameter line lost in
   extraction) in chunks of MODE.  DESTEXP/SRCEXP describe the final
   pointer values (ptr + scaled count) for the rep_mov pattern.  */
15002 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15003 rtx destptr, rtx srcptr,
15005 enum machine_mode mode)
15011 /* If the size is known, it is shorter to use rep movs. */
/* NOTE(review): the branch body selecting a wider mode for a known
   4-byte-multiple count is missing from the extraction.  */
15012 if (mode == QImode && CONST_INT_P (count)
15013 && !(INTVAL (count) & 3))
/* Normalize both MEMs to BLKmode references based at the pointers.  */
15016 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15017 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15018 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15019 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15020 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
15021 if (mode != QImode)
15023 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15024 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15025 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15026 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
15027 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15028 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
15032 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15033 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
15035 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
15039 /* Output "rep; stos" instruction.
15040 Arguments have same meaning as for previous function */
/* Fills COUNT bytes at DESTPTR with VALUE (COUNT parameter line lost in
   extraction) using rep stos in chunks of MODE.  */
15042 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
15044 enum machine_mode mode)
15049 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15050 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0)
15051 value = force_reg (mode, gen_lowpart (mode, value));
15052 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* DESTEXP = destptr + countreg * chunk size -- the final pointer.  */
15053 if (mode != QImode)
15055 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15056 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15057 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15060 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15061 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit one auto-incrementing string move of MODE size: copy
   *(SRCPTR + OFFSET) to *(DESTPTR + OFFSET), advancing both pointers
   via the strmov pattern.  SRCMEM/DESTMEM supply aliasing info.  */
15065 emit_strmov (rtx destmem, rtx srcmem,
15066 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
15068 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
15069 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
15070 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15073 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* Two regimes: a constant COUNT unrolls into straight-line moves of
   decreasing power-of-two sizes; a variable COUNT emits alignment-test
   branches around single moves (or a byte loop for large MAX_SIZE).
   NOTE(review): lossy extraction -- braces, TARGET_64BIT else-arms and
   the offset bookkeeping lines are missing; code kept byte-identical.  */
15075 expand_movmem_epilogue (rtx destmem, rtx srcmem,
15076 rtx destptr, rtx srcptr, rtx count, int max_size)
15079 if (CONST_INT_P (count))
15081 HOST_WIDE_INT countval = INTVAL (count);
/* Peel off 16, 8, 4, 2, 1 bytes according to the count's low bits.  */
15084 if ((countval & 0x10) && max_size > 16)
15088 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15089 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
15092 gcc_unreachable ();
15095 if ((countval & 0x08) && max_size > 8)
15098 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15101 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15102 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
15106 if ((countval & 0x04) && max_size > 4)
15108 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15111 if ((countval & 0x02) && max_size > 2)
15113 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
15116 if ((countval & 0x01) && max_size > 1)
15118 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Variable count too large for branchy epilogue: mask it and fall back
   to a byte-copy loop.  */
15125 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
15126 count, 1, OPTAB_DIRECT);
15127 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
15128 count, QImode, 1, 4);
15132 /* When there are stringops, we can cheaply increase dest and src pointers.
15133 Otherwise we save code size by maintaining offset (zero is readily
15134 available from preceding rep operation) and using x86 addressing modes.
/* Stringop variant: each size class is guarded by an alignment test on
   COUNT and copied with an auto-incrementing strmov.  */
15136 if (TARGET_SINGLE_STRINGOP)
15140 rtx label = ix86_expand_aligntest (count, 4, true);
15141 src = change_address (srcmem, SImode, srcptr);
15142 dest = change_address (destmem, SImode, destptr);
15143 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15144 emit_label (label);
15145 LABEL_NUSES (label) = 1;
15149 rtx label = ix86_expand_aligntest (count, 2, true);
15150 src = change_address (srcmem, HImode, srcptr);
15151 dest = change_address (destmem, HImode, destptr);
15152 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15153 emit_label (label);
15154 LABEL_NUSES (label) = 1;
15158 rtx label = ix86_expand_aligntest (count, 1, true);
15159 src = change_address (srcmem, QImode, srcptr);
15160 dest = change_address (destmem, QImode, destptr);
15161 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15162 emit_label (label);
15163 LABEL_NUSES (label) = 1;
/* Non-stringop variant: keep a running OFFSET register and use
   base+offset addressing for each guarded move.  */
15168 rtx offset = force_reg (Pmode, const0_rtx);
15173 rtx label = ix86_expand_aligntest (count, 4, true);
15174 src = change_address (srcmem, SImode, srcptr);
15175 dest = change_address (destmem, SImode, destptr);
15176 emit_move_insn (dest, src);
15177 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15178 true, OPTAB_LIB_WIDEN);
15180 emit_move_insn (offset, tmp);
15181 emit_label (label);
15182 LABEL_NUSES (label) = 1;
15186 rtx label = ix86_expand_aligntest (count, 2, true);
15187 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15188 src = change_address (srcmem, HImode, tmp);
15189 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15190 dest = change_address (destmem, HImode, tmp);
15191 emit_move_insn (dest, src);
15192 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15193 true, OPTAB_LIB_WIDEN);
15195 emit_move_insn (offset, tmp);
15196 emit_label (label);
15197 LABEL_NUSES (label) = 1;
15201 rtx label = ix86_expand_aligntest (count, 1, true);
15202 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15203 src = change_address (srcmem, QImode, tmp);
15204 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15205 dest = change_address (destmem, QImode, tmp);
15206 emit_move_insn (dest, src);
15207 emit_label (label);
15208 LABEL_NUSES (label) = 1;
15213 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15215 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15216 rtx count, int max_size)
15219 expand_simple_binop (counter_mode (count), AND, count,
15220 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15221 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15222 gen_lowpart (QImode, value), count, QImode,
15226 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
15228 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
15232 if (CONST_INT_P (count))
15234 HOST_WIDE_INT countval = INTVAL (count);
15237 if ((countval & 0x10) && max_size > 16)
15241 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15242 emit_insn (gen_strset (destptr, dest, value));
15243 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15244 emit_insn (gen_strset (destptr, dest, value));
15247 gcc_unreachable ();
15250 if ((countval & 0x08) && max_size > 8)
15254 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15255 emit_insn (gen_strset (destptr, dest, value));
15259 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15260 emit_insn (gen_strset (destptr, dest, value));
15261 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15262 emit_insn (gen_strset (destptr, dest, value));
15266 if ((countval & 0x04) && max_size > 4)
15268 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15269 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15272 if ((countval & 0x02) && max_size > 2)
15274 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15275 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15278 if ((countval & 0x01) && max_size > 1)
15280 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15281 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15288 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
15293 rtx label = ix86_expand_aligntest (count, 16, true);
15296 dest = change_address (destmem, DImode, destptr);
15297 emit_insn (gen_strset (destptr, dest, value));
15298 emit_insn (gen_strset (destptr, dest, value));
15302 dest = change_address (destmem, SImode, destptr);
15303 emit_insn (gen_strset (destptr, dest, value));
15304 emit_insn (gen_strset (destptr, dest, value));
15305 emit_insn (gen_strset (destptr, dest, value));
15306 emit_insn (gen_strset (destptr, dest, value));
15308 emit_label (label);
15309 LABEL_NUSES (label) = 1;
15313 rtx label = ix86_expand_aligntest (count, 8, true);
15316 dest = change_address (destmem, DImode, destptr);
15317 emit_insn (gen_strset (destptr, dest, value));
15321 dest = change_address (destmem, SImode, destptr);
15322 emit_insn (gen_strset (destptr, dest, value));
15323 emit_insn (gen_strset (destptr, dest, value));
15325 emit_label (label);
15326 LABEL_NUSES (label) = 1;
15330 rtx label = ix86_expand_aligntest (count, 4, true);
15331 dest = change_address (destmem, SImode, destptr);
15332 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15333 emit_label (label);
15334 LABEL_NUSES (label) = 1;
15338 rtx label = ix86_expand_aligntest (count, 2, true);
15339 dest = change_address (destmem, HImode, destptr);
15340 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15341 emit_label (label);
15342 LABEL_NUSES (label) = 1;
15346 rtx label = ix86_expand_aligntest (count, 1, true);
15347 dest = change_address (destmem, QImode, destptr);
15348 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15349 emit_label (label);
15350 LABEL_NUSES (label) = 1;
15354 /* Copy enough bytes from SRC to DEST to advance DEST, known to be aligned
15355 by ALIGN, up to DESIRED_ALIGNMENT. */
15357 expand_movmem_prologue (rtx destmem, rtx srcmem,
15358 rtx destptr, rtx srcptr, rtx count,
15359 int align, int desired_alignment)
15361 if (align <= 1 && desired_alignment > 1)
15363 rtx label = ix86_expand_aligntest (destptr, 1, false);
15364 srcmem = change_address (srcmem, QImode, srcptr);
15365 destmem = change_address (destmem, QImode, destptr);
15366 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15367 ix86_adjust_counter (count, 1);
15368 emit_label (label);
15369 LABEL_NUSES (label) = 1;
15371 if (align <= 2 && desired_alignment > 2)
15373 rtx label = ix86_expand_aligntest (destptr, 2, false);
15374 srcmem = change_address (srcmem, HImode, srcptr);
15375 destmem = change_address (destmem, HImode, destptr);
15376 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15377 ix86_adjust_counter (count, 2);
15378 emit_label (label);
15379 LABEL_NUSES (label) = 1;
15381 if (align <= 4 && desired_alignment > 4)
15383 rtx label = ix86_expand_aligntest (destptr, 4, false);
15384 srcmem = change_address (srcmem, SImode, srcptr);
15385 destmem = change_address (destmem, SImode, destptr);
15386 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15387 ix86_adjust_counter (count, 4);
15388 emit_label (label);
15389 LABEL_NUSES (label) = 1;
15391 gcc_assert (desired_alignment <= 8);
15394 /* Store enough bytes at DEST to advance it, known to be aligned by ALIGN,
15395 up to DESIRED_ALIGNMENT. */
15397 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15398 int align, int desired_alignment)
15400 if (align <= 1 && desired_alignment > 1)
15402 rtx label = ix86_expand_aligntest (destptr, 1, false);
15403 destmem = change_address (destmem, QImode, destptr);
15404 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15405 ix86_adjust_counter (count, 1);
15406 emit_label (label);
15407 LABEL_NUSES (label) = 1;
15409 if (align <= 2 && desired_alignment > 2)
15411 rtx label = ix86_expand_aligntest (destptr, 2, false);
15412 destmem = change_address (destmem, HImode, destptr);
15413 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15414 ix86_adjust_counter (count, 2);
15415 emit_label (label);
15416 LABEL_NUSES (label) = 1;
15418 if (align <= 4 && desired_alignment > 4)
15420 rtx label = ix86_expand_aligntest (destptr, 4, false);
15421 destmem = change_address (destmem, SImode, destptr);
15422 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15423 ix86_adjust_counter (count, 4);
15424 emit_label (label);
15425 LABEL_NUSES (label) = 1;
15427 gcc_assert (desired_alignment <= 8);
15430 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
15431 static enum stringop_alg
15432 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15433 int *dynamic_check)
15435 const struct stringop_algs * algs;
15436 /* Algorithms using the rep prefix want at least edi and ecx;
15437 additionally, memset wants eax and memcpy wants esi. Don't
15438 consider such algorithms if the user has appropriated those
15439 registers for their own purposes. */
15440 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15442 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15444 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15445 || (alg != rep_prefix_1_byte \
15446 && alg != rep_prefix_4_byte \
15447 && alg != rep_prefix_8_byte))
15449 *dynamic_check = -1;
15451 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15453 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
15454 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15455 return stringop_alg;
15456 /* rep; movq or rep; movl is the smallest variant. */
15457 else if (optimize_size)
15459 if (!count || (count & 3))
15460 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15462 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15464 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15466 else if (expected_size != -1 && expected_size < 4)
15467 return loop_1_byte;
15468 else if (expected_size != -1)
15471 enum stringop_alg alg = libcall;
15472 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15474 /* We get here if the algorithms that were not libcall-based
15475 were rep-prefix based and we are unable to use rep prefixes
15476 based on global register usage. Break out of the loop and
15477 use the heuristic below. */
15478 if (algs->size[i].max == 0)
15480 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15482 enum stringop_alg candidate = algs->size[i].alg;
15484 if (candidate != libcall && ALG_USABLE_P (candidate))
15486 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15487 last non-libcall inline algorithm. */
15488 if (TARGET_INLINE_ALL_STRINGOPS)
15490 /* When the current size is best to be copied by a libcall,
15491 but we are still forced to inline, run the heuristic below
15492 that will pick code for medium sized blocks. */
15493 if (alg != libcall)
15497 else if (ALG_USABLE_P (candidate))
15501 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15503 /* When asked to inline the call anyway, try to pick meaningful choice.
15504 We look for maximal size of block that is faster to copy by hand and
15505 take blocks of at most of that size guessing that average size will
15506 be roughly half of the block.
15508 If this turns out to be bad, we might simply specify the preferred
15509 choice in ix86_costs. */
15510 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15511 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15514 enum stringop_alg alg;
15516 bool any_alg_usable_p = true;
15518 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15520 enum stringop_alg candidate = algs->size[i].alg;
15521 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15523 if (candidate != libcall && candidate
15524 && ALG_USABLE_P (candidate))
15525 max = algs->size[i].max;
15527 /* If there aren't any usable algorithms, then recursing on
15528 smaller sizes isn't going to find anything. Just return the
15529 simple byte-at-a-time copy loop. */
15530 if (!any_alg_usable_p)
15532 /* Pick something reasonable. */
15533 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15534 *dynamic_check = 128;
15535 return loop_1_byte;
15539 alg = decide_alg (count, max / 2, memset, dynamic_check);
15540 gcc_assert (*dynamic_check == -1);
15541 gcc_assert (alg != libcall);
15542 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15543 *dynamic_check = max;
15546 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15547 #undef ALG_USABLE_P
15550 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15551 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
15553 decide_alignment (int align,
15554 enum stringop_alg alg,
15557 int desired_align = 0;
15561 gcc_unreachable ();
15563 case unrolled_loop:
15564 desired_align = GET_MODE_SIZE (Pmode);
15566 case rep_prefix_8_byte:
15569 case rep_prefix_4_byte:
15570 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15571 copying whole cacheline at once. */
15572 if (TARGET_PENTIUMPRO)
15577 case rep_prefix_1_byte:
15578 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15579 copying whole cacheline at once. */
15580 if (TARGET_PENTIUMPRO)
15594 if (desired_align < align)
15595 desired_align = align;
15596 if (expected_size != -1 && expected_size < 4)
15597 desired_align = align;
15598 return desired_align;
15601 /* Return the smallest power of 2 greater than VAL. */
15603 smallest_pow2_greater_than (int val)
15611 /* Expand string move (memcpy) operation. Use i386 string operations when
15612 profitable. expand_setmem contains similar code. The code depends upon
15613 architecture, block size and alignment, but always has the same
15616 1) Prologue guard: Conditional that jumps up to epilogues for small
15617 blocks that can be handled by epilogue alone. This is faster but
15618 also needed for correctness, since the prologue assumes the block is larger
15619 than the desired alignment.
15621 Optional dynamic check for size and libcall for large
15622 blocks is emitted here too, with -minline-stringops-dynamically.
15624 2) Prologue: copy first few bytes in order to get destination aligned
15625 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15626 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15627 We emit either a jump tree on power of two sized blocks, or a byte loop.
15629 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15630 with specified algorithm.
15632 4) Epilogue: code copying tail of the block that is too small to be
15633 handled by main body (or up to size guarded by prologue guard). */
15636 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15637 rtx expected_align_exp, rtx expected_size_exp)
15643 rtx jump_around_label = NULL;
15644 HOST_WIDE_INT align = 1;
15645 unsigned HOST_WIDE_INT count = 0;
15646 HOST_WIDE_INT expected_size = -1;
15647 int size_needed = 0, epilogue_size_needed;
15648 int desired_align = 0;
15649 enum stringop_alg alg;
15652 if (CONST_INT_P (align_exp))
15653 align = INTVAL (align_exp);
15654 /* i386 can do misaligned access on reasonably increased cost. */
15655 if (CONST_INT_P (expected_align_exp)
15656 && INTVAL (expected_align_exp) > align)
15657 align = INTVAL (expected_align_exp);
15658 if (CONST_INT_P (count_exp))
15659 count = expected_size = INTVAL (count_exp);
15660 if (CONST_INT_P (expected_size_exp) && count == 0)
15661 expected_size = INTVAL (expected_size_exp);
15663 /* Make sure we don't need to care about overflow later on. */
15664 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15667 /* Step 0: Decide on preferred algorithm, desired alignment and
15668 size of chunks to be copied by main loop. */
15670 alg = decide_alg (count, expected_size, false, &dynamic_check);
15671 desired_align = decide_alignment (align, alg, expected_size);
15673 if (!TARGET_ALIGN_STRINGOPS)
15674 align = desired_align;
15676 if (alg == libcall)
15678 gcc_assert (alg != no_stringop);
15680 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15681 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15682 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
15687 gcc_unreachable ();
15689 size_needed = GET_MODE_SIZE (Pmode);
15691 case unrolled_loop:
15692 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15694 case rep_prefix_8_byte:
15697 case rep_prefix_4_byte:
15700 case rep_prefix_1_byte:
15706 epilogue_size_needed = size_needed;
15708 /* Step 1: Prologue guard. */
15710 /* Alignment code needs count to be in register. */
15711 if (CONST_INT_P (count_exp) && desired_align > align)
15712 count_exp = force_reg (counter_mode (count_exp), count_exp);
15713 gcc_assert (desired_align >= 1 && align >= 1);
15715 /* Ensure that alignment prologue won't copy past end of block. */
15716 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15718 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15719 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15720 Make sure it is power of 2. */
15721 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15723 if (CONST_INT_P (count_exp))
15725 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
15730 label = gen_label_rtx ();
15731 emit_cmp_and_jump_insns (count_exp,
15732 GEN_INT (epilogue_size_needed),
15733 LTU, 0, counter_mode (count_exp), 1, label);
15734 if (expected_size == -1 || expected_size < epilogue_size_needed)
15735 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15737 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15741 /* Emit code to decide on runtime whether library call or inline should be
15743 if (dynamic_check != -1)
15745 if (CONST_INT_P (count_exp))
15747 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15749 emit_block_move_via_libcall (dst, src, count_exp, false);
15750 count_exp = const0_rtx;
15756 rtx hot_label = gen_label_rtx ();
15757 jump_around_label = gen_label_rtx ();
15758 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15759 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15760 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15761 emit_block_move_via_libcall (dst, src, count_exp, false);
15762 emit_jump (jump_around_label);
15763 emit_label (hot_label);
15767 /* Step 2: Alignment prologue. */
15769 if (desired_align > align)
15771 /* Except for the first move in epilogue, we no longer know
15772 constant offset in aliasing info. It don't seems to worth
15773 the pain to maintain it for the first move, so throw away
15775 src = change_address (src, BLKmode, srcreg);
15776 dst = change_address (dst, BLKmode, destreg);
15777 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
15780 if (label && size_needed == 1)
15782 emit_label (label);
15783 LABEL_NUSES (label) = 1;
15787 /* Step 3: Main loop. */
15793 gcc_unreachable ();
15795 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15796 count_exp, QImode, 1, expected_size);
15799 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15800 count_exp, Pmode, 1, expected_size);
15802 case unrolled_loop:
15803 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15804 registers for 4 temporaries anyway. */
15805 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15806 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15809 case rep_prefix_8_byte:
15810 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15813 case rep_prefix_4_byte:
15814 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15817 case rep_prefix_1_byte:
15818 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15822 /* Adjust properly the offset of src and dest memory for aliasing. */
15823 if (CONST_INT_P (count_exp))
15825 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15826 (count / size_needed) * size_needed);
15827 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15828 (count / size_needed) * size_needed);
15832 src = change_address (src, BLKmode, srcreg);
15833 dst = change_address (dst, BLKmode, destreg);
15836 /* Step 4: Epilogue to copy the remaining bytes. */
15840 /* When the main loop is done, COUNT_EXP might hold original count,
15841 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15842 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15843 bytes. Compensate if needed. */
15845 if (size_needed < epilogue_size_needed)
15848 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15849 GEN_INT (size_needed - 1), count_exp, 1,
15851 if (tmp != count_exp)
15852 emit_move_insn (count_exp, tmp);
15854 emit_label (label);
15855 LABEL_NUSES (label) = 1;
15858 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15859 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15860 epilogue_size_needed);
15861 if (jump_around_label)
15862 emit_label (jump_around_label);
15866 /* Helper used on the memset path. For QImode value 0xXY produce
15867 0xXYXYXYXY of wide specified by MODE. This is essentially
15868 a * 0x10101010, but we can do slightly better than
15869 synth_mult by unwinding the sequence by hand on CPUs with
15872 promote_duplicated_reg (enum machine_mode mode, rtx val)
15874 enum machine_mode valmode = GET_MODE (val);
15876 int nops = mode == DImode ? 3 : 2;
15878 gcc_assert (mode == SImode || mode == DImode);
15879 if (val == const0_rtx)
15880 return copy_to_mode_reg (mode, const0_rtx);
15881 if (CONST_INT_P (val))
15883 HOST_WIDE_INT v = INTVAL (val) & 255;
15887 if (mode == DImode)
15888 v |= (v << 16) << 16;
15889 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15892 if (valmode == VOIDmode)
15894 if (valmode != QImode)
15895 val = gen_lowpart (QImode, val);
15896 if (mode == QImode)
15898 if (!TARGET_PARTIAL_REG_STALL)
15900 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15901 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15902 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15903 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15905 rtx reg = convert_modes (mode, QImode, val, true);
15906 tmp = promote_duplicated_reg (mode, const1_rtx);
15907 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15912 rtx reg = convert_modes (mode, QImode, val, true);
15914 if (!TARGET_PARTIAL_REG_STALL)
15915 if (mode == SImode)
15916 emit_insn (gen_movsi_insv_1 (reg, reg));
15918 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15921 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15922 NULL, 1, OPTAB_DIRECT);
15924 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15926 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15927 NULL, 1, OPTAB_DIRECT);
15928 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15929 if (mode == SImode)
15931 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15932 NULL, 1, OPTAB_DIRECT);
15933 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15938 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15939 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15940 alignment from ALIGN to DESIRED_ALIGN. */
15942 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15947 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15948 promoted_val = promote_duplicated_reg (DImode, val);
15949 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15950 promoted_val = promote_duplicated_reg (SImode, val);
15951 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15952 promoted_val = promote_duplicated_reg (HImode, val);
15954 promoted_val = val;
15956 return promoted_val;
15959 /* Expand string clear operation (bzero). Use i386 string operations when
15960 profitable. See expand_movmem comment for explanation of individual
15961 steps performed. */
15963 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15964 rtx expected_align_exp, rtx expected_size_exp)
15969 rtx jump_around_label = NULL;
15970 HOST_WIDE_INT align = 1;
15971 unsigned HOST_WIDE_INT count = 0;
15972 HOST_WIDE_INT expected_size = -1;
15973 int size_needed = 0, epilogue_size_needed;
15974 int desired_align = 0;
15975 enum stringop_alg alg;
15976 rtx promoted_val = NULL;
15977 bool force_loopy_epilogue = false;
15980 if (CONST_INT_P (align_exp))
15981 align = INTVAL (align_exp);
15982 /* i386 can do misaligned access on reasonably increased cost. */
15983 if (CONST_INT_P (expected_align_exp)
15984 && INTVAL (expected_align_exp) > align)
15985 align = INTVAL (expected_align_exp);
15986 if (CONST_INT_P (count_exp))
15987 count = expected_size = INTVAL (count_exp);
15988 if (CONST_INT_P (expected_size_exp) && count == 0)
15989 expected_size = INTVAL (expected_size_exp);
15991 /* Make sure we don't need to care about overflow later on. */
15992 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15995 /* Step 0: Decide on preferred algorithm, desired alignment and
15996 size of chunks to be copied by main loop. */
15998 alg = decide_alg (count, expected_size, true, &dynamic_check);
15999 desired_align = decide_alignment (align, alg, expected_size);
16001 if (!TARGET_ALIGN_STRINGOPS)
16002 align = desired_align;
16004 if (alg == libcall)
16006 gcc_assert (alg != no_stringop);
16008 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16009 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
16014 gcc_unreachable ();
16016 size_needed = GET_MODE_SIZE (Pmode);
16018 case unrolled_loop:
16019 size_needed = GET_MODE_SIZE (Pmode) * 4;
16021 case rep_prefix_8_byte:
16024 case rep_prefix_4_byte:
16027 case rep_prefix_1_byte:
16032 epilogue_size_needed = size_needed;
16034 /* Step 1: Prologue guard. */
16036 /* Alignment code needs count to be in register. */
16037 if (CONST_INT_P (count_exp) && desired_align > align)
16039 enum machine_mode mode = SImode;
16040 if (TARGET_64BIT && (count & ~0xffffffff))
16042 count_exp = force_reg (mode, count_exp);
16044 /* Do the cheap promotion to allow better CSE across the
16045 main loop and epilogue (ie one load of the big constant in the
16046 front of all code. */
16047 if (CONST_INT_P (val_exp))
16048 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16049 desired_align, align);
16050 /* Ensure that alignment prologue won't copy past end of block. */
16051 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16053 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16054 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16055 Make sure it is power of 2. */
16056 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16058 /* To improve performance of small blocks, we jump around the VAL
16059 promoting mode. This mean that if the promoted VAL is not constant,
16060 we might not use it in the epilogue and have to use byte
16062 if (epilogue_size_needed > 2 && !promoted_val)
16063 force_loopy_epilogue = true;
16064 label = gen_label_rtx ();
16065 emit_cmp_and_jump_insns (count_exp,
16066 GEN_INT (epilogue_size_needed),
16067 LTU, 0, counter_mode (count_exp), 1, label);
16068 if (GET_CODE (count_exp) == CONST_INT)
16070 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
16071 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16073 predict_jump (REG_BR_PROB_BASE * 20 / 100);
16075 if (dynamic_check != -1)
16077 rtx hot_label = gen_label_rtx ();
16078 jump_around_label = gen_label_rtx ();
16079 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16080 LEU, 0, counter_mode (count_exp), 1, hot_label);
16081 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16082 set_storage_via_libcall (dst, count_exp, val_exp, false);
16083 emit_jump (jump_around_label);
16084 emit_label (hot_label);
16087 /* Step 2: Alignment prologue. */
16089 /* Do the expensive promotion once we branched off the small blocks. */
16091 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16092 desired_align, align);
16093 gcc_assert (desired_align >= 1 && align >= 1);
16095 if (desired_align > align)
16097 /* Except for the first move in epilogue, we no longer know
16098 constant offset in aliasing info. It don't seems to worth
16099 the pain to maintain it for the first move, so throw away
16101 dst = change_address (dst, BLKmode, destreg);
16102 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
16105 if (label && size_needed == 1)
16107 emit_label (label);
16108 LABEL_NUSES (label) = 1;
16112 /* Step 3: Main loop. */
16118 gcc_unreachable ();
16120 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16121 count_exp, QImode, 1, expected_size);
16124 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16125 count_exp, Pmode, 1, expected_size);
16127 case unrolled_loop:
16128 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16129 count_exp, Pmode, 4, expected_size);
16131 case rep_prefix_8_byte:
16132 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16135 case rep_prefix_4_byte:
16136 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16139 case rep_prefix_1_byte:
16140 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16144 /* Adjust properly the offset of src and dest memory for aliasing. */
16145 if (CONST_INT_P (count_exp))
16146 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16147 (count / size_needed) * size_needed);
16149 dst = change_address (dst, BLKmode, destreg);
16151 /* Step 4: Epilogue to copy the remaining bytes. */
16155 /* When the main loop is done, COUNT_EXP might hold original count,
16156 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16157 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16158 bytes. Compensate if needed. */
16160 if (size_needed < desired_align - align)
16163 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16164 GEN_INT (size_needed - 1), count_exp, 1,
16166 size_needed = desired_align - align + 1;
16167 if (tmp != count_exp)
16168 emit_move_insn (count_exp, tmp);
16170 emit_label (label);
16171 LABEL_NUSES (label) = 1;
16173 if (count_exp != const0_rtx && epilogue_size_needed > 1)
16175 if (force_loopy_epilogue)
16176 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16179 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16182 if (jump_around_label)
16183 emit_label (jump_around_label);
16187 /* Expand the appropriate insns for doing strlen if not just doing
16190 out = result, initialized with the start address
16191 align_rtx = alignment of the address.
16192 scratch = scratch register, initialized with the startaddress when
16193 not aligned, otherwise undefined
16195 This is just the body. It needs the initializations mentioned above and
16196 some address computing at the end. These things are done in i386.md. */
16199 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16203 rtx align_2_label = NULL_RTX;
16204 rtx align_3_label = NULL_RTX;
16205 rtx align_4_label = gen_label_rtx ();
16206 rtx end_0_label = gen_label_rtx ();
16208 rtx tmpreg = gen_reg_rtx (SImode);
16209 rtx scratch = gen_reg_rtx (SImode);
16213 if (CONST_INT_P (align_rtx))
16214 align = INTVAL (align_rtx);
16216 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16218 /* Is there a known alignment and is it less than 4? */
16221 rtx scratch1 = gen_reg_rtx (Pmode);
16222 emit_move_insn (scratch1, out);
16223 /* Is there a known alignment and is it not 2? */
16226 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16227 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16229 /* Leave just the 3 lower bits. */
16230 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16231 NULL_RTX, 0, OPTAB_WIDEN);
16233 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16234 Pmode, 1, align_4_label);
16235 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16236 Pmode, 1, align_2_label);
16237 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16238 Pmode, 1, align_3_label);
16242 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16243 check if is aligned to 4 - byte. */
16245 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16246 NULL_RTX, 0, OPTAB_WIDEN);
16248 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16249 Pmode, 1, align_4_label);
16252 mem = change_address (src, QImode, out);
16254 /* Now compare the bytes. */
16256 /* Compare the first n unaligned byte on a byte per byte basis. */
16257 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16258 QImode, 1, end_0_label);
16260 /* Increment the address. */
16262 emit_insn (gen_adddi3 (out, out, const1_rtx));
16264 emit_insn (gen_addsi3 (out, out, const1_rtx));
16266 /* Not needed with an alignment of 2 */
16269 emit_label (align_2_label);
16271 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16275 emit_insn (gen_adddi3 (out, out, const1_rtx));
16277 emit_insn (gen_addsi3 (out, out, const1_rtx));
16279 emit_label (align_3_label);
16282 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16286 emit_insn (gen_adddi3 (out, out, const1_rtx));
16288 emit_insn (gen_addsi3 (out, out, const1_rtx));
16291 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16292 align this loop. It gives only huge programs, but does not help to
16294 emit_label (align_4_label);
16296 mem = change_address (src, SImode, out);
16297 emit_move_insn (scratch, mem);
16299 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
16301 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
16303 /* This formula yields a nonzero result iff one of the bytes is zero.
16304 This saves three branches inside loop and many cycles. */
16306 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16307 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16308 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16309 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16310 gen_int_mode (0x80808080, SImode)));
16311 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
16316 rtx reg = gen_reg_rtx (SImode);
16317 rtx reg2 = gen_reg_rtx (Pmode);
16318 emit_move_insn (reg, tmpreg);
16319 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16321 /* If zero is not in the first two bytes, move two bytes forward. */
16322 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16323 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16324 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16325 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16326 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16329 /* Emit lea manually to avoid clobbering of flags. */
16330 emit_insn (gen_rtx_SET (SImode, reg2,
16331 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16333 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16334 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16335 emit_insn (gen_rtx_SET (VOIDmode, out,
16336 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
16343 rtx end_2_label = gen_label_rtx ();
16344 /* Is zero in the first two bytes? */
16346 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16347 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16348 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16349 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16350 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16352 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16353 JUMP_LABEL (tmp) = end_2_label;
16355 /* Not in the first two. Move two bytes forward. */
16356 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16358 emit_insn (gen_adddi3 (out, out, const2_rtx));
16360 emit_insn (gen_addsi3 (out, out, const2_rtx));
16362 emit_label (end_2_label);
16366 /* Avoid branch in fixing the byte. */
16367 tmpreg = gen_lowpart (QImode, tmpreg);
16368 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16369 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16371 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
16373 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
16375 emit_label (end_0_label);
16378 /* Expand strlen. */
/* Emit RTL computing the length of the nul-terminated string at memory
   operand SRC into register OUT.  EOSCHAR is the terminator operand
   (const0_rtx for plain strlen); ALIGN is the known source alignment.
   Two strategies are visible here: an unrolled word-at-a-time scan via
   ix86_expand_strlensi_unroll_1, and a repne-scasb sequence that
   requires eax/ecx/edi.  NOTE(review): several lines (braces,
   TARGET_64BIT conditionals, return statements) are elided in this
   extraction; the gen_subdi3/gen_subsi3 and gen_adddi3/gen_addsi3
   pairs are presumably the 64-/32-bit arms of elided conditionals. */
16381 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16383 rtx addr, scratch1, scratch2, scratch3, scratch4;
16385 /* The generic case of strlen expander is long. Avoid it's
16386 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16388 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16389 && !TARGET_INLINE_ALL_STRINGOPS
16391 && (!CONST_INT_P (align) || INTVAL (align) < 4))
/* Materialize the string address in a register; the unrolled expander
   and the scasb path both work from a register pointer.  */
16394 addr = force_reg (Pmode, XEXP (src, 0));
16395 scratch1 = gen_reg_rtx (Pmode);
16397 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16400 /* Well it seems that some optimizer does not combine a call like
16401 foo(strlen(bar), strlen(bar));
16402 when the move and the subtraction is done here. It does calculate
16403 the length just once when these instructions are done inside of
16404 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16405 often used and I use one fewer register for the lifetime of
16406 output_strlen_unroll() this is better. */
16408 emit_move_insn (out, addr);
16410 ix86_expand_strlensi_unroll_1 (out, src, align);
16412 /* strlensi_unroll_1 returns the address of the zero at the end of
16413 the string, like memchr(), so compute the length by subtracting
16414 the start address. */
16416 emit_insn (gen_subdi3 (out, out, addr));
16418 emit_insn (gen_subsi3 (out, out, addr));
16424 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16425 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16428 scratch2 = gen_reg_rtx (Pmode);
16429 scratch3 = gen_reg_rtx (Pmode)
16430 scratch4 = force_reg (Pmode, constm1_rtx);
16432 emit_move_insn (scratch3, addr);
16433 eoschar = force_reg (QImode, eoschar);
16435 src = replace_equiv_address_nv (src, scratch3);
16437 /* If .md starts supporting :P, this can be done in .md. */
/* The UNSPEC bundles the scas operands (string, terminator, alignment,
   count = -1) so the strlenqi_1 pattern can match them as one unit.  */
16438 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16439 scratch4), UNSPEC_SCAS);
16440 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* scasb leaves -(len+2) in the count; NOT and add -1 recover len.  */
16443 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
16444 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
16448 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
16449 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
16455 /* For given symbol (function) construct code to compute address of it's PLT
16456 entry in large x86-64 PIC model. */
/* Returns a fresh pseudo holding GOT base + PLTOFF(symbol).  Only valid
   under -mcmodel=large -fpic, as the asserts enforce.  NOTE(review):
   the return type line and `return tmp;` are elided in this view.  */
16458 construct_plt_address (rtx symbol)
16460 rtx tmp = gen_reg_rtx (Pmode);
16461 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16463 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16464 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
/* tmp = PLT offset constant, then add the PIC register to form the
   absolute address of the PLT entry.  */
16466 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
16467 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call (or sibcall when SIBCALL is nonzero) to FNADDR with
   argument-size operand CALLARG1.  CALLARG2, when a nonnegative
   constant on 64-bit targets, is the number of SSE registers used by a
   varargs call and is loaded into %al per the ABI.  POP, when nonzero,
   is the byte count the callee pops (callee-popped conventions); it is
   attached to the call as a parallel stack-pointer adjustment.
   NOTE(review): braces and some conditional lines are elided here.  */
16472 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16473 rtx callarg2 ATTRIBUTE_UNUSED,
16474 rtx pop, int sibcall)
16476 rtx use = NULL, call;
16478 if (pop == const0_rtx)
16480 gcc_assert (!TARGET_64BIT || !pop);
16482 if (TARGET_MACHO && !TARGET_64BIT)
16485 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16486 fnaddr = machopic_indirect_call_target (fnaddr);
16491 /* Static functions and indirect calls don't need the pic register. */
16492 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16493 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16494 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16495 use_reg (&use, pic_offset_table_rtx)
/* x86-64 varargs: %al carries the count of SSE regs holding args.  */
16498 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16500 rtx al = gen_rtx_REG (QImode, AX_REG);
16501 emit_move_insn (al, callarg2);
16502 use_reg (&use, al);
/* Large PIC model: calls to non-local symbols must go through the
   computed PLT entry address.  */
16505 if (ix86_cmodel == CM_LARGE_PIC
16506 && GET_CODE (fnaddr) == MEM
16507 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16508 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16509 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16510 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16512 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16513 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls through a register must use r11: it is the only
   call-clobbered register not used for argument passing.  */
16515 if (sibcall && TARGET_64BIT
16516 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16519 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16520 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16521 emit_move_insn (fnaddr, addr);
16522 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16525 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16527 call = gen_rtx_SET (VOIDmode, retval, call);
/* Represent the callee's stack pop as a parallel sp adjustment.  */
16530 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16531 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16532 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16535 call = emit_call_insn (call);
16537 CALL_INSN_FUNCTION_USAGE (call) = use;
16541 /* Clear stack slot assignments remembered from previous functions.
16542 This is called from INIT_EXPANDERS once before RTL is emitted for each
/* Allocate and return a zeroed per-function machine_function record,
   with the lazily-computed fields set to their "unknown" values.
   NOTE(review): the `return f;` line is elided in this view.  */
16545 static struct machine_function *
16546 ix86_init_machine_status (void)
16548 struct machine_function *f;
16550 f = GGC_CNEW (struct machine_function);
/* -1 means "not yet computed" for the fast prologue/epilogue check.  */
16551 f->use_fast_prologue_epilogue_nregs = -1;
16552 f->tls_descriptor_call_expanded_p = 0;
16553 f->call_abi = DEFAULT_ABI;
16558 /* Return a MEM corresponding to a stack slot with mode MODE.
16559 Allocate a new slot if necessary.
16561 The RTL for a function can have several slots available: N is
16562 which slot to use. */
/* Slots are cached per (mode, n) on the ix86_stack_locals list so
   repeated requests share one stack location; copy_rtx is returned so
   callers may modify the MEM without corrupting the cache.
   NOTE(review): the final `return copy_rtx (s->rtl);` for the newly
   allocated slot is elided in this view.  */
16565 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16567 struct stack_local_entry *s;
16569 gcc_assert (n < MAX_386_STACK_LOCALS);
16571 /* Virtual slot is valid only before vregs are instantiated. */
16572 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse an existing slot of the same mode and index if present.  */
16574 for (s = ix86_stack_locals; s; s = s->next)
16575 if (s->mode == mode && s->n == n)
16576 return copy_rtx (s->rtl);
16578 s = (struct stack_local_entry *)
16579 ggc_alloc (sizeof (struct stack_local_entry));
16582 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
/* Push the new entry on the front of the cache list.  */
16584 s->next = ix86_stack_locals;
16585 ix86_stack_locals = s;
16589 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16591 static GTY(()) rtx ix86_tls_symbol;
/* Lazily create and cache the SYMBOL_REF used for TLS address lookup
   calls.  GNU TLS uses the triple-underscore entry point (a line of
   the condition is elided here — presumably the !TARGET_64BIT test).  */
16593 ix86_tls_get_addr (void)
16596 if (!ix86_tls_symbol)
16598 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16599 (TARGET_ANY_GNU_TLS
16601 ? "___tls_get_addr"
16602 : "__tls_get_addr");
16605 return ix86_tls_symbol;
16608 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16610 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily create and cache the _TLS_MODULE_BASE_ symbol, marking it
   with the global-dynamic TLS model flag so later code treats it as a
   TLS reference.  */
16612 ix86_tls_module_base (void)
16615 if (!ix86_tls_module_base_symbol)
16617 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16618 "_TLS_MODULE_BASE_");
16619 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16620 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16623 return ix86_tls_module_base_symbol;
16626 /* Calculate the length of the memory address in the instruction
16627 encoding. Does not include the one-byte modrm, opcode, or prefix. */
/* Decomposes ADDR into base/index/displacement and returns the number
   of extra encoding bytes (SIB byte, displacement bytes) required.
   NOTE(review): the `len` accumulation statements and the final return
   are elided in this extraction; only the classification tests are
   visible.  */
16630 memory_address_length (rtx addr)
16632 struct ix86_address parts;
16633 rtx base, index, disp;
/* Auto-inc/dec addressing encodes as a plain register operand.  */
16637 if (GET_CODE (addr) == PRE_DEC
16638 || GET_CODE (addr) == POST_INC
16639 || GET_CODE (addr) == PRE_MODIFY
16640 || GET_CODE (addr) == POST_MODIFY)
16643 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register-identity checks below see hard/pseudo
   registers directly.  */
16646 if (parts.base && GET_CODE (parts.base) == SUBREG)
16647 parts.base = SUBREG_REG (parts.base);
16648 if (parts.index && GET_CODE (parts.index) == SUBREG)
16649 parts.index = SUBREG_REG (parts.index);
16652 index = parts.index;
16657 - esp as the base always wants an index,
16658 - ebp as the base always wants a displacement. */
16660 /* Register Indirect. */
16661 if (base && !index && !disp)
16663 /* esp (for its index) and ebp (for its displacement) need
16664 the two-byte modrm form. */
16665 if (addr == stack_pointer_rtx
16666 || addr == arg_pointer_rtx
16667 || addr == frame_pointer_rtx
16668 || addr == hard_frame_pointer_rtx)
16672 /* Direct Addressing. */
16673 else if (disp && !base && !index)
16678 /* Find the length of the displacement constant. */
/* satisfies_constraint_K is the signed-8-bit test: short disp8 form.  */
16681 if (base && satisfies_constraint_K (disp))
16686 /* ebp always wants a displacement. */
16687 else if (base == hard_frame_pointer_rtx)
16690 /* An index requires the two-byte modrm form.... */
16692 /* ...like esp, which always wants an index. */
16693 || base == stack_pointer_rtx
16694 || base == arg_pointer_rtx
16695 || base == frame_pointer_rtx)
16702 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16703 is set, expect that insn have 8bit immediate alternative. */
/* Scans INSN's operands for a constant and returns its encoded size in
   bytes based on the insn's mode attribute.  NOTE(review): the switch
   arms and return statements are elided in this view.  */
16705 ix86_attr_length_immediate_default (rtx insn, int shortform)
16709 extract_insn_cached (insn);
16710 for (i = recog_data.n_operands - 1; i >= 0; --i)
16711 if (CONSTANT_P (recog_data.operand[i]))
/* With a short-form alternative, a signed-8-bit immediate (constraint
   K) needs only one byte.  */
16714 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16718 switch (get_attr_mode (insn))
16729 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16734 fatal_insn ("unknown insn mode", insn);
16740 /* Compute default value for "length_address" attribute. */
/* Returns the extra address-encoding bytes for INSN.  For LEA the
   address is the SET_SRC of the pattern; otherwise the first MEM
   operand found is measured.  NOTE(review): the fall-through return
   for insns with no MEM operand is elided in this view.  */
16742 ix86_attr_length_address_default (rtx insn)
16746 if (get_attr_type (insn) == TYPE_LEA)
16748 rtx set = PATTERN (insn);
/* An LEA pattern may carry a clobber in a PARALLEL; look at its
   first element.  */
16750 if (GET_CODE (set) == PARALLEL)
16751 set = XVECEXP (set, 0, 0);
16753 gcc_assert (GET_CODE (set) == SET);
16755 return memory_address_length (SET_SRC (set));
16758 extract_insn_cached (insn);
16759 for (i = recog_data.n_operands - 1; i >= 0; --i)
16760 if (MEM_P (recog_data.operand[i]))
16762 return memory_address_length (XEXP (recog_data.operand[i], 0));
16768 /* Return the maximum number of instructions a cpu can issue. */
/* Per-processor issue width used by the scheduler.
   NOTE(review): the return-value lines for each case group are elided
   in this extraction; only the case labels are visible.  */
16771 ix86_issue_rate (void)
16775 case PROCESSOR_PENTIUM:
16779 case PROCESSOR_PENTIUMPRO:
16780 case PROCESSOR_PENTIUM4:
16781 case PROCESSOR_ATHLON:
16783 case PROCESSOR_AMDFAM10:
16784 case PROCESSOR_NOCONA:
16785 case PROCESSOR_GENERIC32:
16786 case PROCESSOR_GENERIC64:
16789 case PROCESSOR_CORE2:
16797 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16798 by DEP_INSN and nothing set by DEP_INSN. */
/* INSN must be a flags consumer (setcc / integer or FP cmov / branch).
   DEP_INSN may be a single_set or a two-element PARALLEL of two SETs
   (e.g. an arithmetic op that also sets the flags); both destinations
   are collected and checked against INSN's uses.  */
16801 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16805 /* Simplify the test for uninteresting insns. */
16806 if (insn_type != TYPE_SETCC
16807 && insn_type != TYPE_ICMOV
16808 && insn_type != TYPE_FCMOV
16809 && insn_type != TYPE_IBR)
16812 if ((set = single_set (dep_insn)) != 0)
16814 set = SET_DEST (set);
16817 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16818 && XVECLEN (PATTERN (dep_insn), 0) == 2
16819 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16820 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
/* Fix: SET2 must come from the SECOND element of the PARALLEL; the
   original read element 0 twice, so the second destination was never
   checked and the set2 test below was a redundant re-test of SET.  */
16822 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
16823 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16828 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16831 /* This test is true if the dependent insn reads the flags but
16832 not any other potentially set register. */
16833 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16836 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16842 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16843 address with operands set by DEP_INSN. */
/* For LEA the "address" is the SET_SRC of the pattern; otherwise the
   first MEM operand's address is used.  The result is whether DEP_INSN
   modifies any register appearing in that address (an address
   generation interlock).  NOTE(review): the non-memory early return is
   elided in this view.  */
16846 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16850 if (insn_type == TYPE_LEA
16853 addr = PATTERN (insn);
/* LEA may be wrapped in a PARALLEL with a clobber.  */
16855 if (GET_CODE (addr) == PARALLEL)
16856 addr = XVECEXP (addr, 0, 0);
16858 gcc_assert (GET_CODE (addr) == SET);
16860 addr = SET_SRC (addr);
16865 extract_insn_cached (insn);
16866 for (i = recog_data.n_operands - 1; i >= 0; --i)
16867 if (MEM_P (recog_data.operand[i]))
16869 addr = XEXP (recog_data.operand[i], 0);
16876 return modified_in_p (addr, dep_insn);
/* TARGET_SCHED_ADJUST_COST hook: adjust the scheduler's latency COST
   of the dependence LINK between DEP_INSN (producer) and INSN
   (consumer) according to the processor's pipeline quirks.
   NOTE(review): the actual cost-assignment statements and the final
   `return cost;` are elided in this extraction; only the tests are
   visible.  */
16880 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16882 enum attr_type insn_type, dep_insn_type;
16883 enum attr_memory memory;
16885 int dep_insn_code_number;
16887 /* Anti and output dependencies have zero cost on all CPUs. */
16888 if (REG_NOTE_KIND (link) != 0)
16891 dep_insn_code_number = recog_memoized (dep_insn);
16893 /* If we can't recognize the insns, we can't really do anything. */
16894 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16897 insn_type = get_attr_type (insn);
16898 dep_insn_type = get_attr_type (dep_insn);
16902 case PROCESSOR_PENTIUM:
16903 /* Address Generation Interlock adds a cycle of latency. */
16904 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16907 /* ??? Compares pair with jump/setcc. */
16908 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16911 /* Floating point stores require value to be ready one cycle earlier. */
16912 if (insn_type == TYPE_FMOV
16913 && get_attr_memory (insn) == MEMORY_STORE
16914 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16918 case PROCESSOR_PENTIUMPRO:
16919 memory = get_attr_memory (insn);
16921 /* INT->FP conversion is expensive. */
16922 if (get_attr_fp_int_src (dep_insn))
16925 /* There is one cycle extra latency between an FP op and a store. */
16926 if (insn_type == TYPE_FMOV
16927 && (set = single_set (dep_insn)) != NULL_RTX
16928 && (set2 = single_set (insn)) != NULL_RTX
16929 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16930 && MEM_P (SET_DEST (set2)))
16933 /* Show ability of reorder buffer to hide latency of load by executing
16934 in parallel with previous instruction in case
16935 previous instruction is not needed to compute the address. */
16936 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16937 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16939 /* Claim moves to take one cycle, as core can issue one load
16940 at time and the next load can start cycle later. */
16941 if (dep_insn_type == TYPE_IMOV
16942 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): the case label for the processor handled below is
   elided in this view.  */
16950 memory = get_attr_memory (insn);
16952 /* The esp dependency is resolved before the instruction is really
16954 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16955 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16958 /* INT->FP conversion is expensive. */
16959 if (get_attr_fp_int_src (dep_insn))
16962 /* Show ability of reorder buffer to hide latency of load by executing
16963 in parallel with previous instruction in case
16964 previous instruction is not needed to compute the address. */
16965 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16966 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16968 /* Claim moves to take one cycle, as core can issue one load
16969 at time and the next load can start cycle later. */
16970 if (dep_insn_type == TYPE_IMOV
16971 || dep_insn_type == TYPE_FMOV)
16980 case PROCESSOR_ATHLON:
16982 case PROCESSOR_AMDFAM10:
16983 case PROCESSOR_GENERIC32:
16984 case PROCESSOR_GENERIC64:
16985 memory = get_attr_memory (insn);
16987 /* Show ability of reorder buffer to hide latency of load by executing
16988 in parallel with previous instruction in case
16989 previous instruction is not needed to compute the address. */
16990 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16991 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16993 enum attr_unit unit = get_attr_unit (insn);
16996 /* Because of the difference between the length of integer and
16997 floating unit pipeline preparation stages, the memory operands
16998 for floating point are cheaper.
17000 ??? For Athlon it the difference is most probably 2. */
17001 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
17004 loadcost = TARGET_ATHLON ? 2 : 0;
17006 if (cost >= loadcost)
17019 /* How many alternative schedules to try. This should be as wide as the
17020 scheduling freedom in the DFA, but no wider. Making this value too
17021 large results extra work for the scheduler. */
/* TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD hook.
   NOTE(review): the per-processor return values and default case are
   elided in this extraction.  */
17024 ia32_multipass_dfa_lookahead (void)
17028 case PROCESSOR_PENTIUM:
17031 case PROCESSOR_PENTIUMPRO:
17041 /* Compute the alignment given to a constant that is being placed in memory.
17042 EXP is the constant and ALIGN is the alignment that the object would
17044 The value of this function is used instead of that alignment to align
/* Implements CONSTANT_ALIGNMENT: bump numeric/vector constants to
   their natural mode alignment and long string constants to word
   alignment for faster copies.  NOTE(review): some return lines and
   the fall-through `return align;` are elided in this view.  */
17048 ix86_constant_alignment (tree exp, int align)
17050 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17051 || TREE_CODE (exp) == INTEGER_CST)
17053 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
17055 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Word-align long strings, but not at -Os (wastes space).  */
17058 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17059 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17060 return BITS_PER_WORD;
17065 /* Compute the alignment for a static variable.
17066 TYPE is the data type, and ALIGN is the alignment that
17067 the object would ordinarily have. The value of this function is used
17068 instead of that alignment to align the object. */
/* Implements DATA_ALIGNMENT: raise alignment of large aggregates,
   arrays, complex and scalar FP/vector data for better memory-op
   performance.  The TREE_INT_CST_HIGH tests catch sizes that overflow
   the low word.  NOTE(review): the value-returning lines are elided in
   this view.  */
17071 ix86_data_alignment (tree type, int align)
/* Cap at word alignment when optimizing for size, else at 256 bits (or
   the object-file maximum if smaller).  */
17073 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
17075 if (AGGREGATE_TYPE_P (type)
17076 && TYPE_SIZE (type)
17077 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17078 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
17079 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
17080 && align < max_align)
17083 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17084 to 16byte boundary. */
17087 if (AGGREGATE_TYPE_P (type)
17088 && TYPE_SIZE (type)
17089 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17090 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
17091 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17095 if (TREE_CODE (type) == ARRAY_TYPE)
17097 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17099 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17102 else if (TREE_CODE (type) == COMPLEX_TYPE)
17105 if (TYPE_MODE (type) == DCmode && align < 64)
17107 if (TYPE_MODE (type) == XCmode && align < 128)
/* For records/unions, key off the mode of the first field.  */
17110 else if ((TREE_CODE (type) == RECORD_TYPE
17111 || TREE_CODE (type) == UNION_TYPE
17112 || TREE_CODE (type) == QUAL_UNION_TYPE)
17113 && TYPE_FIELDS (type))
17115 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17117 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17120 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17121 || TREE_CODE (type) == INTEGER_TYPE)
17123 if (TYPE_MODE (type) == DFmode && align < 64)
17125 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17132 /* Compute the alignment for a local variable or a stack slot. TYPE is
17133 the data type, MODE is the widest mode available and ALIGN is the
17134 alignment that the object would ordinarily have. The value of this
17135 macro is used instead of that alignment to align the object. */
/* Stack-slot analogue of ix86_data_alignment; the per-type cases below
   mirror that function.  NOTE(review): the value-returning lines are
   elided in this view.  */
17138 ix86_local_alignment (tree type, enum machine_mode mode,
17139 unsigned int align)
17141 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17142 register in MODE. We will return the largest alignment of XF
/* Caller-save slots for XFmode use DFmode alignment at minimum.  */
17146 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17147 align = GET_MODE_ALIGNMENT (DFmode);
17151 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17152 to 16byte boundary. */
17155 if (AGGREGATE_TYPE_P (type)
17156 && TYPE_SIZE (type)
17157 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17158 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17159 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
17162 if (TREE_CODE (type) == ARRAY_TYPE)
17164 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17166 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17169 else if (TREE_CODE (type) == COMPLEX_TYPE)
17171 if (TYPE_MODE (type) == DCmode && align < 64)
17173 if (TYPE_MODE (type) == XCmode && align < 128)
/* For records/unions, key off the mode of the first field.  */
17176 else if ((TREE_CODE (type) == RECORD_TYPE
17177 || TREE_CODE (type) == UNION_TYPE
17178 || TREE_CODE (type) == QUAL_UNION_TYPE)
17179 && TYPE_FIELDS (type))
17181 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17183 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17186 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17187 || TREE_CODE (type) == INTEGER_TYPE)
17190 if (TYPE_MODE (type) == DFmode && align < 64)
17192 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17198 /* Emit RTL insns to initialize the variable parts of a trampoline.
17199 FNADDR is an RTX for the address of the function's pure code.
17200 CXT is an RTX for the static chain value for the function. */
/* 32-bit form: mov $cxt,%ecx (0xb9 + imm32) then jmp rel32 (0xe9 +
   disp32).  64-bit form: load fnaddr into r11 (short movl form 41 bb
   when the address fits a zero-extended 32-bit immediate, else movabs
   49 bb), movabs $cxt,%r10 (49 ba), jmp *%r11 (49 ff e3).  All opcode
   bytes are stored little-endian as HImode/QImode immediates.
   NOTE(review): the TARGET_64BIT split, `offset += ...` updates and
   the movabs operand lines are elided in this extraction.  */
17202 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17206 /* Compute offset from the end of the jmp to the target function. */
/* jmp rel32 is relative to the end of the 10-byte trampoline.  */
17207 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17208 plus_constant (tramp, 10),
17209 NULL_RTX, 1, OPTAB_DIRECT);
17210 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17211 gen_int_mode (0xb9, QImode));
17212 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17213 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17214 gen_int_mode (0xe9, QImode));
17215 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17220 /* Try to load address using shorter movl instead of movabs.
17221 We may want to support movq for kernel mode, but kernel does not use
17222 trampolines at the moment. */
17223 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
17225 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17226 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17227 gen_int_mode (0xbb41, HImode));
17228 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17229 gen_lowpart (SImode, fnaddr));
17234 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17235 gen_int_mode (0xbb49, HImode));
17236 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17240 /* Load static chain using movabs to r10. */
17241 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17242 gen_int_mode (0xba49, HImode));
17243 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17246 /* Jump to the r11 */
17247 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17248 gen_int_mode (0xff49, HImode));
17249 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17250 gen_int_mode (0xe3, QImode));
17252 gcc_assert (offset <= TRAMPOLINE_SIZE);
17255 #ifdef ENABLE_EXECUTE_STACK
/* Some systems need an explicit call to make the stack executable.  */
17256 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17257 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17261 /* Codes for all the SSE/MMX builtins. */
17264 IX86_BUILTIN_ADDPS,
17265 IX86_BUILTIN_ADDSS,
17266 IX86_BUILTIN_DIVPS,
17267 IX86_BUILTIN_DIVSS,
17268 IX86_BUILTIN_MULPS,
17269 IX86_BUILTIN_MULSS,
17270 IX86_BUILTIN_SUBPS,
17271 IX86_BUILTIN_SUBSS,
17273 IX86_BUILTIN_CMPEQPS,
17274 IX86_BUILTIN_CMPLTPS,
17275 IX86_BUILTIN_CMPLEPS,
17276 IX86_BUILTIN_CMPGTPS,
17277 IX86_BUILTIN_CMPGEPS,
17278 IX86_BUILTIN_CMPNEQPS,
17279 IX86_BUILTIN_CMPNLTPS,
17280 IX86_BUILTIN_CMPNLEPS,
17281 IX86_BUILTIN_CMPNGTPS,
17282 IX86_BUILTIN_CMPNGEPS,
17283 IX86_BUILTIN_CMPORDPS,
17284 IX86_BUILTIN_CMPUNORDPS,
17285 IX86_BUILTIN_CMPEQSS,
17286 IX86_BUILTIN_CMPLTSS,
17287 IX86_BUILTIN_CMPLESS,
17288 IX86_BUILTIN_CMPNEQSS,
17289 IX86_BUILTIN_CMPNLTSS,
17290 IX86_BUILTIN_CMPNLESS,
17291 IX86_BUILTIN_CMPNGTSS,
17292 IX86_BUILTIN_CMPNGESS,
17293 IX86_BUILTIN_CMPORDSS,
17294 IX86_BUILTIN_CMPUNORDSS,
17296 IX86_BUILTIN_COMIEQSS,
17297 IX86_BUILTIN_COMILTSS,
17298 IX86_BUILTIN_COMILESS,
17299 IX86_BUILTIN_COMIGTSS,
17300 IX86_BUILTIN_COMIGESS,
17301 IX86_BUILTIN_COMINEQSS,
17302 IX86_BUILTIN_UCOMIEQSS,
17303 IX86_BUILTIN_UCOMILTSS,
17304 IX86_BUILTIN_UCOMILESS,
17305 IX86_BUILTIN_UCOMIGTSS,
17306 IX86_BUILTIN_UCOMIGESS,
17307 IX86_BUILTIN_UCOMINEQSS,
17309 IX86_BUILTIN_CVTPI2PS,
17310 IX86_BUILTIN_CVTPS2PI,
17311 IX86_BUILTIN_CVTSI2SS,
17312 IX86_BUILTIN_CVTSI642SS,
17313 IX86_BUILTIN_CVTSS2SI,
17314 IX86_BUILTIN_CVTSS2SI64,
17315 IX86_BUILTIN_CVTTPS2PI,
17316 IX86_BUILTIN_CVTTSS2SI,
17317 IX86_BUILTIN_CVTTSS2SI64,
17319 IX86_BUILTIN_MAXPS,
17320 IX86_BUILTIN_MAXSS,
17321 IX86_BUILTIN_MINPS,
17322 IX86_BUILTIN_MINSS,
17324 IX86_BUILTIN_LOADUPS,
17325 IX86_BUILTIN_STOREUPS,
17326 IX86_BUILTIN_MOVSS,
17328 IX86_BUILTIN_MOVHLPS,
17329 IX86_BUILTIN_MOVLHPS,
17330 IX86_BUILTIN_LOADHPS,
17331 IX86_BUILTIN_LOADLPS,
17332 IX86_BUILTIN_STOREHPS,
17333 IX86_BUILTIN_STORELPS,
17335 IX86_BUILTIN_MASKMOVQ,
17336 IX86_BUILTIN_MOVMSKPS,
17337 IX86_BUILTIN_PMOVMSKB,
17339 IX86_BUILTIN_MOVNTPS,
17340 IX86_BUILTIN_MOVNTQ,
17342 IX86_BUILTIN_LOADDQU,
17343 IX86_BUILTIN_STOREDQU,
17345 IX86_BUILTIN_PACKSSWB,
17346 IX86_BUILTIN_PACKSSDW,
17347 IX86_BUILTIN_PACKUSWB,
17349 IX86_BUILTIN_PADDB,
17350 IX86_BUILTIN_PADDW,
17351 IX86_BUILTIN_PADDD,
17352 IX86_BUILTIN_PADDQ,
17353 IX86_BUILTIN_PADDSB,
17354 IX86_BUILTIN_PADDSW,
17355 IX86_BUILTIN_PADDUSB,
17356 IX86_BUILTIN_PADDUSW,
17357 IX86_BUILTIN_PSUBB,
17358 IX86_BUILTIN_PSUBW,
17359 IX86_BUILTIN_PSUBD,
17360 IX86_BUILTIN_PSUBQ,
17361 IX86_BUILTIN_PSUBSB,
17362 IX86_BUILTIN_PSUBSW,
17363 IX86_BUILTIN_PSUBUSB,
17364 IX86_BUILTIN_PSUBUSW,
17367 IX86_BUILTIN_PANDN,
17371 IX86_BUILTIN_PAVGB,
17372 IX86_BUILTIN_PAVGW,
17374 IX86_BUILTIN_PCMPEQB,
17375 IX86_BUILTIN_PCMPEQW,
17376 IX86_BUILTIN_PCMPEQD,
17377 IX86_BUILTIN_PCMPGTB,
17378 IX86_BUILTIN_PCMPGTW,
17379 IX86_BUILTIN_PCMPGTD,
17381 IX86_BUILTIN_PMADDWD,
17383 IX86_BUILTIN_PMAXSW,
17384 IX86_BUILTIN_PMAXUB,
17385 IX86_BUILTIN_PMINSW,
17386 IX86_BUILTIN_PMINUB,
17388 IX86_BUILTIN_PMULHUW,
17389 IX86_BUILTIN_PMULHW,
17390 IX86_BUILTIN_PMULLW,
17392 IX86_BUILTIN_PSADBW,
17393 IX86_BUILTIN_PSHUFW,
17395 IX86_BUILTIN_PSLLW,
17396 IX86_BUILTIN_PSLLD,
17397 IX86_BUILTIN_PSLLQ,
17398 IX86_BUILTIN_PSRAW,
17399 IX86_BUILTIN_PSRAD,
17400 IX86_BUILTIN_PSRLW,
17401 IX86_BUILTIN_PSRLD,
17402 IX86_BUILTIN_PSRLQ,
17403 IX86_BUILTIN_PSLLWI,
17404 IX86_BUILTIN_PSLLDI,
17405 IX86_BUILTIN_PSLLQI,
17406 IX86_BUILTIN_PSRAWI,
17407 IX86_BUILTIN_PSRADI,
17408 IX86_BUILTIN_PSRLWI,
17409 IX86_BUILTIN_PSRLDI,
17410 IX86_BUILTIN_PSRLQI,
17412 IX86_BUILTIN_PUNPCKHBW,
17413 IX86_BUILTIN_PUNPCKHWD,
17414 IX86_BUILTIN_PUNPCKHDQ,
17415 IX86_BUILTIN_PUNPCKLBW,
17416 IX86_BUILTIN_PUNPCKLWD,
17417 IX86_BUILTIN_PUNPCKLDQ,
17419 IX86_BUILTIN_SHUFPS,
17421 IX86_BUILTIN_RCPPS,
17422 IX86_BUILTIN_RCPSS,
17423 IX86_BUILTIN_RSQRTPS,
17424 IX86_BUILTIN_RSQRTPS_NR,
17425 IX86_BUILTIN_RSQRTSS,
17426 IX86_BUILTIN_RSQRTF,
17427 IX86_BUILTIN_SQRTPS,
17428 IX86_BUILTIN_SQRTPS_NR,
17429 IX86_BUILTIN_SQRTSS,
17431 IX86_BUILTIN_UNPCKHPS,
17432 IX86_BUILTIN_UNPCKLPS,
17434 IX86_BUILTIN_ANDPS,
17435 IX86_BUILTIN_ANDNPS,
17437 IX86_BUILTIN_XORPS,
17440 IX86_BUILTIN_LDMXCSR,
17441 IX86_BUILTIN_STMXCSR,
17442 IX86_BUILTIN_SFENCE,
17444 /* 3DNow! Original */
17445 IX86_BUILTIN_FEMMS,
17446 IX86_BUILTIN_PAVGUSB,
17447 IX86_BUILTIN_PF2ID,
17448 IX86_BUILTIN_PFACC,
17449 IX86_BUILTIN_PFADD,
17450 IX86_BUILTIN_PFCMPEQ,
17451 IX86_BUILTIN_PFCMPGE,
17452 IX86_BUILTIN_PFCMPGT,
17453 IX86_BUILTIN_PFMAX,
17454 IX86_BUILTIN_PFMIN,
17455 IX86_BUILTIN_PFMUL,
17456 IX86_BUILTIN_PFRCP,
17457 IX86_BUILTIN_PFRCPIT1,
17458 IX86_BUILTIN_PFRCPIT2,
17459 IX86_BUILTIN_PFRSQIT1,
17460 IX86_BUILTIN_PFRSQRT,
17461 IX86_BUILTIN_PFSUB,
17462 IX86_BUILTIN_PFSUBR,
17463 IX86_BUILTIN_PI2FD,
17464 IX86_BUILTIN_PMULHRW,
17466 /* 3DNow! Athlon Extensions */
17467 IX86_BUILTIN_PF2IW,
17468 IX86_BUILTIN_PFNACC,
17469 IX86_BUILTIN_PFPNACC,
17470 IX86_BUILTIN_PI2FW,
17471 IX86_BUILTIN_PSWAPDSI,
17472 IX86_BUILTIN_PSWAPDSF,
17475 IX86_BUILTIN_ADDPD,
17476 IX86_BUILTIN_ADDSD,
17477 IX86_BUILTIN_DIVPD,
17478 IX86_BUILTIN_DIVSD,
17479 IX86_BUILTIN_MULPD,
17480 IX86_BUILTIN_MULSD,
17481 IX86_BUILTIN_SUBPD,
17482 IX86_BUILTIN_SUBSD,
17484 IX86_BUILTIN_CMPEQPD,
17485 IX86_BUILTIN_CMPLTPD,
17486 IX86_BUILTIN_CMPLEPD,
17487 IX86_BUILTIN_CMPGTPD,
17488 IX86_BUILTIN_CMPGEPD,
17489 IX86_BUILTIN_CMPNEQPD,
17490 IX86_BUILTIN_CMPNLTPD,
17491 IX86_BUILTIN_CMPNLEPD,
17492 IX86_BUILTIN_CMPNGTPD,
17493 IX86_BUILTIN_CMPNGEPD,
17494 IX86_BUILTIN_CMPORDPD,
17495 IX86_BUILTIN_CMPUNORDPD,
17496 IX86_BUILTIN_CMPEQSD,
17497 IX86_BUILTIN_CMPLTSD,
17498 IX86_BUILTIN_CMPLESD,
17499 IX86_BUILTIN_CMPNEQSD,
17500 IX86_BUILTIN_CMPNLTSD,
17501 IX86_BUILTIN_CMPNLESD,
17502 IX86_BUILTIN_CMPORDSD,
17503 IX86_BUILTIN_CMPUNORDSD,
17505 IX86_BUILTIN_COMIEQSD,
17506 IX86_BUILTIN_COMILTSD,
17507 IX86_BUILTIN_COMILESD,
17508 IX86_BUILTIN_COMIGTSD,
17509 IX86_BUILTIN_COMIGESD,
17510 IX86_BUILTIN_COMINEQSD,
17511 IX86_BUILTIN_UCOMIEQSD,
17512 IX86_BUILTIN_UCOMILTSD,
17513 IX86_BUILTIN_UCOMILESD,
17514 IX86_BUILTIN_UCOMIGTSD,
17515 IX86_BUILTIN_UCOMIGESD,
17516 IX86_BUILTIN_UCOMINEQSD,
17518 IX86_BUILTIN_MAXPD,
17519 IX86_BUILTIN_MAXSD,
17520 IX86_BUILTIN_MINPD,
17521 IX86_BUILTIN_MINSD,
17523 IX86_BUILTIN_ANDPD,
17524 IX86_BUILTIN_ANDNPD,
17526 IX86_BUILTIN_XORPD,
17528 IX86_BUILTIN_SQRTPD,
17529 IX86_BUILTIN_SQRTSD,
17531 IX86_BUILTIN_UNPCKHPD,
17532 IX86_BUILTIN_UNPCKLPD,
17534 IX86_BUILTIN_SHUFPD,
17536 IX86_BUILTIN_LOADUPD,
17537 IX86_BUILTIN_STOREUPD,
17538 IX86_BUILTIN_MOVSD,
17540 IX86_BUILTIN_LOADHPD,
17541 IX86_BUILTIN_LOADLPD,
17543 IX86_BUILTIN_CVTDQ2PD,
17544 IX86_BUILTIN_CVTDQ2PS,
17546 IX86_BUILTIN_CVTPD2DQ,
17547 IX86_BUILTIN_CVTPD2PI,
17548 IX86_BUILTIN_CVTPD2PS,
17549 IX86_BUILTIN_CVTTPD2DQ,
17550 IX86_BUILTIN_CVTTPD2PI,
17552 IX86_BUILTIN_CVTPI2PD,
17553 IX86_BUILTIN_CVTSI2SD,
17554 IX86_BUILTIN_CVTSI642SD,
17556 IX86_BUILTIN_CVTSD2SI,
17557 IX86_BUILTIN_CVTSD2SI64,
17558 IX86_BUILTIN_CVTSD2SS,
17559 IX86_BUILTIN_CVTSS2SD,
17560 IX86_BUILTIN_CVTTSD2SI,
17561 IX86_BUILTIN_CVTTSD2SI64,
17563 IX86_BUILTIN_CVTPS2DQ,
17564 IX86_BUILTIN_CVTPS2PD,
17565 IX86_BUILTIN_CVTTPS2DQ,
17567 IX86_BUILTIN_MOVNTI,
17568 IX86_BUILTIN_MOVNTPD,
17569 IX86_BUILTIN_MOVNTDQ,
17572 IX86_BUILTIN_MASKMOVDQU,
17573 IX86_BUILTIN_MOVMSKPD,
17574 IX86_BUILTIN_PMOVMSKB128,
17576 IX86_BUILTIN_PACKSSWB128,
17577 IX86_BUILTIN_PACKSSDW128,
17578 IX86_BUILTIN_PACKUSWB128,
17580 IX86_BUILTIN_PADDB128,
17581 IX86_BUILTIN_PADDW128,
17582 IX86_BUILTIN_PADDD128,
17583 IX86_BUILTIN_PADDQ128,
17584 IX86_BUILTIN_PADDSB128,
17585 IX86_BUILTIN_PADDSW128,
17586 IX86_BUILTIN_PADDUSB128,
17587 IX86_BUILTIN_PADDUSW128,
17588 IX86_BUILTIN_PSUBB128,
17589 IX86_BUILTIN_PSUBW128,
17590 IX86_BUILTIN_PSUBD128,
17591 IX86_BUILTIN_PSUBQ128,
17592 IX86_BUILTIN_PSUBSB128,
17593 IX86_BUILTIN_PSUBSW128,
17594 IX86_BUILTIN_PSUBUSB128,
17595 IX86_BUILTIN_PSUBUSW128,
17597 IX86_BUILTIN_PAND128,
17598 IX86_BUILTIN_PANDN128,
17599 IX86_BUILTIN_POR128,
17600 IX86_BUILTIN_PXOR128,
17602 IX86_BUILTIN_PAVGB128,
17603 IX86_BUILTIN_PAVGW128,
17605 IX86_BUILTIN_PCMPEQB128,
17606 IX86_BUILTIN_PCMPEQW128,
17607 IX86_BUILTIN_PCMPEQD128,
17608 IX86_BUILTIN_PCMPGTB128,
17609 IX86_BUILTIN_PCMPGTW128,
17610 IX86_BUILTIN_PCMPGTD128,
17612 IX86_BUILTIN_PMADDWD128,
17614 IX86_BUILTIN_PMAXSW128,
17615 IX86_BUILTIN_PMAXUB128,
17616 IX86_BUILTIN_PMINSW128,
17617 IX86_BUILTIN_PMINUB128,
17619 IX86_BUILTIN_PMULUDQ,
17620 IX86_BUILTIN_PMULUDQ128,
17621 IX86_BUILTIN_PMULHUW128,
17622 IX86_BUILTIN_PMULHW128,
17623 IX86_BUILTIN_PMULLW128,
17625 IX86_BUILTIN_PSADBW128,
17626 IX86_BUILTIN_PSHUFHW,
17627 IX86_BUILTIN_PSHUFLW,
17628 IX86_BUILTIN_PSHUFD,
17630 IX86_BUILTIN_PSLLDQI128,
17631 IX86_BUILTIN_PSLLWI128,
17632 IX86_BUILTIN_PSLLDI128,
17633 IX86_BUILTIN_PSLLQI128,
17634 IX86_BUILTIN_PSRAWI128,
17635 IX86_BUILTIN_PSRADI128,
17636 IX86_BUILTIN_PSRLDQI128,
17637 IX86_BUILTIN_PSRLWI128,
17638 IX86_BUILTIN_PSRLDI128,
17639 IX86_BUILTIN_PSRLQI128,
17641 IX86_BUILTIN_PSLLDQ128,
17642 IX86_BUILTIN_PSLLW128,
17643 IX86_BUILTIN_PSLLD128,
17644 IX86_BUILTIN_PSLLQ128,
17645 IX86_BUILTIN_PSRAW128,
17646 IX86_BUILTIN_PSRAD128,
17647 IX86_BUILTIN_PSRLW128,
17648 IX86_BUILTIN_PSRLD128,
17649 IX86_BUILTIN_PSRLQ128,
17651 IX86_BUILTIN_PUNPCKHBW128,
17652 IX86_BUILTIN_PUNPCKHWD128,
17653 IX86_BUILTIN_PUNPCKHDQ128,
17654 IX86_BUILTIN_PUNPCKHQDQ128,
17655 IX86_BUILTIN_PUNPCKLBW128,
17656 IX86_BUILTIN_PUNPCKLWD128,
17657 IX86_BUILTIN_PUNPCKLDQ128,
17658 IX86_BUILTIN_PUNPCKLQDQ128,
17660 IX86_BUILTIN_CLFLUSH,
17661 IX86_BUILTIN_MFENCE,
17662 IX86_BUILTIN_LFENCE,
17665 IX86_BUILTIN_ADDSUBPS,
17666 IX86_BUILTIN_HADDPS,
17667 IX86_BUILTIN_HSUBPS,
17668 IX86_BUILTIN_MOVSHDUP,
17669 IX86_BUILTIN_MOVSLDUP,
17670 IX86_BUILTIN_ADDSUBPD,
17671 IX86_BUILTIN_HADDPD,
17672 IX86_BUILTIN_HSUBPD,
17673 IX86_BUILTIN_LDDQU,
17675 IX86_BUILTIN_MONITOR,
17676 IX86_BUILTIN_MWAIT,
17679 IX86_BUILTIN_PHADDW,
17680 IX86_BUILTIN_PHADDD,
17681 IX86_BUILTIN_PHADDSW,
17682 IX86_BUILTIN_PHSUBW,
17683 IX86_BUILTIN_PHSUBD,
17684 IX86_BUILTIN_PHSUBSW,
17685 IX86_BUILTIN_PMADDUBSW,
17686 IX86_BUILTIN_PMULHRSW,
17687 IX86_BUILTIN_PSHUFB,
17688 IX86_BUILTIN_PSIGNB,
17689 IX86_BUILTIN_PSIGNW,
17690 IX86_BUILTIN_PSIGND,
17691 IX86_BUILTIN_PALIGNR,
17692 IX86_BUILTIN_PABSB,
17693 IX86_BUILTIN_PABSW,
17694 IX86_BUILTIN_PABSD,
17696 IX86_BUILTIN_PHADDW128,
17697 IX86_BUILTIN_PHADDD128,
17698 IX86_BUILTIN_PHADDSW128,
17699 IX86_BUILTIN_PHSUBW128,
17700 IX86_BUILTIN_PHSUBD128,
17701 IX86_BUILTIN_PHSUBSW128,
17702 IX86_BUILTIN_PMADDUBSW128,
17703 IX86_BUILTIN_PMULHRSW128,
17704 IX86_BUILTIN_PSHUFB128,
17705 IX86_BUILTIN_PSIGNB128,
17706 IX86_BUILTIN_PSIGNW128,
17707 IX86_BUILTIN_PSIGND128,
17708 IX86_BUILTIN_PALIGNR128,
17709 IX86_BUILTIN_PABSB128,
17710 IX86_BUILTIN_PABSW128,
17711 IX86_BUILTIN_PABSD128,
17713 /* AMDFAM10 - SSE4A New Instructions. */
17714 IX86_BUILTIN_MOVNTSD,
17715 IX86_BUILTIN_MOVNTSS,
17716 IX86_BUILTIN_EXTRQI,
17717 IX86_BUILTIN_EXTRQ,
17718 IX86_BUILTIN_INSERTQI,
17719 IX86_BUILTIN_INSERTQ,
17722 IX86_BUILTIN_BLENDPD,
17723 IX86_BUILTIN_BLENDPS,
17724 IX86_BUILTIN_BLENDVPD,
17725 IX86_BUILTIN_BLENDVPS,
17726 IX86_BUILTIN_PBLENDVB128,
17727 IX86_BUILTIN_PBLENDW128,
17732 IX86_BUILTIN_INSERTPS128,
17734 IX86_BUILTIN_MOVNTDQA,
17735 IX86_BUILTIN_MPSADBW128,
17736 IX86_BUILTIN_PACKUSDW128,
17737 IX86_BUILTIN_PCMPEQQ,
17738 IX86_BUILTIN_PHMINPOSUW128,
17740 IX86_BUILTIN_PMAXSB128,
17741 IX86_BUILTIN_PMAXSD128,
17742 IX86_BUILTIN_PMAXUD128,
17743 IX86_BUILTIN_PMAXUW128,
17745 IX86_BUILTIN_PMINSB128,
17746 IX86_BUILTIN_PMINSD128,
17747 IX86_BUILTIN_PMINUD128,
17748 IX86_BUILTIN_PMINUW128,
17750 IX86_BUILTIN_PMOVSXBW128,
17751 IX86_BUILTIN_PMOVSXBD128,
17752 IX86_BUILTIN_PMOVSXBQ128,
17753 IX86_BUILTIN_PMOVSXWD128,
17754 IX86_BUILTIN_PMOVSXWQ128,
17755 IX86_BUILTIN_PMOVSXDQ128,
17757 IX86_BUILTIN_PMOVZXBW128,
17758 IX86_BUILTIN_PMOVZXBD128,
17759 IX86_BUILTIN_PMOVZXBQ128,
17760 IX86_BUILTIN_PMOVZXWD128,
17761 IX86_BUILTIN_PMOVZXWQ128,
17762 IX86_BUILTIN_PMOVZXDQ128,
17764 IX86_BUILTIN_PMULDQ128,
17765 IX86_BUILTIN_PMULLD128,
17767 IX86_BUILTIN_ROUNDPD,
17768 IX86_BUILTIN_ROUNDPS,
17769 IX86_BUILTIN_ROUNDSD,
17770 IX86_BUILTIN_ROUNDSS,
17772 IX86_BUILTIN_PTESTZ,
17773 IX86_BUILTIN_PTESTC,
17774 IX86_BUILTIN_PTESTNZC,
17776 IX86_BUILTIN_VEC_INIT_V2SI,
17777 IX86_BUILTIN_VEC_INIT_V4HI,
17778 IX86_BUILTIN_VEC_INIT_V8QI,
17779 IX86_BUILTIN_VEC_EXT_V2DF,
17780 IX86_BUILTIN_VEC_EXT_V2DI,
17781 IX86_BUILTIN_VEC_EXT_V4SF,
17782 IX86_BUILTIN_VEC_EXT_V4SI,
17783 IX86_BUILTIN_VEC_EXT_V8HI,
17784 IX86_BUILTIN_VEC_EXT_V2SI,
17785 IX86_BUILTIN_VEC_EXT_V4HI,
17786 IX86_BUILTIN_VEC_EXT_V16QI,
17787 IX86_BUILTIN_VEC_SET_V2DI,
17788 IX86_BUILTIN_VEC_SET_V4SF,
17789 IX86_BUILTIN_VEC_SET_V4SI,
17790 IX86_BUILTIN_VEC_SET_V8HI,
17791 IX86_BUILTIN_VEC_SET_V4HI,
17792 IX86_BUILTIN_VEC_SET_V16QI,
17794 IX86_BUILTIN_VEC_PACK_SFIX,
17797 IX86_BUILTIN_CRC32QI,
17798 IX86_BUILTIN_CRC32HI,
17799 IX86_BUILTIN_CRC32SI,
17800 IX86_BUILTIN_CRC32DI,
17802 IX86_BUILTIN_PCMPESTRI128,
17803 IX86_BUILTIN_PCMPESTRM128,
17804 IX86_BUILTIN_PCMPESTRA128,
17805 IX86_BUILTIN_PCMPESTRC128,
17806 IX86_BUILTIN_PCMPESTRO128,
17807 IX86_BUILTIN_PCMPESTRS128,
17808 IX86_BUILTIN_PCMPESTRZ128,
17809 IX86_BUILTIN_PCMPISTRI128,
17810 IX86_BUILTIN_PCMPISTRM128,
17811 IX86_BUILTIN_PCMPISTRA128,
17812 IX86_BUILTIN_PCMPISTRC128,
17813 IX86_BUILTIN_PCMPISTRO128,
17814 IX86_BUILTIN_PCMPISTRS128,
17815 IX86_BUILTIN_PCMPISTRZ128,
17817 IX86_BUILTIN_PCMPGTQ,
17819 /* AES instructions */
17820 IX86_BUILTIN_AESENC128,
17821 IX86_BUILTIN_AESENCLAST128,
17822 IX86_BUILTIN_AESDEC128,
17823 IX86_BUILTIN_AESDECLAST128,
17824 IX86_BUILTIN_AESIMC128,
17825 IX86_BUILTIN_AESKEYGENASSIST128,
17827 /* PCLMUL instruction */
17828 IX86_BUILTIN_PCLMULQDQ128,
17830 /* TFmode support builtins. */
17832 IX86_BUILTIN_FABSQ,
17833 IX86_BUILTIN_COPYSIGNQ,
17835 /* SSE5 instructions */
17836 IX86_BUILTIN_FMADDSS,
17837 IX86_BUILTIN_FMADDSD,
17838 IX86_BUILTIN_FMADDPS,
17839 IX86_BUILTIN_FMADDPD,
17840 IX86_BUILTIN_FMSUBSS,
17841 IX86_BUILTIN_FMSUBSD,
17842 IX86_BUILTIN_FMSUBPS,
17843 IX86_BUILTIN_FMSUBPD,
17844 IX86_BUILTIN_FNMADDSS,
17845 IX86_BUILTIN_FNMADDSD,
17846 IX86_BUILTIN_FNMADDPS,
17847 IX86_BUILTIN_FNMADDPD,
17848 IX86_BUILTIN_FNMSUBSS,
17849 IX86_BUILTIN_FNMSUBSD,
17850 IX86_BUILTIN_FNMSUBPS,
17851 IX86_BUILTIN_FNMSUBPD,
17852 IX86_BUILTIN_PCMOV_V2DI,
17853 IX86_BUILTIN_PCMOV_V4SI,
17854 IX86_BUILTIN_PCMOV_V8HI,
17855 IX86_BUILTIN_PCMOV_V16QI,
17856 IX86_BUILTIN_PCMOV_V4SF,
17857 IX86_BUILTIN_PCMOV_V2DF,
17858 IX86_BUILTIN_PPERM,
17859 IX86_BUILTIN_PERMPS,
17860 IX86_BUILTIN_PERMPD,
17861 IX86_BUILTIN_PMACSSWW,
17862 IX86_BUILTIN_PMACSWW,
17863 IX86_BUILTIN_PMACSSWD,
17864 IX86_BUILTIN_PMACSWD,
17865 IX86_BUILTIN_PMACSSDD,
17866 IX86_BUILTIN_PMACSDD,
17867 IX86_BUILTIN_PMACSSDQL,
17868 IX86_BUILTIN_PMACSSDQH,
17869 IX86_BUILTIN_PMACSDQL,
17870 IX86_BUILTIN_PMACSDQH,
17871 IX86_BUILTIN_PMADCSSWD,
17872 IX86_BUILTIN_PMADCSWD,
17873 IX86_BUILTIN_PHADDBW,
17874 IX86_BUILTIN_PHADDBD,
17875 IX86_BUILTIN_PHADDBQ,
17876 IX86_BUILTIN_PHADDWD,
17877 IX86_BUILTIN_PHADDWQ,
17878 IX86_BUILTIN_PHADDDQ,
17879 IX86_BUILTIN_PHADDUBW,
17880 IX86_BUILTIN_PHADDUBD,
17881 IX86_BUILTIN_PHADDUBQ,
17882 IX86_BUILTIN_PHADDUWD,
17883 IX86_BUILTIN_PHADDUWQ,
17884 IX86_BUILTIN_PHADDUDQ,
17885 IX86_BUILTIN_PHSUBBW,
17886 IX86_BUILTIN_PHSUBWD,
17887 IX86_BUILTIN_PHSUBDQ,
17888 IX86_BUILTIN_PROTB,
17889 IX86_BUILTIN_PROTW,
17890 IX86_BUILTIN_PROTD,
17891 IX86_BUILTIN_PROTQ,
17892 IX86_BUILTIN_PROTB_IMM,
17893 IX86_BUILTIN_PROTW_IMM,
17894 IX86_BUILTIN_PROTD_IMM,
17895 IX86_BUILTIN_PROTQ_IMM,
17896 IX86_BUILTIN_PSHLB,
17897 IX86_BUILTIN_PSHLW,
17898 IX86_BUILTIN_PSHLD,
17899 IX86_BUILTIN_PSHLQ,
17900 IX86_BUILTIN_PSHAB,
17901 IX86_BUILTIN_PSHAW,
17902 IX86_BUILTIN_PSHAD,
17903 IX86_BUILTIN_PSHAQ,
17904 IX86_BUILTIN_FRCZSS,
17905 IX86_BUILTIN_FRCZSD,
17906 IX86_BUILTIN_FRCZPS,
17907 IX86_BUILTIN_FRCZPD,
17908 IX86_BUILTIN_CVTPH2PS,
17909 IX86_BUILTIN_CVTPS2PH,
17911 IX86_BUILTIN_COMEQSS,
17912 IX86_BUILTIN_COMNESS,
17913 IX86_BUILTIN_COMLTSS,
17914 IX86_BUILTIN_COMLESS,
17915 IX86_BUILTIN_COMGTSS,
17916 IX86_BUILTIN_COMGESS,
17917 IX86_BUILTIN_COMUEQSS,
17918 IX86_BUILTIN_COMUNESS,
17919 IX86_BUILTIN_COMULTSS,
17920 IX86_BUILTIN_COMULESS,
17921 IX86_BUILTIN_COMUGTSS,
17922 IX86_BUILTIN_COMUGESS,
17923 IX86_BUILTIN_COMORDSS,
17924 IX86_BUILTIN_COMUNORDSS,
17925 IX86_BUILTIN_COMFALSESS,
17926 IX86_BUILTIN_COMTRUESS,
17928 IX86_BUILTIN_COMEQSD,
17929 IX86_BUILTIN_COMNESD,
17930 IX86_BUILTIN_COMLTSD,
17931 IX86_BUILTIN_COMLESD,
17932 IX86_BUILTIN_COMGTSD,
17933 IX86_BUILTIN_COMGESD,
17934 IX86_BUILTIN_COMUEQSD,
17935 IX86_BUILTIN_COMUNESD,
17936 IX86_BUILTIN_COMULTSD,
17937 IX86_BUILTIN_COMULESD,
17938 IX86_BUILTIN_COMUGTSD,
17939 IX86_BUILTIN_COMUGESD,
17940 IX86_BUILTIN_COMORDSD,
17941 IX86_BUILTIN_COMUNORDSD,
17942 IX86_BUILTIN_COMFALSESD,
17943 IX86_BUILTIN_COMTRUESD,
17945 IX86_BUILTIN_COMEQPS,
17946 IX86_BUILTIN_COMNEPS,
17947 IX86_BUILTIN_COMLTPS,
17948 IX86_BUILTIN_COMLEPS,
17949 IX86_BUILTIN_COMGTPS,
17950 IX86_BUILTIN_COMGEPS,
17951 IX86_BUILTIN_COMUEQPS,
17952 IX86_BUILTIN_COMUNEPS,
17953 IX86_BUILTIN_COMULTPS,
17954 IX86_BUILTIN_COMULEPS,
17955 IX86_BUILTIN_COMUGTPS,
17956 IX86_BUILTIN_COMUGEPS,
17957 IX86_BUILTIN_COMORDPS,
17958 IX86_BUILTIN_COMUNORDPS,
17959 IX86_BUILTIN_COMFALSEPS,
17960 IX86_BUILTIN_COMTRUEPS,
17962 IX86_BUILTIN_COMEQPD,
17963 IX86_BUILTIN_COMNEPD,
17964 IX86_BUILTIN_COMLTPD,
17965 IX86_BUILTIN_COMLEPD,
17966 IX86_BUILTIN_COMGTPD,
17967 IX86_BUILTIN_COMGEPD,
17968 IX86_BUILTIN_COMUEQPD,
17969 IX86_BUILTIN_COMUNEPD,
17970 IX86_BUILTIN_COMULTPD,
17971 IX86_BUILTIN_COMULEPD,
17972 IX86_BUILTIN_COMUGTPD,
17973 IX86_BUILTIN_COMUGEPD,
17974 IX86_BUILTIN_COMORDPD,
17975 IX86_BUILTIN_COMUNORDPD,
17976 IX86_BUILTIN_COMFALSEPD,
17977 IX86_BUILTIN_COMTRUEPD,
17979 IX86_BUILTIN_PCOMEQUB,
17980 IX86_BUILTIN_PCOMNEUB,
17981 IX86_BUILTIN_PCOMLTUB,
17982 IX86_BUILTIN_PCOMLEUB,
17983 IX86_BUILTIN_PCOMGTUB,
17984 IX86_BUILTIN_PCOMGEUB,
17985 IX86_BUILTIN_PCOMFALSEUB,
17986 IX86_BUILTIN_PCOMTRUEUB,
17987 IX86_BUILTIN_PCOMEQUW,
17988 IX86_BUILTIN_PCOMNEUW,
17989 IX86_BUILTIN_PCOMLTUW,
17990 IX86_BUILTIN_PCOMLEUW,
17991 IX86_BUILTIN_PCOMGTUW,
17992 IX86_BUILTIN_PCOMGEUW,
17993 IX86_BUILTIN_PCOMFALSEUW,
17994 IX86_BUILTIN_PCOMTRUEUW,
17995 IX86_BUILTIN_PCOMEQUD,
17996 IX86_BUILTIN_PCOMNEUD,
17997 IX86_BUILTIN_PCOMLTUD,
17998 IX86_BUILTIN_PCOMLEUD,
17999 IX86_BUILTIN_PCOMGTUD,
18000 IX86_BUILTIN_PCOMGEUD,
18001 IX86_BUILTIN_PCOMFALSEUD,
18002 IX86_BUILTIN_PCOMTRUEUD,
18003 IX86_BUILTIN_PCOMEQUQ,
18004 IX86_BUILTIN_PCOMNEUQ,
18005 IX86_BUILTIN_PCOMLTUQ,
18006 IX86_BUILTIN_PCOMLEUQ,
18007 IX86_BUILTIN_PCOMGTUQ,
18008 IX86_BUILTIN_PCOMGEUQ,
18009 IX86_BUILTIN_PCOMFALSEUQ,
18010 IX86_BUILTIN_PCOMTRUEUQ,
18012 IX86_BUILTIN_PCOMEQB,
18013 IX86_BUILTIN_PCOMNEB,
18014 IX86_BUILTIN_PCOMLTB,
18015 IX86_BUILTIN_PCOMLEB,
18016 IX86_BUILTIN_PCOMGTB,
18017 IX86_BUILTIN_PCOMGEB,
18018 IX86_BUILTIN_PCOMFALSEB,
18019 IX86_BUILTIN_PCOMTRUEB,
18020 IX86_BUILTIN_PCOMEQW,
18021 IX86_BUILTIN_PCOMNEW,
18022 IX86_BUILTIN_PCOMLTW,
18023 IX86_BUILTIN_PCOMLEW,
18024 IX86_BUILTIN_PCOMGTW,
18025 IX86_BUILTIN_PCOMGEW,
18026 IX86_BUILTIN_PCOMFALSEW,
18027 IX86_BUILTIN_PCOMTRUEW,
18028 IX86_BUILTIN_PCOMEQD,
18029 IX86_BUILTIN_PCOMNED,
18030 IX86_BUILTIN_PCOMLTD,
18031 IX86_BUILTIN_PCOMLED,
18032 IX86_BUILTIN_PCOMGTD,
18033 IX86_BUILTIN_PCOMGED,
18034 IX86_BUILTIN_PCOMFALSED,
18035 IX86_BUILTIN_PCOMTRUED,
18036 IX86_BUILTIN_PCOMEQQ,
18037 IX86_BUILTIN_PCOMNEQ,
18038 IX86_BUILTIN_PCOMLTQ,
18039 IX86_BUILTIN_PCOMLEQ,
18040 IX86_BUILTIN_PCOMGTQ,
18041 IX86_BUILTIN_PCOMGEQ,
18042 IX86_BUILTIN_PCOMFALSEQ,
18043 IX86_BUILTIN_PCOMTRUEQ,
18048 /* Table for the ix86 builtin decls. */
/* Indexed by IX86_BUILTIN_* code; filled in by def_builtin below.  GTY(())
   registers the array as a garbage-collector root so the decls survive.  */
18049 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
18051 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
18052 * if the target_flags include one of MASK. Stores the function decl
18053 * in the ix86_builtins array.
18054 * Returns the function decl or NULL_TREE, if the builtin was not added. */
/* NOTE(review): this listing elides several original lines — the "static
   tree" return type, the braces, the trailing arguments of the
   add_builtin_function call (line 18065) and the final "return decl;".
   Consult the full file before editing this function.  */
18057 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
/* decl stays NULL_TREE when the ISA test below rejects the builtin.  */
18059 tree decl = NULL_TREE;
/* Register the builtin only when one of the requested ISA bits is enabled,
   and never register 64-bit-only builtins on a 32-bit target.  */
18061 if (mask & ix86_isa_flags
18062 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
18064 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
/* Remember the decl, indexed by CODE, in the ix86_builtins table.  */
18066 ix86_builtins[(int) code] = decl;
18072 /* Like def_builtin, but also marks the function decl "const". */
/* NOTE(review): the return type line, braces, a probable "if (decl)" guard
   and the trailing "return decl;" are elided from this listing — confirm
   against the full file.  As literally shown, TREE_READONLY would be
   applied even when def_builtin returned NULL_TREE (e.g. ISA disabled).  */
18075 def_builtin_const (int mask, const char *name, tree type,
18076 enum ix86_builtins code)
18078 tree decl = def_builtin (mask, name, type, code);
/* TREE_READONLY on a FUNCTION_DECL is GCC's "const" function attribute:
   no side effects, result depends only on arguments.  */
18080 TREE_READONLY (decl) = 1;
18084 /* Bits for builtin_description.flag. */
18086 /* Set when we don't support the comparison natively, and should
18087 swap_comparison in order to support it. */
18088 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One table row describing a builtin to register: which ISA enables it,
   which insn pattern implements it, its source-level name, its
   IX86_BUILTIN_* code, and (for comparisons) the RTL comparison code.
   NOTE(review): the struct's braces and its trailing "flag" member are
   elided from this listing.  */
18090 struct builtin_description
/* OPTION_MASK_ISA_* bits gating availability of the builtin.  */
18092 const unsigned int mask;
/* CODE_FOR_* insn pattern the builtin expands to.  */
18093 const enum insn_code icode;
/* User-visible "__builtin_ia32_*" name, or 0 for no direct name.  */
18094 const char *const name;
/* IX86_BUILTIN_* enumerator indexing ix86_builtins.  */
18095 const enum ix86_builtins code;
/* rtx comparison code for compare builtins, UNKNOWN otherwise.  */
18096 const enum rtx_code comparison;
/* Scalar SSE/SSE2 (U)COMISS / (U)COMISD comparison builtins.  Row layout:
   { isa mask, insn code, name, builtin code, rtx comparison, flag }.
   NOTE(review): the array's opening "{" and closing "};" are elided from
   this listing.  */
18100 static const struct builtin_description bdesc_comi[] =
18102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
18103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
18104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
18105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
18106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
18107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
18108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
18109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
18110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
18111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
18112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
18113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
18114 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
18115 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
18116 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
18117 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
18118 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
18119 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
18120 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
18121 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
18122 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
18123 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
18124 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
18125 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 packed-compare explicit-length string builtins (PCMPESTR*).
   The last field reuses the "flag" slot to carry the CC mode whose flag
   bit the builtin reads (CCA/CCC/CCO/CCS/CCZ), or 0 for the index/mask
   forms.  NOTE(review): the array's braces are elided from this listing.  */
18128 static const struct builtin_description bdesc_pcmpestr[] =
18131 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
18132 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
18133 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
18134 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
18135 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
18136 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
18137 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 packed-compare implicit-length string builtins (PCMPISTR*).
   Same row layout as bdesc_pcmpestr: last field is the CC mode read, or 0.
   NOTE(review): the array's braces are elided from this listing.  */
18140 static const struct builtin_description bdesc_pcmpistr[] =
18143 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18144 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18145 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18146 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18147 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18148 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18149 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18152 /* Special builtin types */
/* Function-type signatures for "special" builtins (loads/stores and other
   builtins with pointer operands or side effects).  Naming convention:
   RET_FTYPE_ARG1_ARG2...; PC prefix = pointer-to-const, PV = pointer-to-
   vector.  NOTE(review): the enum's braces (and possibly some enumerators)
   are elided from this listing.  */
18153 enum ix86_special_builtin_type
18155 SPECIAL_FTYPE_UNKNOWN,
18157 V16QI_FTYPE_PCCHAR,
18158 V4SF_FTYPE_PCFLOAT,
18159 V2DF_FTYPE_PCDOUBLE,
18160 V4SF_FTYPE_V4SF_PCV2SF,
18161 V2DF_FTYPE_V2DF_PCDOUBLE,
18163 VOID_FTYPE_PV2SF_V4SF,
18164 VOID_FTYPE_PV2DI_V2DI,
18165 VOID_FTYPE_PCHAR_V16QI,
18166 VOID_FTYPE_PFLOAT_V4SF,
18167 VOID_FTYPE_PDOUBLE_V2DF,
18169 VOID_FTYPE_PINT_INT
18172 /* Builtin types */
/* Function-type signatures for ordinary (value-only) builtins, using the
   same RET_FTYPE_ARGS naming convention.  Suffixes: _COUNT = last operand
   is a shift count, _SWAP = operands are swapped before expansion,
   _VEC_MERGE = scalar op merged into a vector.  NOTE(review): the enum's
   braces and many enumerators (original lines 18174-18214 in part) are
   elided from this listing.  */
18173 enum ix86_builtin_type
18176 FLOAT128_FTYPE_FLOAT128,
18178 FLOAT128_FTYPE_FLOAT128_FLOAT128,
18179 INT_FTYPE_V2DI_V2DI_PTEST,
18197 V4SF_FTYPE_V4SF_VEC_MERGE,
18205 V2DF_FTYPE_V2DF_VEC_MERGE,
18215 V16QI_FTYPE_V16QI_V16QI,
18216 V16QI_FTYPE_V8HI_V8HI,
18217 V8QI_FTYPE_V8QI_V8QI,
18218 V8QI_FTYPE_V4HI_V4HI,
18219 V8HI_FTYPE_V8HI_V8HI,
18220 V8HI_FTYPE_V8HI_V8HI_COUNT,
18221 V8HI_FTYPE_V16QI_V16QI,
18222 V8HI_FTYPE_V4SI_V4SI,
18223 V8HI_FTYPE_V8HI_SI_COUNT,
18224 V4SI_FTYPE_V4SI_V4SI,
18225 V4SI_FTYPE_V4SI_V4SI_COUNT,
18226 V4SI_FTYPE_V8HI_V8HI,
18227 V4SI_FTYPE_V4SF_V4SF,
18228 V4SI_FTYPE_V2DF_V2DF,
18229 V4SI_FTYPE_V4SI_SI_COUNT,
18230 V4HI_FTYPE_V4HI_V4HI,
18231 V4HI_FTYPE_V4HI_V4HI_COUNT,
18232 V4HI_FTYPE_V8QI_V8QI,
18233 V4HI_FTYPE_V2SI_V2SI,
18234 V4HI_FTYPE_V4HI_SI_COUNT,
18235 V4SF_FTYPE_V4SF_V4SF,
18236 V4SF_FTYPE_V4SF_V4SF_SWAP,
18237 V4SF_FTYPE_V4SF_V2SI,
18238 V4SF_FTYPE_V4SF_V2DF,
18239 V4SF_FTYPE_V4SF_DI,
18240 V4SF_FTYPE_V4SF_SI,
18241 V2DI_FTYPE_V2DI_V2DI,
18242 V2DI_FTYPE_V2DI_V2DI_COUNT,
18243 V2DI_FTYPE_V16QI_V16QI,
18244 V2DI_FTYPE_V4SI_V4SI,
18245 V2DI_FTYPE_V2DI_V16QI,
18246 V2DI_FTYPE_V2DF_V2DF,
18247 V2DI_FTYPE_V2DI_SI_COUNT,
18248 V2SI_FTYPE_V2SI_V2SI,
18249 V2SI_FTYPE_V2SI_V2SI_COUNT,
18250 V2SI_FTYPE_V4HI_V4HI,
18251 V2SI_FTYPE_V2SF_V2SF,
18252 V2SI_FTYPE_V2SI_SI_COUNT,
18253 V2DF_FTYPE_V2DF_V2DF,
18254 V2DF_FTYPE_V2DF_V2DF_SWAP,
18255 V2DF_FTYPE_V2DF_V4SF,
18256 V2DF_FTYPE_V2DF_DI,
18257 V2DF_FTYPE_V2DF_SI,
18258 V2SF_FTYPE_V2SF_V2SF,
18259 V1DI_FTYPE_V1DI_V1DI,
18260 V1DI_FTYPE_V1DI_V1DI_COUNT,
18261 V1DI_FTYPE_V8QI_V8QI,
18262 V1DI_FTYPE_V2SI_V2SI,
18263 V1DI_FTYPE_V1DI_SI_COUNT,
18264 UINT64_FTYPE_UINT64_UINT64,
18265 UINT_FTYPE_UINT_UINT,
18266 UINT_FTYPE_UINT_USHORT,
18267 UINT_FTYPE_UINT_UCHAR,
18268 V8HI_FTYPE_V8HI_INT,
18269 V4SI_FTYPE_V4SI_INT,
18270 V4HI_FTYPE_V4HI_INT,
18271 V4SF_FTYPE_V4SF_INT,
18272 V2DI_FTYPE_V2DI_INT,
18273 V2DI2TI_FTYPE_V2DI_INT,
18274 V2DF_FTYPE_V2DF_INT,
18275 V16QI_FTYPE_V16QI_V16QI_V16QI,
18276 V4SF_FTYPE_V4SF_V4SF_V4SF,
18277 V2DF_FTYPE_V2DF_V2DF_V2DF,
18278 V16QI_FTYPE_V16QI_V16QI_INT,
18279 V8HI_FTYPE_V8HI_V8HI_INT,
18280 V4SI_FTYPE_V4SI_V4SI_INT,
18281 V4SF_FTYPE_V4SF_V4SF_INT,
18282 V2DI_FTYPE_V2DI_V2DI_INT,
18283 V2DI2TI_FTYPE_V2DI_V2DI_INT,
18284 V1DI2DI_FTYPE_V1DI_V1DI_INT,
18285 V2DF_FTYPE_V2DF_V2DF_INT,
18286 V2DI_FTYPE_V2DI_UINT_UINT,
18287 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
18290 /* Special builtins with variable number of arguments. */
/* Rows: { isa mask, insn code, name, builtin code, comparison (unused,
   UNKNOWN), (int) ix86_special_builtin_type }.  Covers loads, stores,
   non-temporal moves and fences.  NOTE(review): the array's braces and
   the per-ISA section comments are elided from this listing.  */
18291 static const struct builtin_description bdesc_special_args[] =
18294 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18297 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18301 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18302 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18304 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18305 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18306 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18307 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18309 /* SSE or 3DNow!A */
18310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18311 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
/* NOTE(review): name is 0 here (mfence) rather than a string literal —
   presumably the name is registered by separate code elsewhere; verify
   against the full file before "fixing" it.  */
18315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18316 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18328 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18331 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18334 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18335 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18338 /* Builtins with variable number of arguments. */
18339 static const struct builtin_description bdesc_args[] =
18342 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18343 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18344 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18345 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18346 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18347 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18349 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18350 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18351 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18352 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18353 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18354 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18355 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18356 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18358 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18359 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18361 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18362 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18363 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18364 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18366 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18367 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18368 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18369 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18370 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18371 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18373 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18374 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18376 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  /* MMX interleave, pack, multiply-add and shift builtins.  */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
  /* MMX shifts.  Per the FTYPE names, the _SI_COUNT signatures take the
     shift count as an integer, the _<vecmode>_COUNT signatures take it
     in a vector operand.  */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  /* 3DNow! builtins.  */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  /* 3DNow! comparisons return an all-ones/all-zeros V2SI mask.  */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  /* 3DNow!A (Athlon extensions) builtins.  */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  /* SSE builtins: mask extraction, sqrt/reciprocal approximations,
     conversions and packed/scalar arithmetic.  The _nr variants map to
     the generic (non-SSE-specific) patterns.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18460 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18461 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18462 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18463 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18464 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18465 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18466 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18467 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18468 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18469 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18470 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18471 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18472 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18473 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18474 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  /* SSE min/max, bitwise logic, and register-move/unpack builtins.  */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18499 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18501 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18503 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  /* SSE MMX or 3Dnow!A: integer builtins available with either ISA.  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
  /* SSE2 double-precision: shuffle, mask extraction, conversions and
     packed/scalar arithmetic builtins.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18561 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18564 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18565 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18566 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18567 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18568 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18569 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18570 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18571 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18572 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18573 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18574 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18575 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18576 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  /* SSE2 double-precision min/max, bitwise logic, moves/unpacks, and
     the vectorizer's pack-signed-fix helper.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
  /* SSE2 128-bit integer add/sub, plain and saturating.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18614 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18615 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
  /* SSE2 128-bit integer logic, average, compares, min/max,
     unpack/pack, multiplies, conversions, shifts and shuffles.  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
  /* Whole-register byte shifts operate in TImode (V2DI2TI).  */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
18688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
18691 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18692 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
18695 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
18696 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18698 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18699 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18700 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18701 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18702 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18703 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18706 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
18707 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
18708 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18709 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
18710 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
18711 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18713 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18714 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18715 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18716 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18717 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18718 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18719 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18720 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18721 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18722 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18723 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18724 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18725 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
18726 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
18727 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18728 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18729 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18730 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18731 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18732 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18733 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18734 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18735 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18736 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18739 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
18740 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
18743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
18746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
18747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18748 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18749 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18750 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
18751 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
18752 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
18754 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18755 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18756 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18757 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18758 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18759 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18760 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
18761 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
18762 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
18763 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
18764 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
18765 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
18766 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
18768 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
18769 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18770 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18771 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18772 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18773 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18774 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18775 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18776 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18777 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18778 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
18779 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18781 /* SSE4.1 and SSE5 */
18782 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
18783 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
18784 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18785 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18787 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18788 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18789 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
18792 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18793 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
18794 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
18795 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
18796 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
18799 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
18800 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
18801 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
18802 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18805 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
18806 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
18808 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18809 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18810 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18811 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18814 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
18817 { OPTION_MASK_ISA_64BIT, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
18818 { OPTION_MASK_ISA_64BIT, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
18822 enum multi_arg_type {
18832 MULTI_ARG_3_PERMPS,
18833 MULTI_ARG_3_PERMPD,
18840 MULTI_ARG_2_DI_IMM,
18841 MULTI_ARG_2_SI_IMM,
18842 MULTI_ARG_2_HI_IMM,
18843 MULTI_ARG_2_QI_IMM,
18844 MULTI_ARG_2_SF_CMP,
18845 MULTI_ARG_2_DF_CMP,
18846 MULTI_ARG_2_DI_CMP,
18847 MULTI_ARG_2_SI_CMP,
18848 MULTI_ARG_2_HI_CMP,
18849 MULTI_ARG_2_QI_CMP,
18872 static const struct builtin_description bdesc_multi_arg[] =
18874 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
18875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
18876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
18877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
18878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
18879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
18880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
18881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
18882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
18883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
18884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
18885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
18886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
18887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
18888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
18889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18891 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
18892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
18893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
18894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
18895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
18896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
18897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
18898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
18899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
18900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
18901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
18902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
18904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
18905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
18906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18908 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
18909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
18910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
18911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
18912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
18913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
18914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
18915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
18916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
18917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
18918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
18919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
18920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
18921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
18922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
18923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
18924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
18925 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
18926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
18927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
18928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
18929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
18930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
18931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
18932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
18933 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
18934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
18935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
18936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
18937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
18938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
18939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
18940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
18941 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
18942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
18943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
18944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
18945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
18946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
18947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
18948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
18950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
18951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
18953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
18954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
18955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
18956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
18957 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18965 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
18967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
18968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
18970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
18971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
18972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
18973 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
18974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
18975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
18977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
18978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
18979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
18980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
18981 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
18982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
18984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
18985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
18987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
18988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
18989 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
18990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
18991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
18992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
18994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
18995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
18996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
18997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
18998 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
18999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
19001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
19002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
19005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
19006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
19007 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
19008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19016 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19018 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
19019 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19020 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19021 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
19022 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
19023 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
19024 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
19026 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19027 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19028 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19029 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19030 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19031 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19032 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19034 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19035 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19036 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19037 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19038 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19039 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19040 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19042 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19043 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19044 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19045 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
19046 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
19047 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
19048 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
19050 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
19051 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19052 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19053 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
19054 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
19055 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
19056 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
19058 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
19059 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19060 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19061 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
19062 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
19063 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
19064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
19066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
19067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
19070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
19071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
19072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
19074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
19078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
19079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
19080 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
19082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
19083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
19084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
19085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
19086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
19087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
19088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
19089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
19091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19097 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19105 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19110 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
19111 is zero.  Otherwise, if TARGET_SSE is not set, only expand the MMX
builtins.  */
19114 ix86_init_mmx_sse_builtins (void)
19116 const struct builtin_description * d;
19119 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
19120 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19121 tree V1DI_type_node
19122 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
19123 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
19124 tree V2DI_type_node
19125 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
19126 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
19127 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
19128 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
19129 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19130 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
19131 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
19133 tree pchar_type_node = build_pointer_type (char_type_node);
19134 tree pcchar_type_node
19135 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
19136 tree pfloat_type_node = build_pointer_type (float_type_node);
19137 tree pcfloat_type_node
19138 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19139 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19140 tree pcv2sf_type_node
19141 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
19142 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
19143 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19146 tree int_ftype_v4sf_v4sf
19147 = build_function_type_list (integer_type_node,
19148 V4SF_type_node, V4SF_type_node, NULL_TREE);
19149 tree v4si_ftype_v4sf_v4sf
19150 = build_function_type_list (V4SI_type_node,
19151 V4SF_type_node, V4SF_type_node, NULL_TREE);
19152 /* MMX/SSE/integer conversions. */
19153 tree int_ftype_v4sf
19154 = build_function_type_list (integer_type_node,
19155 V4SF_type_node, NULL_TREE);
19156 tree int64_ftype_v4sf
19157 = build_function_type_list (long_long_integer_type_node,
19158 V4SF_type_node, NULL_TREE);
19159 tree int_ftype_v8qi
19160 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19161 tree v4sf_ftype_v4sf_int
19162 = build_function_type_list (V4SF_type_node,
19163 V4SF_type_node, integer_type_node, NULL_TREE);
19164 tree v4sf_ftype_v4sf_int64
19165 = build_function_type_list (V4SF_type_node,
19166 V4SF_type_node, long_long_integer_type_node,
19168 tree v4sf_ftype_v4sf_v2si
19169 = build_function_type_list (V4SF_type_node,
19170 V4SF_type_node, V2SI_type_node, NULL_TREE);
19172 /* Miscellaneous. */
19173 tree v8qi_ftype_v4hi_v4hi
19174 = build_function_type_list (V8QI_type_node,
19175 V4HI_type_node, V4HI_type_node, NULL_TREE);
19176 tree v4hi_ftype_v2si_v2si
19177 = build_function_type_list (V4HI_type_node,
19178 V2SI_type_node, V2SI_type_node, NULL_TREE);
19179 tree v4sf_ftype_v4sf_v4sf_int
19180 = build_function_type_list (V4SF_type_node,
19181 V4SF_type_node, V4SF_type_node,
19182 integer_type_node, NULL_TREE);
19183 tree v2si_ftype_v4hi_v4hi
19184 = build_function_type_list (V2SI_type_node,
19185 V4HI_type_node, V4HI_type_node, NULL_TREE);
19186 tree v4hi_ftype_v4hi_int
19187 = build_function_type_list (V4HI_type_node,
19188 V4HI_type_node, integer_type_node, NULL_TREE);
19189 tree v2si_ftype_v2si_int
19190 = build_function_type_list (V2SI_type_node,
19191 V2SI_type_node, integer_type_node, NULL_TREE);
19192 tree v1di_ftype_v1di_int
19193 = build_function_type_list (V1DI_type_node,
19194 V1DI_type_node, integer_type_node, NULL_TREE);
19196 tree void_ftype_void
19197 = build_function_type (void_type_node, void_list_node);
19198 tree void_ftype_unsigned
19199 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19200 tree void_ftype_unsigned_unsigned
19201 = build_function_type_list (void_type_node, unsigned_type_node,
19202 unsigned_type_node, NULL_TREE);
19203 tree void_ftype_pcvoid_unsigned_unsigned
19204 = build_function_type_list (void_type_node, const_ptr_type_node,
19205 unsigned_type_node, unsigned_type_node,
19207 tree unsigned_ftype_void
19208 = build_function_type (unsigned_type_node, void_list_node);
19209 tree v2si_ftype_v4sf
19210 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19211 /* Loads/stores. */
19212 tree void_ftype_v8qi_v8qi_pchar
19213 = build_function_type_list (void_type_node,
19214 V8QI_type_node, V8QI_type_node,
19215 pchar_type_node, NULL_TREE);
19216 tree v4sf_ftype_pcfloat
19217 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19218 tree v4sf_ftype_v4sf_pcv2sf
19219 = build_function_type_list (V4SF_type_node,
19220 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19221 tree void_ftype_pv2sf_v4sf
19222 = build_function_type_list (void_type_node,
19223 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19224 tree void_ftype_pfloat_v4sf
19225 = build_function_type_list (void_type_node,
19226 pfloat_type_node, V4SF_type_node, NULL_TREE);
19227 tree void_ftype_pdi_di
19228 = build_function_type_list (void_type_node,
19229 pdi_type_node, long_long_unsigned_type_node,
19231 tree void_ftype_pv2di_v2di
19232 = build_function_type_list (void_type_node,
19233 pv2di_type_node, V2DI_type_node, NULL_TREE);
19234 /* Normal vector unops. */
19235 tree v4sf_ftype_v4sf
19236 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19237 tree v16qi_ftype_v16qi
19238 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19239 tree v8hi_ftype_v8hi
19240 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19241 tree v4si_ftype_v4si
19242 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19243 tree v8qi_ftype_v8qi
19244 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19245 tree v4hi_ftype_v4hi
19246 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19248 /* Normal vector binops. */
19249 tree v4sf_ftype_v4sf_v4sf
19250 = build_function_type_list (V4SF_type_node,
19251 V4SF_type_node, V4SF_type_node, NULL_TREE);
19252 tree v8qi_ftype_v8qi_v8qi
19253 = build_function_type_list (V8QI_type_node,
19254 V8QI_type_node, V8QI_type_node, NULL_TREE);
19255 tree v4hi_ftype_v4hi_v4hi
19256 = build_function_type_list (V4HI_type_node,
19257 V4HI_type_node, V4HI_type_node, NULL_TREE);
19258 tree v2si_ftype_v2si_v2si
19259 = build_function_type_list (V2SI_type_node,
19260 V2SI_type_node, V2SI_type_node, NULL_TREE);
19261 tree v1di_ftype_v1di_v1di
19262 = build_function_type_list (V1DI_type_node,
19263 V1DI_type_node, V1DI_type_node, NULL_TREE);
19264 tree v1di_ftype_v1di_v1di_int
19265 = build_function_type_list (V1DI_type_node,
19266 V1DI_type_node, V1DI_type_node,
19267 integer_type_node, NULL_TREE);
19268 tree v2si_ftype_v2sf
19269 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19270 tree v2sf_ftype_v2si
19271 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19272 tree v2si_ftype_v2si
19273 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19274 tree v2sf_ftype_v2sf
19275 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19276 tree v2sf_ftype_v2sf_v2sf
19277 = build_function_type_list (V2SF_type_node,
19278 V2SF_type_node, V2SF_type_node, NULL_TREE);
19279 tree v2si_ftype_v2sf_v2sf
19280 = build_function_type_list (V2SI_type_node,
19281 V2SF_type_node, V2SF_type_node, NULL_TREE);
19282 tree pint_type_node = build_pointer_type (integer_type_node);
19283 tree pdouble_type_node = build_pointer_type (double_type_node);
19284 tree pcdouble_type_node = build_pointer_type (
19285 build_type_variant (double_type_node, 1, 0));
19286 tree int_ftype_v2df_v2df
19287 = build_function_type_list (integer_type_node,
19288 V2DF_type_node, V2DF_type_node, NULL_TREE);
19290 tree void_ftype_pcvoid
19291 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19292 tree v4sf_ftype_v4si
19293 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19294 tree v4si_ftype_v4sf
19295 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19296 tree v2df_ftype_v4si
19297 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19298 tree v4si_ftype_v2df
19299 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19300 tree v4si_ftype_v2df_v2df
19301 = build_function_type_list (V4SI_type_node,
19302 V2DF_type_node, V2DF_type_node, NULL_TREE);
19303 tree v2si_ftype_v2df
19304 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19305 tree v4sf_ftype_v2df
19306 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19307 tree v2df_ftype_v2si
19308 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19309 tree v2df_ftype_v4sf
19310 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19311 tree int_ftype_v2df
19312 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19313 tree int64_ftype_v2df
19314 = build_function_type_list (long_long_integer_type_node,
19315 V2DF_type_node, NULL_TREE);
19316 tree v2df_ftype_v2df_int
19317 = build_function_type_list (V2DF_type_node,
19318 V2DF_type_node, integer_type_node, NULL_TREE);
19319 tree v2df_ftype_v2df_int64
19320 = build_function_type_list (V2DF_type_node,
19321 V2DF_type_node, long_long_integer_type_node,
19323 tree v4sf_ftype_v4sf_v2df
19324 = build_function_type_list (V4SF_type_node,
19325 V4SF_type_node, V2DF_type_node, NULL_TREE);
19326 tree v2df_ftype_v2df_v4sf
19327 = build_function_type_list (V2DF_type_node,
19328 V2DF_type_node, V4SF_type_node, NULL_TREE);
19329 tree v2df_ftype_v2df_v2df_int
19330 = build_function_type_list (V2DF_type_node,
19331 V2DF_type_node, V2DF_type_node,
19334 tree v2df_ftype_v2df_pcdouble
19335 = build_function_type_list (V2DF_type_node,
19336 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19337 tree void_ftype_pdouble_v2df
19338 = build_function_type_list (void_type_node,
19339 pdouble_type_node, V2DF_type_node, NULL_TREE);
19340 tree void_ftype_pint_int
19341 = build_function_type_list (void_type_node,
19342 pint_type_node, integer_type_node, NULL_TREE);
19343 tree void_ftype_v16qi_v16qi_pchar
19344 = build_function_type_list (void_type_node,
19345 V16QI_type_node, V16QI_type_node,
19346 pchar_type_node, NULL_TREE);
19347 tree v2df_ftype_pcdouble
19348 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19349 tree v2df_ftype_v2df_v2df
19350 = build_function_type_list (V2DF_type_node,
19351 V2DF_type_node, V2DF_type_node, NULL_TREE);
19352 tree v16qi_ftype_v16qi_v16qi
19353 = build_function_type_list (V16QI_type_node,
19354 V16QI_type_node, V16QI_type_node, NULL_TREE);
19355 tree v8hi_ftype_v8hi_v8hi
19356 = build_function_type_list (V8HI_type_node,
19357 V8HI_type_node, V8HI_type_node, NULL_TREE);
19358 tree v4si_ftype_v4si_v4si
19359 = build_function_type_list (V4SI_type_node,
19360 V4SI_type_node, V4SI_type_node, NULL_TREE);
19361 tree v2di_ftype_v2di_v2di
19362 = build_function_type_list (V2DI_type_node,
19363 V2DI_type_node, V2DI_type_node, NULL_TREE);
19364 tree v2di_ftype_v2df_v2df
19365 = build_function_type_list (V2DI_type_node,
19366 V2DF_type_node, V2DF_type_node, NULL_TREE);
19367 tree v2df_ftype_v2df
19368 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19369 tree v2di_ftype_v2di_int
19370 = build_function_type_list (V2DI_type_node,
19371 V2DI_type_node, integer_type_node, NULL_TREE);
19372 tree v2di_ftype_v2di_v2di_int
19373 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19374 V2DI_type_node, integer_type_node, NULL_TREE);
19375 tree v4si_ftype_v4si_int
19376 = build_function_type_list (V4SI_type_node,
19377 V4SI_type_node, integer_type_node, NULL_TREE);
19378 tree v8hi_ftype_v8hi_int
19379 = build_function_type_list (V8HI_type_node,
19380 V8HI_type_node, integer_type_node, NULL_TREE);
19381 tree v4si_ftype_v8hi_v8hi
19382 = build_function_type_list (V4SI_type_node,
19383 V8HI_type_node, V8HI_type_node, NULL_TREE);
19384 tree v1di_ftype_v8qi_v8qi
19385 = build_function_type_list (V1DI_type_node,
19386 V8QI_type_node, V8QI_type_node, NULL_TREE);
19387 tree v1di_ftype_v2si_v2si
19388 = build_function_type_list (V1DI_type_node,
19389 V2SI_type_node, V2SI_type_node, NULL_TREE);
19390 tree v2di_ftype_v16qi_v16qi
19391 = build_function_type_list (V2DI_type_node,
19392 V16QI_type_node, V16QI_type_node, NULL_TREE);
19393 tree v2di_ftype_v4si_v4si
19394 = build_function_type_list (V2DI_type_node,
19395 V4SI_type_node, V4SI_type_node, NULL_TREE);
19396 tree int_ftype_v16qi
19397 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19398 tree v16qi_ftype_pcchar
19399 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19400 tree void_ftype_pchar_v16qi
19401 = build_function_type_list (void_type_node,
19402 pchar_type_node, V16QI_type_node, NULL_TREE);
19404 tree v2di_ftype_v2di_unsigned_unsigned
19405 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19406 unsigned_type_node, unsigned_type_node,
19408 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19409 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19410 unsigned_type_node, unsigned_type_node,
19412 tree v2di_ftype_v2di_v16qi
19413 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19415 tree v2df_ftype_v2df_v2df_v2df
19416 = build_function_type_list (V2DF_type_node,
19417 V2DF_type_node, V2DF_type_node,
19418 V2DF_type_node, NULL_TREE);
19419 tree v4sf_ftype_v4sf_v4sf_v4sf
19420 = build_function_type_list (V4SF_type_node,
19421 V4SF_type_node, V4SF_type_node,
19422 V4SF_type_node, NULL_TREE);
19423 tree v8hi_ftype_v16qi
19424 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19426 tree v4si_ftype_v16qi
19427 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19429 tree v2di_ftype_v16qi
19430 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19432 tree v4si_ftype_v8hi
19433 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19435 tree v2di_ftype_v8hi
19436 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19438 tree v2di_ftype_v4si
19439 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19441 tree v2di_ftype_pv2di
19442 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19444 tree v16qi_ftype_v16qi_v16qi_int
19445 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19446 V16QI_type_node, integer_type_node,
19448 tree v16qi_ftype_v16qi_v16qi_v16qi
19449 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19450 V16QI_type_node, V16QI_type_node,
19452 tree v8hi_ftype_v8hi_v8hi_int
19453 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19454 V8HI_type_node, integer_type_node,
19456 tree v4si_ftype_v4si_v4si_int
19457 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19458 V4SI_type_node, integer_type_node,
19460 tree int_ftype_v2di_v2di
19461 = build_function_type_list (integer_type_node,
19462 V2DI_type_node, V2DI_type_node,
19464 tree int_ftype_v16qi_int_v16qi_int_int
19465 = build_function_type_list (integer_type_node,
19472 tree v16qi_ftype_v16qi_int_v16qi_int_int
19473 = build_function_type_list (V16QI_type_node,
19480 tree int_ftype_v16qi_v16qi_int
19481 = build_function_type_list (integer_type_node,
19487 /* SSE5 instructions */
19488 tree v2di_ftype_v2di_v2di_v2di
19489 = build_function_type_list (V2DI_type_node,
19495 tree v4si_ftype_v4si_v4si_v4si
19496 = build_function_type_list (V4SI_type_node,
19502 tree v4si_ftype_v4si_v4si_v2di
19503 = build_function_type_list (V4SI_type_node,
19509 tree v8hi_ftype_v8hi_v8hi_v8hi
19510 = build_function_type_list (V8HI_type_node,
19516 tree v8hi_ftype_v8hi_v8hi_v4si
19517 = build_function_type_list (V8HI_type_node,
19523 tree v2df_ftype_v2df_v2df_v16qi
19524 = build_function_type_list (V2DF_type_node,
19530 tree v4sf_ftype_v4sf_v4sf_v16qi
19531 = build_function_type_list (V4SF_type_node,
19537 tree v2di_ftype_v2di_si
19538 = build_function_type_list (V2DI_type_node,
19543 tree v4si_ftype_v4si_si
19544 = build_function_type_list (V4SI_type_node,
19549 tree v8hi_ftype_v8hi_si
19550 = build_function_type_list (V8HI_type_node,
19555 tree v16qi_ftype_v16qi_si
19556 = build_function_type_list (V16QI_type_node,
19560 tree v4sf_ftype_v4hi
19561 = build_function_type_list (V4SF_type_node,
19565 tree v4hi_ftype_v4sf
19566 = build_function_type_list (V4HI_type_node,
19570 tree v2di_ftype_v2di
19571 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19573 tree v16qi_ftype_v8hi_v8hi
19574 = build_function_type_list (V16QI_type_node,
19575 V8HI_type_node, V8HI_type_node,
19577 tree v8hi_ftype_v4si_v4si
19578 = build_function_type_list (V8HI_type_node,
19579 V4SI_type_node, V4SI_type_node,
19581 tree v8hi_ftype_v16qi_v16qi
19582 = build_function_type_list (V8HI_type_node,
19583 V16QI_type_node, V16QI_type_node,
19585 tree v4hi_ftype_v8qi_v8qi
19586 = build_function_type_list (V4HI_type_node,
19587 V8QI_type_node, V8QI_type_node,
19589 tree unsigned_ftype_unsigned_uchar
19590 = build_function_type_list (unsigned_type_node,
19591 unsigned_type_node,
19592 unsigned_char_type_node,
19594 tree unsigned_ftype_unsigned_ushort
19595 = build_function_type_list (unsigned_type_node,
19596 unsigned_type_node,
19597 short_unsigned_type_node,
19599 tree unsigned_ftype_unsigned_unsigned
19600 = build_function_type_list (unsigned_type_node,
19601 unsigned_type_node,
19602 unsigned_type_node,
19604 tree uint64_ftype_uint64_uint64
19605 = build_function_type_list (long_long_unsigned_type_node,
19606 long_long_unsigned_type_node,
19607 long_long_unsigned_type_node,
19609 tree float_ftype_float
19610 = build_function_type_list (float_type_node,
19616 /* The __float80 type. */
19617 if (TYPE_MODE (long_double_type_node) == XFmode)
19618 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
19622 /* The __float80 type. */
19623 tree float80_type_node = make_node (REAL_TYPE);
19625 TYPE_PRECISION (float80_type_node) = 80;
19626 layout_type (float80_type_node);
19627 (*lang_hooks.types.register_builtin_type) (float80_type_node,
19633 tree float128_type_node = make_node (REAL_TYPE);
19635 TYPE_PRECISION (float128_type_node) = 128;
19636 layout_type (float128_type_node);
19637 (*lang_hooks.types.register_builtin_type) (float128_type_node,
19640 /* TFmode support builtins. */
19641 ftype = build_function_type (float128_type_node,
19643 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
19645 ftype = build_function_type_list (float128_type_node,
19646 float128_type_node,
19648 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
19650 ftype = build_function_type_list (float128_type_node,
19651 float128_type_node,
19652 float128_type_node,
19654 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
19657 /* Add all special builtins with variable number of operands. */
19658 for (i = 0, d = bdesc_special_args;
19659 i < ARRAY_SIZE (bdesc_special_args);
19667 switch ((enum ix86_special_builtin_type) d->flag)
19669 case VOID_FTYPE_VOID:
19670 type = void_ftype_void;
19672 case V16QI_FTYPE_PCCHAR:
19673 type = v16qi_ftype_pcchar;
19675 case V4SF_FTYPE_PCFLOAT:
19676 type = v4sf_ftype_pcfloat;
19678 case V2DI_FTYPE_PV2DI:
19679 type = v2di_ftype_pv2di;
19681 case V2DF_FTYPE_PCDOUBLE:
19682 type = v2df_ftype_pcdouble;
19684 case V4SF_FTYPE_V4SF_PCV2SF:
19685 type = v4sf_ftype_v4sf_pcv2sf;
19687 case V2DF_FTYPE_V2DF_PCDOUBLE:
19688 type = v2df_ftype_v2df_pcdouble;
19690 case VOID_FTYPE_PV2SF_V4SF:
19691 type = void_ftype_pv2sf_v4sf;
19693 case VOID_FTYPE_PV2DI_V2DI:
19694 type = void_ftype_pv2di_v2di;
19696 case VOID_FTYPE_PCHAR_V16QI:
19697 type = void_ftype_pchar_v16qi;
19699 case VOID_FTYPE_PFLOAT_V4SF:
19700 type = void_ftype_pfloat_v4sf;
19702 case VOID_FTYPE_PDOUBLE_V2DF:
19703 type = void_ftype_pdouble_v2df;
19705 case VOID_FTYPE_PDI_DI:
19706 type = void_ftype_pdi_di;
19708 case VOID_FTYPE_PINT_INT:
19709 type = void_ftype_pint_int;
19712 gcc_unreachable ();
19715 def_builtin (d->mask, d->name, type, d->code);
19718 /* Add all builtins with variable number of operands. */
19719 for (i = 0, d = bdesc_args;
19720 i < ARRAY_SIZE (bdesc_args);
19728 switch ((enum ix86_builtin_type) d->flag)
19730 case FLOAT_FTYPE_FLOAT:
19731 type = float_ftype_float;
19733 case INT_FTYPE_V2DI_V2DI_PTEST:
19734 type = int_ftype_v2di_v2di;
19736 case INT64_FTYPE_V4SF:
19737 type = int64_ftype_v4sf;
19739 case INT64_FTYPE_V2DF:
19740 type = int64_ftype_v2df;
19742 case INT_FTYPE_V16QI:
19743 type = int_ftype_v16qi;
19745 case INT_FTYPE_V8QI:
19746 type = int_ftype_v8qi;
19748 case INT_FTYPE_V4SF:
19749 type = int_ftype_v4sf;
19751 case INT_FTYPE_V2DF:
19752 type = int_ftype_v2df;
19754 case V16QI_FTYPE_V16QI:
19755 type = v16qi_ftype_v16qi;
19757 case V8HI_FTYPE_V8HI:
19758 type = v8hi_ftype_v8hi;
19760 case V8HI_FTYPE_V16QI:
19761 type = v8hi_ftype_v16qi;
19763 case V8QI_FTYPE_V8QI:
19764 type = v8qi_ftype_v8qi;
19766 case V4SI_FTYPE_V4SI:
19767 type = v4si_ftype_v4si;
19769 case V4SI_FTYPE_V16QI:
19770 type = v4si_ftype_v16qi;
19772 case V4SI_FTYPE_V8HI:
19773 type = v4si_ftype_v8hi;
19775 case V4SI_FTYPE_V4SF:
19776 type = v4si_ftype_v4sf;
19778 case V4SI_FTYPE_V2DF:
19779 type = v4si_ftype_v2df;
19781 case V4HI_FTYPE_V4HI:
19782 type = v4hi_ftype_v4hi;
19784 case V4SF_FTYPE_V4SF:
19785 case V4SF_FTYPE_V4SF_VEC_MERGE:
19786 type = v4sf_ftype_v4sf;
19788 case V4SF_FTYPE_V4SI:
19789 type = v4sf_ftype_v4si;
19791 case V4SF_FTYPE_V2DF:
19792 type = v4sf_ftype_v2df;
19794 case V2DI_FTYPE_V2DI:
19795 type = v2di_ftype_v2di;
19797 case V2DI_FTYPE_V16QI:
19798 type = v2di_ftype_v16qi;
19800 case V2DI_FTYPE_V8HI:
19801 type = v2di_ftype_v8hi;
19803 case V2DI_FTYPE_V4SI:
19804 type = v2di_ftype_v4si;
19806 case V2SI_FTYPE_V2SI:
19807 type = v2si_ftype_v2si;
19809 case V2SI_FTYPE_V4SF:
19810 type = v2si_ftype_v4sf;
19812 case V2SI_FTYPE_V2DF:
19813 type = v2si_ftype_v2df;
19815 case V2SI_FTYPE_V2SF:
19816 type = v2si_ftype_v2sf;
19818 case V2DF_FTYPE_V4SF:
19819 type = v2df_ftype_v4sf;
19821 case V2DF_FTYPE_V2DF:
19822 case V2DF_FTYPE_V2DF_VEC_MERGE:
19823 type = v2df_ftype_v2df;
19825 case V2DF_FTYPE_V2SI:
19826 type = v2df_ftype_v2si;
19828 case V2DF_FTYPE_V4SI:
19829 type = v2df_ftype_v4si;
19831 case V2SF_FTYPE_V2SF:
19832 type = v2sf_ftype_v2sf;
19834 case V2SF_FTYPE_V2SI:
19835 type = v2sf_ftype_v2si;
19837 case V16QI_FTYPE_V16QI_V16QI:
19838 type = v16qi_ftype_v16qi_v16qi;
19840 case V16QI_FTYPE_V8HI_V8HI:
19841 type = v16qi_ftype_v8hi_v8hi;
19843 case V8QI_FTYPE_V8QI_V8QI:
19844 type = v8qi_ftype_v8qi_v8qi;
19846 case V8QI_FTYPE_V4HI_V4HI:
19847 type = v8qi_ftype_v4hi_v4hi;
19849 case V8HI_FTYPE_V8HI_V8HI:
19850 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19851 type = v8hi_ftype_v8hi_v8hi;
19853 case V8HI_FTYPE_V16QI_V16QI:
19854 type = v8hi_ftype_v16qi_v16qi;
19856 case V8HI_FTYPE_V4SI_V4SI:
19857 type = v8hi_ftype_v4si_v4si;
19859 case V8HI_FTYPE_V8HI_SI_COUNT:
19860 type = v8hi_ftype_v8hi_int;
19862 case V4SI_FTYPE_V4SI_V4SI:
19863 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19864 type = v4si_ftype_v4si_v4si;
19866 case V4SI_FTYPE_V8HI_V8HI:
19867 type = v4si_ftype_v8hi_v8hi;
19869 case V4SI_FTYPE_V4SF_V4SF:
19870 type = v4si_ftype_v4sf_v4sf;
19872 case V4SI_FTYPE_V2DF_V2DF:
19873 type = v4si_ftype_v2df_v2df;
19875 case V4SI_FTYPE_V4SI_SI_COUNT:
19876 type = v4si_ftype_v4si_int;
19878 case V4HI_FTYPE_V4HI_V4HI:
19879 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19880 type = v4hi_ftype_v4hi_v4hi;
19882 case V4HI_FTYPE_V8QI_V8QI:
19883 type = v4hi_ftype_v8qi_v8qi;
19885 case V4HI_FTYPE_V2SI_V2SI:
19886 type = v4hi_ftype_v2si_v2si;
19888 case V4HI_FTYPE_V4HI_SI_COUNT:
19889 type = v4hi_ftype_v4hi_int;
19891 case V4SF_FTYPE_V4SF_V4SF:
19892 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19893 type = v4sf_ftype_v4sf_v4sf;
19895 case V4SF_FTYPE_V4SF_V2SI:
19896 type = v4sf_ftype_v4sf_v2si;
19898 case V4SF_FTYPE_V4SF_V2DF:
19899 type = v4sf_ftype_v4sf_v2df;
19901 case V4SF_FTYPE_V4SF_DI:
19902 type = v4sf_ftype_v4sf_int64;
19904 case V4SF_FTYPE_V4SF_SI:
19905 type = v4sf_ftype_v4sf_int;
19907 case V2DI_FTYPE_V2DI_V2DI:
19908 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19909 type = v2di_ftype_v2di_v2di;
19911 case V2DI_FTYPE_V16QI_V16QI:
19912 type = v2di_ftype_v16qi_v16qi;
19914 case V2DI_FTYPE_V4SI_V4SI:
19915 type = v2di_ftype_v4si_v4si;
19917 case V2DI_FTYPE_V2DI_V16QI:
19918 type = v2di_ftype_v2di_v16qi;
19920 case V2DI_FTYPE_V2DF_V2DF:
19921 type = v2di_ftype_v2df_v2df;
19923 case V2DI_FTYPE_V2DI_SI_COUNT:
19924 type = v2di_ftype_v2di_int;
19926 case V2SI_FTYPE_V2SI_V2SI:
19927 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19928 type = v2si_ftype_v2si_v2si;
19930 case V2SI_FTYPE_V4HI_V4HI:
19931 type = v2si_ftype_v4hi_v4hi;
19933 case V2SI_FTYPE_V2SF_V2SF:
19934 type = v2si_ftype_v2sf_v2sf;
19936 case V2SI_FTYPE_V2SI_SI_COUNT:
19937 type = v2si_ftype_v2si_int;
19939 case V2DF_FTYPE_V2DF_V2DF:
19940 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19941 type = v2df_ftype_v2df_v2df;
19943 case V2DF_FTYPE_V2DF_V4SF:
19944 type = v2df_ftype_v2df_v4sf;
19946 case V2DF_FTYPE_V2DF_DI:
19947 type = v2df_ftype_v2df_int64;
19949 case V2DF_FTYPE_V2DF_SI:
19950 type = v2df_ftype_v2df_int;
19952 case V2SF_FTYPE_V2SF_V2SF:
19953 type = v2sf_ftype_v2sf_v2sf;
19955 case V1DI_FTYPE_V1DI_V1DI:
19956 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19957 type = v1di_ftype_v1di_v1di;
19959 case V1DI_FTYPE_V8QI_V8QI:
19960 type = v1di_ftype_v8qi_v8qi;
19962 case V1DI_FTYPE_V2SI_V2SI:
19963 type = v1di_ftype_v2si_v2si;
19965 case V1DI_FTYPE_V1DI_SI_COUNT:
19966 type = v1di_ftype_v1di_int;
19968 case UINT64_FTYPE_UINT64_UINT64:
19969 type = uint64_ftype_uint64_uint64;
19971 case UINT_FTYPE_UINT_UINT:
19972 type = unsigned_ftype_unsigned_unsigned;
19974 case UINT_FTYPE_UINT_USHORT:
19975 type = unsigned_ftype_unsigned_ushort;
19977 case UINT_FTYPE_UINT_UCHAR:
19978 type = unsigned_ftype_unsigned_uchar;
19980 case V8HI_FTYPE_V8HI_INT:
19981 type = v8hi_ftype_v8hi_int;
19983 case V4SI_FTYPE_V4SI_INT:
19984 type = v4si_ftype_v4si_int;
19986 case V4HI_FTYPE_V4HI_INT:
19987 type = v4hi_ftype_v4hi_int;
19989 case V4SF_FTYPE_V4SF_INT:
19990 type = v4sf_ftype_v4sf_int;
19992 case V2DI_FTYPE_V2DI_INT:
19993 case V2DI2TI_FTYPE_V2DI_INT:
19994 type = v2di_ftype_v2di_int;
19996 case V2DF_FTYPE_V2DF_INT:
19997 type = v2df_ftype_v2df_int;
19999 case V16QI_FTYPE_V16QI_V16QI_V16QI:
20000 type = v16qi_ftype_v16qi_v16qi_v16qi;
20002 case V4SF_FTYPE_V4SF_V4SF_V4SF:
20003 type = v4sf_ftype_v4sf_v4sf_v4sf;
20005 case V2DF_FTYPE_V2DF_V2DF_V2DF:
20006 type = v2df_ftype_v2df_v2df_v2df;
20008 case V16QI_FTYPE_V16QI_V16QI_INT:
20009 type = v16qi_ftype_v16qi_v16qi_int;
20011 case V8HI_FTYPE_V8HI_V8HI_INT:
20012 type = v8hi_ftype_v8hi_v8hi_int;
20014 case V4SI_FTYPE_V4SI_V4SI_INT:
20015 type = v4si_ftype_v4si_v4si_int;
20017 case V4SF_FTYPE_V4SF_V4SF_INT:
20018 type = v4sf_ftype_v4sf_v4sf_int;
20020 case V2DI_FTYPE_V2DI_V2DI_INT:
20021 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20022 type = v2di_ftype_v2di_v2di_int;
20024 case V2DF_FTYPE_V2DF_V2DF_INT:
20025 type = v2df_ftype_v2df_v2df_int;
20027 case V2DI_FTYPE_V2DI_UINT_UINT:
20028 type = v2di_ftype_v2di_unsigned_unsigned;
20030 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20031 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
20033 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20034 type = v1di_ftype_v1di_v1di_int;
20037 gcc_unreachable ();
20040 def_builtin_const (d->mask, d->name, type, d->code);
20043 /* pcmpestr[im] insns. */
20044 for (i = 0, d = bdesc_pcmpestr;
20045 i < ARRAY_SIZE (bdesc_pcmpestr);
20048 if (d->code == IX86_BUILTIN_PCMPESTRM128)
20049 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20051 ftype = int_ftype_v16qi_int_v16qi_int_int;
20052 def_builtin_const (d->mask, d->name, ftype, d->code);
20055 /* pcmpistr[im] insns. */
20056 for (i = 0, d = bdesc_pcmpistr;
20057 i < ARRAY_SIZE (bdesc_pcmpistr);
20060 if (d->code == IX86_BUILTIN_PCMPISTRM128)
20061 ftype = v16qi_ftype_v16qi_v16qi_int;
20063 ftype = int_ftype_v16qi_v16qi_int;
20064 def_builtin_const (d->mask, d->name, ftype, d->code);
20067 /* comi/ucomi insns. */
20068 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20069 if (d->mask == OPTION_MASK_ISA_SSE2)
20070 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20072 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20075 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20076 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20078 /* SSE or 3DNow!A */
20079 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20082 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
20084 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
20085 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
20088 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
20089 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
20094 /* Define AES built-in functions only if AES is enabled. */
20095 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
20096 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
20097 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
20098 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
20099 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
20100 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
20106 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
20107 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
20110 /* Access to the vec_init patterns. */
20111 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
20112 integer_type_node, NULL_TREE);
20113 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
20115 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
20116 short_integer_type_node,
20117 short_integer_type_node,
20118 short_integer_type_node, NULL_TREE);
20119 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
20121 ftype = build_function_type_list (V8QI_type_node, char_type_node,
20122 char_type_node, char_type_node,
20123 char_type_node, char_type_node,
20124 char_type_node, char_type_node,
20125 char_type_node, NULL_TREE);
20126 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
20128 /* Access to the vec_extract patterns. */
20129 ftype = build_function_type_list (double_type_node, V2DF_type_node,
20130 integer_type_node, NULL_TREE);
20131 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
20133 ftype = build_function_type_list (long_long_integer_type_node,
20134 V2DI_type_node, integer_type_node,
20136 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
20138 ftype = build_function_type_list (float_type_node, V4SF_type_node,
20139 integer_type_node, NULL_TREE);
20140 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
20142 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20143 integer_type_node, NULL_TREE);
20144 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20146 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20147 integer_type_node, NULL_TREE);
20148 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20150 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20151 integer_type_node, NULL_TREE);
20152 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20154 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20155 integer_type_node, NULL_TREE);
20156 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20158 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20159 integer_type_node, NULL_TREE);
20160 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20162 /* Access to the vec_set patterns. */
20163 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20165 integer_type_node, NULL_TREE);
20166 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20168 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20170 integer_type_node, NULL_TREE);
20171 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20173 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20175 integer_type_node, NULL_TREE);
20176 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20178 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20180 integer_type_node, NULL_TREE);
20181 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20183 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20185 integer_type_node, NULL_TREE);
20186 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20188 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20190 integer_type_node, NULL_TREE);
20191 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20193 /* Add SSE5 multi-arg argument instructions */
20194 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20196 tree mtype = NULL_TREE;
20201 switch ((enum multi_arg_type)d->flag)
20203 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20204 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20205 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20206 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20207 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20208 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20209 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20210 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20211 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20212 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20213 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20214 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20215 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20216 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20217 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20218 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20219 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20220 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20221 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20222 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20223 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20224 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20225 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20226 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20227 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20228 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20229 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20230 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20231 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20232 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20233 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20234 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20235 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20236 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20237 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20238 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20239 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20240 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20241 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20242 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20243 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20244 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20245 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20246 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20247 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20248 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20249 case MULTI_ARG_UNKNOWN:
20251 gcc_unreachable ();
20255 def_builtin_const (d->mask, d->name, mtype, d->code);
/* Target hook: create all ix86 builtin function declarations.  All of
   the work is delegated to the MMX/SSE initializer.  */
20260 ix86_init_builtins (void)
20263 ix86_init_mmx_sse_builtins ();
20266 /* Errors in the source file can cause expand_expr to return const0_rtx
20267 where we expect a vector. To avoid crashing, use one of the vector
20268 clear instructions. */
20270 safe_vector_operand (rtx x, enum machine_mode mode)
20272 if (x == const0_rtx)
      /* Substitute an all-zeros vector constant of the expected MODE so
	 that later predicate checks and insn emission do not choke on a
	 scalar const0_rtx produced after a source error.  */
20273 x = CONST0_RTX (mode);
20277 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20280 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
      /* Expand the call's two operands into RTL.  */
20283 tree arg0 = CALL_EXPR_ARG (exp, 0);
20284 tree arg1 = CALL_EXPR_ARG (exp, 1);
20285 rtx op0 = expand_normal (arg0);
20286 rtx op1 = expand_normal (arg1);
      /* Modes the insn pattern wants for the result and both inputs.  */
20287 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20288 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20289 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
      /* Guard vector operands against the const0_rtx that expand_expr
	 can return after a source error.  */
20291 if (VECTOR_MODE_P (mode0))
20292 op0 = safe_vector_operand (op0, mode0);
20293 if (VECTOR_MODE_P (mode1))
20294 op1 = safe_vector_operand (op1, mode1);
      /* Reuse the caller's TARGET only when it has the right mode and
	 satisfies the pattern's predicate; otherwise use a new pseudo.  */
20296 if (optimize || !target
20297 || GET_MODE (target) != tmode
20298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20299 target = gen_reg_rtx (tmode);
      /* An SImode second operand feeding a TImode pattern is widened by
	 loading it into the low element of a V4SI register first.  */
20301 if (GET_MODE (op1) == SImode && mode1 == TImode)
20303 rtx x = gen_reg_rtx (V4SImode);
20304 emit_insn (gen_sse2_loadd (x, op1));
20305 op1 = gen_lowpart (TImode, x);
      /* Force either operand into a register of the expected mode when
	 the pattern's predicate rejects it as-is.  */
20308 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20309 op0 = copy_to_mode_reg (mode0, op0);
20310 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20311 op1 = copy_to_mode_reg (mode1, op1);
      /* Emit the actual binop insn.  */
20313 pat = GEN_FCN (icode) (target, op0, op1);
20322 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20325 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20326 enum multi_arg_type m_type,
20327 enum insn_code sub_code)
      /* COMPARISON_P: the pattern takes an extra comparison-code rtx
	 before the regular operands.  LAST_ARG_CONSTANT: the final call
	 argument must be a CONST_INT immediate.  */
20332 bool comparison_p = false;
20334 bool last_arg_constant = false;
20335 int num_memory = 0;
20338 enum machine_mode mode;
20341 enum machine_mode tmode = insn_data[icode].operand[0].mode;
      /* Classify M_TYPE; each group presumably also sets the operand
	 count consumed below (assignments not all visible here --
	 TODO confirm against full source).  */
      /* Three-operand forms.  */
20345 case MULTI_ARG_3_SF:
20346 case MULTI_ARG_3_DF:
20347 case MULTI_ARG_3_DI:
20348 case MULTI_ARG_3_SI:
20349 case MULTI_ARG_3_SI_DI:
20350 case MULTI_ARG_3_HI:
20351 case MULTI_ARG_3_HI_SI:
20352 case MULTI_ARG_3_QI:
20353 case MULTI_ARG_3_PERMPS:
20354 case MULTI_ARG_3_PERMPD:
      /* Plain two-operand forms.  */
20358 case MULTI_ARG_2_SF:
20359 case MULTI_ARG_2_DF:
20360 case MULTI_ARG_2_DI:
20361 case MULTI_ARG_2_SI:
20362 case MULTI_ARG_2_HI:
20363 case MULTI_ARG_2_QI:
      /* Two-operand forms whose second operand is an immediate.  */
20367 case MULTI_ARG_2_DI_IMM:
20368 case MULTI_ARG_2_SI_IMM:
20369 case MULTI_ARG_2_HI_IMM:
20370 case MULTI_ARG_2_QI_IMM:
20372 last_arg_constant = true;
      /* One-operand (possibly widening/converting) forms.  */
20375 case MULTI_ARG_1_SF:
20376 case MULTI_ARG_1_DF:
20377 case MULTI_ARG_1_DI:
20378 case MULTI_ARG_1_SI:
20379 case MULTI_ARG_1_HI:
20380 case MULTI_ARG_1_QI:
20381 case MULTI_ARG_1_SI_DI:
20382 case MULTI_ARG_1_HI_DI:
20383 case MULTI_ARG_1_HI_SI:
20384 case MULTI_ARG_1_QI_DI:
20385 case MULTI_ARG_1_QI_SI:
20386 case MULTI_ARG_1_QI_HI:
20387 case MULTI_ARG_1_PH2PS:
20388 case MULTI_ARG_1_PS2PH:
      /* Two-operand comparisons; SUB_CODE supplies the rtx comparison.  */
20392 case MULTI_ARG_2_SF_CMP:
20393 case MULTI_ARG_2_DF_CMP:
20394 case MULTI_ARG_2_DI_CMP:
20395 case MULTI_ARG_2_SI_CMP:
20396 case MULTI_ARG_2_HI_CMP:
20397 case MULTI_ARG_2_QI_CMP:
20399 comparison_p = true;
      /* Two-operand "true/false" test forms.  */
20402 case MULTI_ARG_2_SF_TF:
20403 case MULTI_ARG_2_DF_TF:
20404 case MULTI_ARG_2_DI_TF:
20405 case MULTI_ARG_2_SI_TF:
20406 case MULTI_ARG_2_HI_TF:
20407 case MULTI_ARG_2_QI_TF:
20412 case MULTI_ARG_UNKNOWN:
20414 gcc_unreachable ();
      /* Reuse TARGET only if mode and predicate allow it.  */
20417 if (optimize || !target
20418 || GET_MODE (target) != tmode
20419 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20420 target = gen_reg_rtx (tmode);
20422 gcc_assert (nargs <= 4);
      /* Expand and legitimize each call argument.  */
20424 for (i = 0; i < nargs; i++)
20426 tree arg = CALL_EXPR_ARG (exp, i);
20427 rtx op = expand_normal (arg);
      /* With a comparison the pattern's operand 1 is the comparison rtx,
	 so real operands shift up by one slot.  */
20428 int adjust = (comparison_p) ? 1 : 0;
20429 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
      /* The trailing immediate must be a literal constant; diagnose and
	 bail out with a harmless dummy result otherwise.  */
20431 if (last_arg_constant && i == nargs-1)
20433 if (GET_CODE (op) != CONST_INT)
20435 error ("last argument must be an immediate");
20436 return gen_reg_rtx (tmode);
20441 if (VECTOR_MODE_P (mode))
20442 op = safe_vector_operand (op, mode);
20444 /* If we aren't optimizing, only allow one memory operand to be
20446 if (memory_operand (op, mode))
20449 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20452 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20454 op = force_reg (mode, op);
20458 args[i].mode = mode;
      /* Emit the insn; the operand layout depends on arity and on
	 whether a comparison code has to be interposed.  */
20464 pat = GEN_FCN (icode) (target, args[0].op);
20469 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20470 GEN_INT ((int)sub_code));
20471 else if (! comparison_p)
20472 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      /* Comparison form: build the comparison rtx from SUB_CODE and pass
	 it as the pattern's extra operand.  */
20475 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20479 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20484 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20488 gcc_unreachable ();
20498 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20499 insns with vec_merge. */
20502 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
      /* Single call argument; OP1 is filled in below (presumably from
	 OP0, the duplicated vec_merge source -- assignment not visible
	 here, TODO confirm).  */
20506 tree arg0 = CALL_EXPR_ARG (exp, 0);
20507 rtx op1, op0 = expand_normal (arg0);
20508 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20509 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
      /* Reuse TARGET only if mode and predicate allow it.  */
20511 if (optimize || !target
20512 || GET_MODE (target) != tmode
20513 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20514 target = gen_reg_rtx (tmode);
      /* Guard against const0_rtx from an erroneous source.  */
20516 if (VECTOR_MODE_P (mode0))
20517 op0 = safe_vector_operand (op0, mode0);
      /* Legitimize both operands against the pattern's predicates.  */
20519 if ((optimize && !register_operand (op0, mode0))
20520 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20521 op0 = copy_to_mode_reg (mode0, op0);
20524 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20525 op1 = copy_to_mode_reg (mode0, op1);
20527 pat = GEN_FCN (icode) (target, op0, op1);
20534 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
20537 ix86_expand_sse_compare (const struct builtin_description *d,
20538 tree exp, rtx target, bool swap)
      /* Expand both operands of the comparison.  */
20541 tree arg0 = CALL_EXPR_ARG (exp, 0);
20542 tree arg1 = CALL_EXPR_ARG (exp, 1);
20543 rtx op0 = expand_normal (arg0);
20544 rtx op1 = expand_normal (arg1);
      /* Result/input modes and the rtx comparison code come from the
	 builtin's description table entry.  */
20546 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20547 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20548 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20549 enum rtx_code comparison = d->comparison;
      /* Guard against const0_rtx from an erroneous source.  */
20551 if (VECTOR_MODE_P (mode0))
20552 op0 = safe_vector_operand (op0, mode0);
20553 if (VECTOR_MODE_P (mode1))
20554 op1 = safe_vector_operand (op1, mode1);
20556 /* Swap operands if we have a comparison that isn't available in
      /* OP1 is copied into a fresh register before the exchange
	 (exchange itself not visible here -- TODO confirm).  */
20560 rtx tmp = gen_reg_rtx (mode1);
20561 emit_move_insn (tmp, op1);
      /* Reuse TARGET only if mode and predicate allow it.  */
20566 if (optimize || !target
20567 || GET_MODE (target) != tmode
20568 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20569 target = gen_reg_rtx (tmode);
      /* Legitimize both operands against the pattern's predicates.  */
20571 if ((optimize && !register_operand (op0, mode0))
20572 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20573 op0 = copy_to_mode_reg (mode0, op0);
20574 if ((optimize && !register_operand (op1, mode1))
20575 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20576 op1 = copy_to_mode_reg (mode1, op1);
      /* Pattern operand 3 is the comparison rtx itself.  */
20578 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20579 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20586 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
20589 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
      /* Expand both operands of the scalar compare.  */
20593 tree arg0 = CALL_EXPR_ARG (exp, 0);
20594 tree arg1 = CALL_EXPR_ARG (exp, 1);
20595 rtx op0 = expand_normal (arg0);
20596 rtx op1 = expand_normal (arg1);
20597 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20598 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20599 enum rtx_code comparison = d->comparison;
      /* Guard against const0_rtx from an erroneous source.  */
20601 if (VECTOR_MODE_P (mode0))
20602 op0 = safe_vector_operand (op0, mode0);
20603 if (VECTOR_MODE_P (mode1))
20604 op1 = safe_vector_operand (op1, mode1);
20606 /* Swap operands if we have a comparison that isn't available in
20608 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
      /* Build the result as a zeroed SImode pseudo and set only its low
	 QImode part from the flags, so the upper bits stay zero.  */
20615 target = gen_reg_rtx (SImode);
20616 emit_move_insn (target, const0_rtx);
20617 target = gen_rtx_SUBREG (QImode, target, 0);
      /* Legitimize both operands against the pattern's predicates.  */
20619 if ((optimize && !register_operand (op0, mode0))
20620 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20621 op0 = copy_to_mode_reg (mode0, op0);
20622 if ((optimize && !register_operand (op1, mode1))
20623 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20624 op1 = copy_to_mode_reg (mode1, op1);
      /* Emit the comi/ucomi insn (sets the flags), then materialize the
	 comparison into the low byte of the result.  */
20626 pat = GEN_FCN (d->icode) (op0, op1);
20630 emit_insn (gen_rtx_SET (VOIDmode,
20631 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20632 gen_rtx_fmt_ee (comparison, QImode,
      /* Return the full SImode pseudo underlying the QImode subreg.  */
20636 return SUBREG_REG (target);
20639 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
20642 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
      /* Expand both vector operands of the ptest.  */
20646 tree arg0 = CALL_EXPR_ARG (exp, 0);
20647 tree arg1 = CALL_EXPR_ARG (exp, 1);
20648 rtx op0 = expand_normal (arg0);
20649 rtx op1 = expand_normal (arg1);
20650 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20651 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20652 enum rtx_code comparison = d->comparison;
      /* Guard against const0_rtx from an erroneous source.  */
20654 if (VECTOR_MODE_P (mode0))
20655 op0 = safe_vector_operand (op0, mode0);
20656 if (VECTOR_MODE_P (mode1))
20657 op1 = safe_vector_operand (op1, mode1);
      /* Build the result as a zeroed SImode pseudo and set only its low
	 QImode part from the flags, so the upper bits stay zero.  */
20659 target = gen_reg_rtx (SImode);
20660 emit_move_insn (target, const0_rtx);
20661 target = gen_rtx_SUBREG (QImode, target, 0);
      /* Legitimize both operands against the pattern's predicates.  */
20663 if ((optimize && !register_operand (op0, mode0))
20664 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20665 op0 = copy_to_mode_reg (mode0, op0);
20666 if ((optimize && !register_operand (op1, mode1))
20667 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20668 op1 = copy_to_mode_reg (mode1, op1);
      /* Emit the ptest insn (sets the flags), then materialize the
	 comparison into the low byte of the result.  */
20670 pat = GEN_FCN (d->icode) (op0, op1);
20674 emit_insn (gen_rtx_SET (VOIDmode,
20675 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20676 gen_rtx_fmt_ee (comparison, QImode,
      /* Return the full SImode pseudo underlying the QImode subreg.  */
20680 return SUBREG_REG (target);
20683 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expands the SSE4.2 pcmpestri/pcmpestrm builtins (explicit-length string
   compare).  Five call arguments: two vectors, their two lengths, and an
   8-bit immediate control byte.  For the ...I/...M variants the index/mask
   result is returned; otherwise D->FLAG names a flags register whose EQ
   result is materialized into the low byte of an SImode pseudo.  */
20686 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20687 tree exp, rtx target)
20690 tree arg0 = CALL_EXPR_ARG (exp, 0);
20691 tree arg1 = CALL_EXPR_ARG (exp, 1);
20692 tree arg2 = CALL_EXPR_ARG (exp, 2);
20693 tree arg3 = CALL_EXPR_ARG (exp, 3);
20694 tree arg4 = CALL_EXPR_ARG (exp, 4);
20695 rtx scratch0, scratch1;
20696 rtx op0 = expand_normal (arg0);
20697 rtx op1 = expand_normal (arg1);
20698 rtx op2 = expand_normal (arg2);
20699 rtx op3 = expand_normal (arg3);
20700 rtx op4 = expand_normal (arg4);
20701 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand 0/1 are the two outputs (index and mask); 2-5 are the two
   vector/length input pairs; 6 is the immediate control byte.  */
20703 tmode0 = insn_data[d->icode].operand[0].mode;
20704 tmode1 = insn_data[d->icode].operand[1].mode;
20705 modev2 = insn_data[d->icode].operand[2].mode;
20706 modei3 = insn_data[d->icode].operand[3].mode;
20707 modev4 = insn_data[d->icode].operand[4].mode;
20708 modei5 = insn_data[d->icode].operand[5].mode;
20709 modeimm = insn_data[d->icode].operand[6].mode;
20711 if (VECTOR_MODE_P (modev2))
20712 op0 = safe_vector_operand (op0, modev2);
20713 if (VECTOR_MODE_P (modev4))
20714 op2 = safe_vector_operand (op2, modev4);
/* Copy operands into registers when the insn predicates demand it.  */
20716 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20717 op0 = copy_to_mode_reg (modev2, op0);
20718 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20719 op1 = copy_to_mode_reg (modei3, op1);
20720 if ((optimize && !register_operand (op2, modev4))
20721 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20722 op2 = copy_to_mode_reg (modev4, op2);
20723 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20724 op3 = copy_to_mode_reg (modei5, op3);
20726 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
/* Fixed grammar of the diagnostic: "an 8-bit", not "a 8-bit".  */
20728 error ("the fifth argument must be an 8-bit immediate");
20732 if (d->code == IX86_BUILTIN_PCMPESTRI128)
/* pcmpestri: the index output is the value the user wants.  */
20734 if (optimize || !target
20735 || GET_MODE (target) != tmode0
20736 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20737 target = gen_reg_rtx (tmode0);
20739 scratch1 = gen_reg_rtx (tmode1);
20741 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
20743 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
/* pcmpestrm: the mask output is the value the user wants.  */
20745 if (optimize || !target
20746 || GET_MODE (target) != tmode1
20747 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20748 target = gen_reg_rtx (tmode1);
20750 scratch0 = gen_reg_rtx (tmode0);
20752 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Otherwise only a flags bit is wanted; D->FLAG must name it.  */
20756 gcc_assert (d->flag);
20758 scratch0 = gen_reg_rtx (tmode0);
20759 scratch1 = gen_reg_rtx (tmode1);
20761 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Store the EQ test of the flags register named by D->FLAG into the
   low byte of a zeroed SImode pseudo and return that pseudo.  */
20771 target = gen_reg_rtx (SImode);
20772 emit_move_insn (target, const0_rtx);
20773 target = gen_rtx_SUBREG (QImode, target, 0);
20776 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20777 gen_rtx_fmt_ee (EQ, QImode,
20778 gen_rtx_REG ((enum machine_mode) d->flag,
20781 return SUBREG_REG (target);
20788 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expands the SSE4.2 pcmpistri/pcmpistrm builtins (implicit-length string
   compare).  Three call arguments: two vectors and an 8-bit immediate
   control byte.  Mirrors ix86_expand_sse_pcmpestr, minus the two explicit
   length operands.  */
20791 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20792 tree exp, rtx target)
20795 tree arg0 = CALL_EXPR_ARG (exp, 0);
20796 tree arg1 = CALL_EXPR_ARG (exp, 1);
20797 tree arg2 = CALL_EXPR_ARG (exp, 2);
20798 rtx scratch0, scratch1;
20799 rtx op0 = expand_normal (arg0);
20800 rtx op1 = expand_normal (arg1);
20801 rtx op2 = expand_normal (arg2);
20802 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operand 0/1 are the index and mask outputs; 2/3 the vector inputs;
   4 the immediate control byte.  */
20804 tmode0 = insn_data[d->icode].operand[0].mode;
20805 tmode1 = insn_data[d->icode].operand[1].mode;
20806 modev2 = insn_data[d->icode].operand[2].mode;
20807 modev3 = insn_data[d->icode].operand[3].mode;
20808 modeimm = insn_data[d->icode].operand[4].mode;
20810 if (VECTOR_MODE_P (modev2))
20811 op0 = safe_vector_operand (op0, modev2);
20812 if (VECTOR_MODE_P (modev3))
20813 op1 = safe_vector_operand (op1, modev3);
/* Copy operands into registers when the insn predicates demand it.  */
20815 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20816 op0 = copy_to_mode_reg (modev2, op0);
20817 if ((optimize && !register_operand (op1, modev3))
20818 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20819 op1 = copy_to_mode_reg (modev3, op1);
20821 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
/* Fixed grammar of the diagnostic: "an 8-bit", not "a 8-bit".  */
20823 error ("the third argument must be an 8-bit immediate");
20827 if (d->code == IX86_BUILTIN_PCMPISTRI128)
/* pcmpistri: the index output is the value the user wants.  */
20829 if (optimize || !target
20830 || GET_MODE (target) != tmode0
20831 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20832 target = gen_reg_rtx (tmode0);
20834 scratch1 = gen_reg_rtx (tmode1);
20836 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
20838 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
/* pcmpistrm: the mask output is the value the user wants.  */
20840 if (optimize || !target
20841 || GET_MODE (target) != tmode1
20842 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20843 target = gen_reg_rtx (tmode1);
20845 scratch0 = gen_reg_rtx (tmode0);
20847 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Otherwise only a flags bit is wanted; D->FLAG must name it.  */
20851 gcc_assert (d->flag);
20853 scratch0 = gen_reg_rtx (tmode0);
20854 scratch1 = gen_reg_rtx (tmode1);
20856 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Store the EQ test of the flags register named by D->FLAG into the
   low byte of a zeroed SImode pseudo and return that pseudo.  */
20866 target = gen_reg_rtx (SImode);
20867 emit_move_insn (target, const0_rtx);
20868 target = gen_rtx_SUBREG (QImode, target, 0);
20871 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20872 gen_rtx_fmt_ee (EQ, QImode,
20873 gen_rtx_REG ((enum machine_mode) d->flag,
20876 return SUBREG_REG (target);
20882 /* Subroutine of ix86_expand_builtin to take care of insns with
20883 variable number of operands. */
/* Generic expander driven by D->FLAG, an ix86_builtin_type describing the
   builtin's signature.  The big switch classifies the signature, setting
   NARGS, NARGS_CONSTANT (trailing immediates), LAST_ARG_COUNT (shift-count
   argument) and RMODE, then falls through to common operand setup and the
   final GEN_FCN dispatch on argument count.  */
20886 ix86_expand_args_builtin (const struct builtin_description *d,
20887 tree exp, rtx target)
20889 rtx pat, real_target;
20890 unsigned int i, nargs;
20891 unsigned int nargs_constant = 0;
20892 int num_memory = 0;
20896 enum machine_mode mode;
20898 bool last_arg_count = false;
20899 enum insn_code icode = d->icode;
20900 const struct insn_data *insn_p = &insn_data[icode];
20901 enum machine_mode tmode = insn_p->operand[0].mode;
20902 enum machine_mode rmode = VOIDmode;
20904 enum rtx_code comparison = d->comparison;
20906 switch ((enum ix86_builtin_type) d->flag)
/* PTEST builtins have their own dedicated expander.  */
20908 case INT_FTYPE_V2DI_V2DI_PTEST:
20909 return ix86_expand_sse_ptest (d, exp, target);
/* Plain unary signatures.  */
20910 case FLOAT128_FTYPE_FLOAT128:
20911 case FLOAT_FTYPE_FLOAT:
20912 case INT64_FTYPE_V4SF:
20913 case INT64_FTYPE_V2DF:
20914 case INT_FTYPE_V16QI:
20915 case INT_FTYPE_V8QI:
20916 case INT_FTYPE_V4SF:
20917 case INT_FTYPE_V2DF:
20918 case V16QI_FTYPE_V16QI:
20919 case V8HI_FTYPE_V8HI:
20920 case V8HI_FTYPE_V16QI:
20921 case V8QI_FTYPE_V8QI:
20922 case V4SI_FTYPE_V4SI:
20923 case V4SI_FTYPE_V16QI:
20924 case V4SI_FTYPE_V4SF:
20925 case V4SI_FTYPE_V8HI:
20926 case V4SI_FTYPE_V2DF:
20927 case V4HI_FTYPE_V4HI:
20928 case V4SF_FTYPE_V4SF:
20929 case V4SF_FTYPE_V4SI:
20930 case V4SF_FTYPE_V2DF:
20931 case V2DI_FTYPE_V2DI:
20932 case V2DI_FTYPE_V16QI:
20933 case V2DI_FTYPE_V8HI:
20934 case V2DI_FTYPE_V4SI:
20935 case V2DF_FTYPE_V2DF:
20936 case V2DF_FTYPE_V4SI:
20937 case V2DF_FTYPE_V4SF:
20938 case V2DF_FTYPE_V2SI:
20939 case V2SI_FTYPE_V2SI:
20940 case V2SI_FTYPE_V4SF:
20941 case V2SI_FTYPE_V2SF:
20942 case V2SI_FTYPE_V2DF:
20943 case V2SF_FTYPE_V2SF:
20944 case V2SF_FTYPE_V2SI:
/* Unary ops that merge with the destination need a dedicated expander.  */
20947 case V4SF_FTYPE_V4SF_VEC_MERGE:
20948 case V2DF_FTYPE_V2DF_VEC_MERGE:
20949 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Plain binary signatures; comparisons divert to the SSE-compare path.  */
20950 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
20951 case V16QI_FTYPE_V16QI_V16QI:
20952 case V16QI_FTYPE_V8HI_V8HI:
20953 case V8QI_FTYPE_V8QI_V8QI:
20954 case V8QI_FTYPE_V4HI_V4HI:
20955 case V8HI_FTYPE_V8HI_V8HI:
20956 case V8HI_FTYPE_V16QI_V16QI:
20957 case V8HI_FTYPE_V4SI_V4SI:
20958 case V4SI_FTYPE_V4SI_V4SI:
20959 case V4SI_FTYPE_V8HI_V8HI:
20960 case V4SI_FTYPE_V4SF_V4SF:
20961 case V4SI_FTYPE_V2DF_V2DF:
20962 case V4HI_FTYPE_V4HI_V4HI:
20963 case V4HI_FTYPE_V8QI_V8QI:
20964 case V4HI_FTYPE_V2SI_V2SI:
20965 case V4SF_FTYPE_V4SF_V4SF:
20966 case V4SF_FTYPE_V4SF_V2SI:
20967 case V4SF_FTYPE_V4SF_V2DF:
20968 case V4SF_FTYPE_V4SF_DI:
20969 case V4SF_FTYPE_V4SF_SI:
20970 case V2DI_FTYPE_V2DI_V2DI:
20971 case V2DI_FTYPE_V16QI_V16QI:
20972 case V2DI_FTYPE_V4SI_V4SI:
20973 case V2DI_FTYPE_V2DI_V16QI:
20974 case V2DI_FTYPE_V2DF_V2DF:
20975 case V2SI_FTYPE_V2SI_V2SI:
20976 case V2SI_FTYPE_V4HI_V4HI:
20977 case V2SI_FTYPE_V2SF_V2SF:
20978 case V2DF_FTYPE_V2DF_V2DF:
20979 case V2DF_FTYPE_V2DF_V4SF:
20980 case V2DF_FTYPE_V2DF_DI:
20981 case V2DF_FTYPE_V2DF_SI:
20982 case V2SF_FTYPE_V2SF_V2SF:
20983 case V1DI_FTYPE_V1DI_V1DI:
20984 case V1DI_FTYPE_V8QI_V8QI:
20985 case V1DI_FTYPE_V2SI_V2SI:
20986 if (comparison == UNKNOWN)
20987 return ix86_expand_binop_builtin (icode, exp, target);
/* Comparison variants that need their operands swapped.  */
20990 case V4SF_FTYPE_V4SF_V4SF_SWAP:
20991 case V2DF_FTYPE_V2DF_V2DF_SWAP:
20992 gcc_assert (comparison != UNKNOWN);
/* Shift builtins whose last argument is a count (int or immediate).  */
20996 case V8HI_FTYPE_V8HI_V8HI_COUNT:
20997 case V8HI_FTYPE_V8HI_SI_COUNT:
20998 case V4SI_FTYPE_V4SI_V4SI_COUNT:
20999 case V4SI_FTYPE_V4SI_SI_COUNT:
21000 case V4HI_FTYPE_V4HI_V4HI_COUNT:
21001 case V4HI_FTYPE_V4HI_SI_COUNT:
21002 case V2DI_FTYPE_V2DI_V2DI_COUNT:
21003 case V2DI_FTYPE_V2DI_SI_COUNT:
21004 case V2SI_FTYPE_V2SI_V2SI_COUNT:
21005 case V2SI_FTYPE_V2SI_SI_COUNT:
21006 case V1DI_FTYPE_V1DI_V1DI_COUNT:
21007 case V1DI_FTYPE_V1DI_SI_COUNT:
21009 last_arg_count = true;
21011 case UINT64_FTYPE_UINT64_UINT64:
21012 case UINT_FTYPE_UINT_UINT:
21013 case UINT_FTYPE_UINT_USHORT:
21014 case UINT_FTYPE_UINT_UCHAR:
/* Signatures with trailing constant (immediate) arguments.  */
21017 case V2DI2TI_FTYPE_V2DI_INT:
21020 nargs_constant = 1;
21022 case V8HI_FTYPE_V8HI_INT:
21023 case V4SI_FTYPE_V4SI_INT:
21024 case V4HI_FTYPE_V4HI_INT:
21025 case V4SF_FTYPE_V4SF_INT:
21026 case V2DI_FTYPE_V2DI_INT:
21027 case V2DF_FTYPE_V2DF_INT:
21029 nargs_constant = 1;
21031 case V16QI_FTYPE_V16QI_V16QI_V16QI:
21032 case V4SF_FTYPE_V4SF_V4SF_V4SF:
21033 case V2DF_FTYPE_V2DF_V2DF_V2DF:
21036 case V16QI_FTYPE_V16QI_V16QI_INT:
21037 case V8HI_FTYPE_V8HI_V8HI_INT:
21038 case V4SI_FTYPE_V4SI_V4SI_INT:
21039 case V4SF_FTYPE_V4SF_V4SF_INT:
21040 case V2DI_FTYPE_V2DI_V2DI_INT:
21041 case V2DF_FTYPE_V2DF_V2DF_INT:
21043 nargs_constant = 1;
21045 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21048 nargs_constant = 1;
21050 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21053 nargs_constant = 1;
21055 case V2DI_FTYPE_V2DI_UINT_UINT:
21057 nargs_constant = 2;
21059 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21061 nargs_constant = 2;
21064 gcc_unreachable ();
21067 gcc_assert (nargs <= ARRAY_SIZE (args));
/* Comparisons are handled by the dedicated SSE-compare expander.  */
21069 if (comparison != UNKNOWN)
21071 gcc_assert (nargs == 2);
21072 return ix86_expand_sse_compare (d, exp, target, swap);
/* When RMODE differs from TMODE the result is produced in RMODE and
   accessed through a TMODE subreg (REAL_TARGET).  */
21075 if (rmode == VOIDmode || rmode == tmode)
21079 || GET_MODE (target) != tmode
21080 || ! (*insn_p->operand[0].predicate) (target, tmode))
21081 target = gen_reg_rtx (tmode);
21082 real_target = target;
21086 target = gen_reg_rtx (rmode);
21087 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each argument against its insn operand.  */
21090 for (i = 0; i < nargs; i++)
21092 tree arg = CALL_EXPR_ARG (exp, i);
21093 rtx op = expand_normal (arg);
21094 enum machine_mode mode = insn_p->operand[i + 1].mode;
21095 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
21097 if (last_arg_count && (i + 1) == nargs)
21099 /* SIMD shift insns take either an 8-bit immediate or
21100 register as count. But builtin functions take int as
21101 count. If count doesn't match, we put it in register. */
21104 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
21105 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
21106 op = copy_to_reg (op);
/* Trailing arguments that must be immediates: diagnose the exact
   width expected by the particular insn.  */
21109 else if ((nargs - i) <= nargs_constant)
21114 case CODE_FOR_sse4_1_roundpd:
21115 case CODE_FOR_sse4_1_roundps:
21116 case CODE_FOR_sse4_1_roundsd:
21117 case CODE_FOR_sse4_1_roundss:
21118 case CODE_FOR_sse4_1_blendps:
21119 error ("the last argument must be a 4-bit immediate");
21122 case CODE_FOR_sse4_1_blendpd:
21123 error ("the last argument must be a 2-bit immediate");
21127 switch (nargs_constant)
21130 if ((nargs - i) == nargs_constant)
21132 error ("the next to last argument must be an 8-bit immediate");
21136 error ("the last argument must be an 8-bit immediate");
21139 gcc_unreachable ();
21146 if (VECTOR_MODE_P (mode))
21147 op = safe_vector_operand (op, mode);
21149 /* If we aren't optimizing, only allow one memory operand to
21151 if (memory_operand (op, mode))
21154 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21156 if (optimize || !match || num_memory > 1)
21157 op = copy_to_mode_reg (mode, op);
21161 op = copy_to_reg (op);
21162 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21167 args[i].mode = mode;
/* Dispatch on argument count to the generated insn constructor.  */
21173 pat = GEN_FCN (icode) (real_target, args[0].op);
21176 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21179 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21183 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21184 args[2].op, args[3].op);
21187 gcc_unreachable ();
21197 /* Subroutine of ix86_expand_builtin to take care of special insns
21198 with variable number of operands. */
/* Expander for builtins with a memory operand: the local enum CLASS
   distinguishes loads (memory is an input, result returned) from stores
   (first call argument is the destination address, 0 is returned).  */
21201 ix86_expand_special_args_builtin (const struct builtin_description *d,
21202 tree exp, rtx target)
21206 unsigned int i, nargs, arg_adjust, memory;
21210 enum machine_mode mode;
21212 enum insn_code icode = d->icode;
21213 bool last_arg_constant = false;
21214 const struct insn_data *insn_p = &insn_data[icode];
21215 enum machine_mode tmode = insn_p->operand[0].mode;
/* NOTE: 'class' as identifier is fine in C, but would break a C++ build;
   later GCC renamed this to 'klass'.  */
21216 enum { load, store } class;
21218 switch ((enum ix86_special_builtin_type) d->flag)
21220 case VOID_FTYPE_VOID:
21221 emit_insn (GEN_FCN (icode) (target));
/* Load forms: one pointer argument, memory is the source.  */
21223 case V2DI_FTYPE_PV2DI:
21224 case V16QI_FTYPE_PCCHAR:
21225 case V4SF_FTYPE_PCFLOAT:
21226 case V2DF_FTYPE_PCDOUBLE:
/* Store forms: pointer destination plus a value to store.  */
21231 case VOID_FTYPE_PV2SF_V4SF:
21232 case VOID_FTYPE_PV2DI_V2DI:
21233 case VOID_FTYPE_PCHAR_V16QI:
21234 case VOID_FTYPE_PFLOAT_V4SF:
21235 case VOID_FTYPE_PDOUBLE_V2DF:
21236 case VOID_FTYPE_PDI_DI:
21237 case VOID_FTYPE_PINT_INT:
21240 /* Reserve memory operand for target. */
21241 memory = ARRAY_SIZE (args);
/* Loads that also take a register input alongside the memory.  */
21243 case V4SF_FTYPE_V4SF_PCV2SF:
21244 case V2DF_FTYPE_V2DF_PCDOUBLE:
21250 gcc_unreachable ();
21253 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, argument 0 is the destination address; wrap it in a MEM
   of the insn's output mode and return 0 at the end.  */
21255 if (class == store)
21257 arg = CALL_EXPR_ARG (exp, 0);
21258 op = expand_normal (arg);
21259 gcc_assert (target == 0);
21260 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
21268 || GET_MODE (target) != tmode
21269 || ! (*insn_p->operand[0].predicate) (target, tmode))
21270 target = gen_reg_rtx (tmode);
/* Expand the remaining arguments; index I == MEMORY marks the slot that
   must stay a memory reference.  */
21273 for (i = 0; i < nargs; i++)
21275 enum machine_mode mode = insn_p->operand[i + 1].mode;
21278 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21279 op = expand_normal (arg);
21280 match = (*insn_p->operand[i + 1].predicate) (op, mode);
21282 if (last_arg_constant && (i + 1) == nargs)
21288 error ("the last argument must be an 8-bit immediate");
21296 /* This must be the memory operand. */
21297 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21298 gcc_assert (GET_MODE (op) == mode
21299 || GET_MODE (op) == VOIDmode);
21303 /* This must be register. */
21304 if (VECTOR_MODE_P (mode))
21305 op = safe_vector_operand (op, mode);
21307 gcc_assert (GET_MODE (op) == mode
21308 || GET_MODE (op) == VOIDmode);
21309 op = copy_to_mode_reg (mode, op);
21314 args[i].mode = mode;
21320 pat = GEN_FCN (icode) (target, args[0].op);
21323 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21326 gcc_unreachable ();
/* Stores have no value; loads return the register result.  */
21332 return class == store ? 0 : target;
21335 /* Return the integer constant in ARG. Constrain it to be in the range
21336 of the subparts of VEC_TYPE; issue an error if not. */
21339 get_element_number (tree vec_type, tree arg)
21341 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* ARG must be a host-representable unsigned integer constant <= MAX.  */
21343 if (!host_integerp (arg, 1)
21344 || (elt = tree_low_cst (arg, 1), elt > max))
21346 error ("selector must be an integer constant in the range 0..%wi", max);
21353 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21354 ix86_expand_vector_init. We DO have language-level syntax for this, in
21355 the form of (type){ init-list }. Except that since we can't place emms
21356 instructions from inside the compiler, we can't allow the use of MMX
21357 registers unless the user explicitly asks for it. So we do *not* define
21358 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21359 we have builtins invoked by mmintrin.h that gives us license to emit
21360 these sorts of instructions. */
21363 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21365 enum machine_mode tmode = TYPE_MODE (type);
21366 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21367 int i, n_elt = GET_MODE_NUNITS (tmode);
21368 rtvec v = rtvec_alloc (n_elt);
/* The builtin must supply exactly one scalar argument per vector lane.  */
21370 gcc_assert (VECTOR_MODE_P (tmode));
21371 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and narrow it to the vector's element mode.  */
21373 for (i = 0; i < n_elt; ++i)
21375 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21376 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21379 if (!target || !register_operand (target, tmode))
21380 target = gen_reg_rtx (tmode);
21382 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21386 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21387 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21388 had a language-level syntax for referencing vector elements. */
21391 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21393 enum machine_mode tmode, mode0;
21398 arg0 = CALL_EXPR_ARG (exp, 0);
21399 arg1 = CALL_EXPR_ARG (exp, 1);
21401 op0 = expand_normal (arg0);
/* ARG1 must be a compile-time lane selector; diagnosed if out of range.  */
21402 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* TMODE is the element mode, MODE0 the vector mode of ARG0.  */
21404 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21405 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21406 gcc_assert (VECTOR_MODE_P (mode0));
21408 op0 = force_reg (mode0, op0);
21410 if (optimize || !target || !register_operand (target, tmode))
21411 target = gen_reg_rtx (tmode);
21413 ix86_expand_vector_extract (true, target, op0, elt);
21418 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21419 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21420 a language-level syntax for referencing vector elements. */
21423 ix86_expand_vec_set_builtin (tree exp)
21425 enum machine_mode tmode, mode1;
21426 tree arg0, arg1, arg2;
21428 rtx op0, op1, target;
21430 arg0 = CALL_EXPR_ARG (exp, 0);
21431 arg1 = CALL_EXPR_ARG (exp, 1);
21432 arg2 = CALL_EXPR_ARG (exp, 2);
/* TMODE is the vector mode of ARG0, MODE1 its element mode.  */
21434 tmode = TYPE_MODE (TREE_TYPE (arg0));
21435 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21436 gcc_assert (VECTOR_MODE_P (tmode));
21438 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21439 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* ARG2 must be a compile-time lane selector; diagnosed if out of range.  */
21440 elt = get_element_number (TREE_TYPE (arg0), arg2);
21442 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21443 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21445 op0 = force_reg (tmode, op0);
21446 op1 = force_reg (mode1, op1);
21448 /* OP0 is the source of these builtin functions and shouldn't be
21449 modified. Create a copy, use it and return it as target. */
21450 target = gen_reg_rtx (tmode);
21451 emit_move_insn (target, op0);
21452 ix86_expand_vector_set (true, target, op1, elt);
21457 /* Expand an expression EXP that calls a built-in function,
21458 with result going to TARGET if that's convenient
21459 (and in mode MODE if that's convenient).
21460 SUBTARGET may be used as the target for computing one of EXP's operands.
21461 IGNORE is nonzero if the value is to be ignored. */
/* Top-level entry point (TARGET_EXPAND_BUILTIN): handles a few builtins
   with irregular shapes inline, then falls through to table lookups over
   the bdesc_* arrays, dispatching to the specialized expanders above.  */
21464 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21465 enum machine_mode mode ATTRIBUTE_UNUSED,
21466 int ignore ATTRIBUTE_UNUSED)
21468 const struct builtin_description *d;
21470 enum insn_code icode;
21471 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21472 tree arg0, arg1, arg2;
21473 rtx op0, op1, op2, pat;
21474 enum machine_mode mode0, mode1, mode2;
21475 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
21479 case IX86_BUILTIN_MASKMOVQ:
21480 case IX86_BUILTIN_MASKMOVDQU:
21481 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21482 ? CODE_FOR_mmx_maskmovq
21483 : CODE_FOR_sse2_maskmovdqu);
21484 /* Note the arg order is different from the operand order. */
21485 arg1 = CALL_EXPR_ARG (exp, 0);
21486 arg2 = CALL_EXPR_ARG (exp, 1);
21487 arg0 = CALL_EXPR_ARG (exp, 2);
21488 op0 = expand_normal (arg0);
21489 op1 = expand_normal (arg1);
21490 op2 = expand_normal (arg2);
21491 mode0 = insn_data[icode].operand[0].mode;
21492 mode1 = insn_data[icode].operand[1].mode;
21493 mode2 = insn_data[icode].operand[2].mode;
/* OP0 is the destination address; turn it into a MEM operand.  */
21495 op0 = force_reg (Pmode, op0);
21496 op0 = gen_rtx_MEM (mode1, op0);
21498 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21499 op0 = copy_to_mode_reg (mode0, op0);
21500 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21501 op1 = copy_to_mode_reg (mode1, op1);
21502 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21503 op2 = copy_to_mode_reg (mode2, op2);
21504 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a stack slot since the insns take a MEM.  */
21510 case IX86_BUILTIN_LDMXCSR:
21511 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21512 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21513 emit_move_insn (target, op0);
21514 emit_insn (gen_sse_ldmxcsr (target));
21517 case IX86_BUILTIN_STMXCSR:
21518 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21519 emit_insn (gen_sse_stmxcsr (target));
21520 return copy_to_mode_reg (SImode, target);
21522 case IX86_BUILTIN_CLFLUSH:
21523 arg0 = CALL_EXPR_ARG (exp, 0);
21524 op0 = expand_normal (arg0);
21525 icode = CODE_FOR_sse2_clflush;
21526 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21527 op0 = copy_to_mode_reg (Pmode, op0);
21529 emit_insn (gen_sse2_clflush (op0));
21532 case IX86_BUILTIN_MONITOR:
21533 arg0 = CALL_EXPR_ARG (exp, 0);
21534 arg1 = CALL_EXPR_ARG (exp, 1);
21535 arg2 = CALL_EXPR_ARG (exp, 2);
21536 op0 = expand_normal (arg0);
21537 op1 = expand_normal (arg1);
21538 op2 = expand_normal (arg2);
21540 op0 = copy_to_mode_reg (Pmode, op0);
21542 op1 = copy_to_mode_reg (SImode, op1);
21544 op2 = copy_to_mode_reg (SImode, op2);
/* 32-bit and 64-bit monitor use distinct patterns for the address.  */
21546 emit_insn (gen_sse3_monitor (op0, op1, op2));
21548 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
21551 case IX86_BUILTIN_MWAIT:
21552 arg0 = CALL_EXPR_ARG (exp, 0);
21553 arg1 = CALL_EXPR_ARG (exp, 1);
21554 op0 = expand_normal (arg0);
21555 op1 = expand_normal (arg1);
21557 op0 = copy_to_mode_reg (SImode, op0);
21559 op1 = copy_to_mode_reg (SImode, op1);
21560 emit_insn (gen_sse3_mwait (op0, op1));
21563 case IX86_BUILTIN_VEC_INIT_V2SI:
21564 case IX86_BUILTIN_VEC_INIT_V4HI:
21565 case IX86_BUILTIN_VEC_INIT_V8QI:
21566 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21568 case IX86_BUILTIN_VEC_EXT_V2DF:
21569 case IX86_BUILTIN_VEC_EXT_V2DI:
21570 case IX86_BUILTIN_VEC_EXT_V4SF:
21571 case IX86_BUILTIN_VEC_EXT_V4SI:
21572 case IX86_BUILTIN_VEC_EXT_V8HI:
21573 case IX86_BUILTIN_VEC_EXT_V2SI:
21574 case IX86_BUILTIN_VEC_EXT_V4HI:
21575 case IX86_BUILTIN_VEC_EXT_V16QI:
21576 return ix86_expand_vec_ext_builtin (exp, target);
21578 case IX86_BUILTIN_VEC_SET_V2DI:
21579 case IX86_BUILTIN_VEC_SET_V4SF:
21580 case IX86_BUILTIN_VEC_SET_V4SI:
21581 case IX86_BUILTIN_VEC_SET_V8HI:
21582 case IX86_BUILTIN_VEC_SET_V4HI:
21583 case IX86_BUILTIN_VEC_SET_V16QI:
21584 return ix86_expand_vec_set_builtin (exp);
/* __builtin_infq: load a TFmode infinity from the constant pool.  */
21586 case IX86_BUILTIN_INFQ:
21588 REAL_VALUE_TYPE inf;
21592 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21594 tmp = validize_mem (force_const_mem (mode, tmp));
21597 target = gen_reg_rtx (mode);
21599 emit_move_insn (target, tmp);
/* Fall through to the table-driven expanders for everything else.  */
21607 for (i = 0, d = bdesc_special_args;
21608 i < ARRAY_SIZE (bdesc_special_args);
21610 if (d->code == fcode)
21611 return ix86_expand_special_args_builtin (d, exp, target);
21613 for (i = 0, d = bdesc_args;
21614 i < ARRAY_SIZE (bdesc_args);
21616 if (d->code == fcode)
21617 return ix86_expand_args_builtin (d, exp, target);
21619 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21620 if (d->code == fcode)
21621 return ix86_expand_sse_comi (d, exp, target);
21623 for (i = 0, d = bdesc_pcmpestr;
21624 i < ARRAY_SIZE (bdesc_pcmpestr);
21626 if (d->code == fcode)
21627 return ix86_expand_sse_pcmpestr (d, exp, target);
21629 for (i = 0, d = bdesc_pcmpistr;
21630 i < ARRAY_SIZE (bdesc_pcmpistr);
21632 if (d->code == fcode)
21633 return ix86_expand_sse_pcmpistr (d, exp, target);
21635 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21636 if (d->code == fcode)
21637 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21638 (enum multi_arg_type)d->flag,
/* Every IX86_BUILTIN_* code must be handled by one of the paths above.  */
21641 gcc_unreachable ();
21644 /* Returns a function decl for a vectorized version of the builtin function
21645 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21646 if it is not available. */
21649 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21652 enum machine_mode in_mode, out_mode;
/* Both sides of the mapping must be vector types.  */
21655 if (TREE_CODE (type_out) != VECTOR_TYPE
21656 || TREE_CODE (type_in) != VECTOR_TYPE)
21659 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21660 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21661 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21662 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Only exact element-mode/width matches map to an SSE builtin.  */
21666 case BUILT_IN_SQRT:
21667 if (out_mode == DFmode && out_n == 2
21668 && in_mode == DFmode && in_n == 2)
21669 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21672 case BUILT_IN_SQRTF:
21673 if (out_mode == SFmode && out_n == 4
21674 && in_mode == SFmode && in_n == 4)
21675 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21678 case BUILT_IN_LRINT:
21679 if (out_mode == SImode && out_n == 4
21680 && in_mode == DFmode && in_n == 2)
21681 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21684 case BUILT_IN_LRINTF:
21685 if (out_mode == SImode && out_n == 4
21686 && in_mode == SFmode && in_n == 4)
21687 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21694 /* Dispatch to a handler for a vectorization library. */
21695 if (ix86_veclib_handler)
21696 return (*ix86_veclib_handler)(fn, type_out, type_in);
21701 /* Handler for an SVML-style interface to
21702 a library with vectorized intrinsics. */
/* Maps a scalar math builtin FN to an external SVML routine declaration
   (e.g. log -> vmldLn2 / logf -> vmlsLn4) when the vector types match
   2xDF or 4xSF.  Returns the new FUNCTION_DECL, or bails out early.  */
21705 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21708 tree fntype, new_fndecl, args;
21711 enum machine_mode el_mode, in_mode;
21714 /* The SVML is suitable for unsafe math only. */
21715 if (!flag_unsafe_math_optimizations)
21718 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21719 n = TYPE_VECTOR_SUBPARTS (type_out);
21720 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21721 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21722 if (el_mode != in_mode
/* Double-precision routines require a 2-element DFmode vector ...  */
21730 case BUILT_IN_LOG10:
21732 case BUILT_IN_TANH:
21734 case BUILT_IN_ATAN:
21735 case BUILT_IN_ATAN2:
21736 case BUILT_IN_ATANH:
21737 case BUILT_IN_CBRT:
21738 case BUILT_IN_SINH:
21740 case BUILT_IN_ASINH:
21741 case BUILT_IN_ASIN:
21742 case BUILT_IN_COSH:
21744 case BUILT_IN_ACOSH:
21745 case BUILT_IN_ACOS:
21746 if (el_mode != DFmode || n != 2)
/* ... and single-precision ones a 4-element SFmode vector.  */
21750 case BUILT_IN_EXPF:
21751 case BUILT_IN_LOGF:
21752 case BUILT_IN_LOG10F:
21753 case BUILT_IN_POWF:
21754 case BUILT_IN_TANHF:
21755 case BUILT_IN_TANF:
21756 case BUILT_IN_ATANF:
21757 case BUILT_IN_ATAN2F:
21758 case BUILT_IN_ATANHF:
21759 case BUILT_IN_CBRTF:
21760 case BUILT_IN_SINHF:
21761 case BUILT_IN_SINF:
21762 case BUILT_IN_ASINHF:
21763 case BUILT_IN_ASINF:
21764 case BUILT_IN_COSHF:
21765 case BUILT_IN_COSF:
21766 case BUILT_IN_ACOSHF:
21767 case BUILT_IN_ACOSF:
21768 if (el_mode != SFmode || n != 4)
/* Build the SVML routine name from the builtin's "__builtin_" name;
   bname+10 skips that prefix.  log/logf map to the Ln names specially.  */
21776 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21778 if (fn == BUILT_IN_LOGF)
21779 strcpy (name, "vmlsLn4");
21780 else if (fn == BUILT_IN_LOG)
21781 strcpy (name, "vmldLn2");
21784 sprintf (name, "vmls%s", bname+10);
21785 name[strlen (name)-1] = '4';
21788 sprintf (name, "vmld%s2", bname+10);
21790 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick the 1- or 2-operand
   vector function type.  */
21794 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21795 args = TREE_CHAIN (args))
21799 fntype = build_function_type_list (type_out, type_in, NULL);
21801 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21803 /* Build a function declaration for the vectorized function. */
21804 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21805 TREE_PUBLIC (new_fndecl) = 1;
21806 DECL_EXTERNAL (new_fndecl) = 1;
21807 DECL_IS_NOVOPS (new_fndecl) = 1;
21808 TREE_READONLY (new_fndecl) = 1;
21813 /* Handler for an ACML-style interface to
21814 a library with vectorized intrinsics. */
/* Maps a scalar math builtin FN to an external ACML "__vrd2_*/__vrs4_*"
   routine declaration; the fixed-size NAME template has the middle
   filled in below.  64-bit + unsafe math only.  */
21817 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
21819 char name[20] = "__vr.._";
21820 tree fntype, new_fndecl, args;
21823 enum machine_mode el_mode, in_mode;
21826 /* The ACML is 64bits only and suitable for unsafe math only as
21827 it does not correctly support parts of IEEE with the required
21828 precision such as denormals. */
21830 || !flag_unsafe_math_optimizations)
21833 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21834 n = TYPE_VECTOR_SUBPARTS (type_out);
21835 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21836 in_n = TYPE_VECTOR_SUBPARTS (type_in);
21837 if (el_mode != in_mode
/* Double routines need 2xDF, float routines 4xSF.  */
21847 case BUILT_IN_LOG2:
21848 case BUILT_IN_LOG10:
21851 if (el_mode != DFmode
21856 case BUILT_IN_SINF:
21857 case BUILT_IN_COSF:
21858 case BUILT_IN_EXPF:
21859 case BUILT_IN_POWF:
21860 case BUILT_IN_LOGF:
21861 case BUILT_IN_LOG2F:
21862 case BUILT_IN_LOG10F:
21865 if (el_mode != SFmode
/* Splice the builtin's base name (bname+10 skips "__builtin_") into the
   "__vr.._" template at offset 7.  */
21874 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21875 sprintf (name + 7, "%s", bname+10);
21878 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21879 args = TREE_CHAIN (args))
21883 fntype = build_function_type_list (type_out, type_in, NULL);
21885 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21887 /* Build a function declaration for the vectorized function. */
21888 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21889 TREE_PUBLIC (new_fndecl) = 1;
21890 DECL_EXTERNAL (new_fndecl) = 1;
21891 DECL_IS_NOVOPS (new_fndecl) = 1;
21892 TREE_READONLY (new_fndecl) = 1;
21898 /* Returns a decl of a function that implements conversion of the
21899 input vector of type TYPE, or NULL_TREE if it is not available. */
21902 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
21904 if (TREE_CODE (type) != VECTOR_TYPE)
/* int->float conversions map to cvtdq2ps.  */
21910 switch (TYPE_MODE (type))
21913 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* Truncating float->int conversions map to cvttps2dq.  */
21918 case FIX_TRUNC_EXPR:
21919 switch (TYPE_MODE (type))
21922 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
21932 /* Returns a code for a target-specific builtin that implements
21933 reciprocal of the function, or NULL_TREE if not available. */
21936 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
21937 bool sqrt ATTRIBUTE_UNUSED)
/* The rsqrt/rcp approximations are only valid under -mrecip with
   SSE math and the full set of unsafe-math flags.  */
21939 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
21940 && flag_finite_math_only && !flag_trapping_math
21941 && flag_unsafe_math_optimizations))
21945 /* Machine dependent builtins. */
21948 /* Vectorized version of sqrt to rsqrt conversion. */
21949 case IX86_BUILTIN_SQRTPS_NR:
21950 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
21956 /* Normal builtins. */
21959 /* Sqrt to rsqrt conversion. */
21960 case BUILT_IN_SQRTF:
21961 return ix86_builtins[IX86_BUILTIN_RSQRTF];
21968 /* Store OPERAND to the memory after reload is completed. This means
21969 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx referring to the stored value: in the red zone below
   the stack pointer when available, otherwise pushed via PRE_DEC of the
   stack pointer (to be released later by ix86_free_from_memory).  */
21971 ix86_force_to_memory (enum machine_mode mode, rtx operand)
21975 gcc_assert (reload_completed);
21976 if (TARGET_RED_ZONE)
/* Red zone: store below the stack pointer without adjusting it.  */
21978 result = gen_rtx_MEM (mode,
21979 gen_rtx_PLUS (Pmode,
21981 GEN_INT (-RED_ZONE_SIZE)));
21982 emit_move_insn (result, operand);
21984 else if (!TARGET_RED_ZONE && TARGET_64BIT)
/* 64-bit, no red zone: push the value as one DImode word.  */
21990 operand = gen_lowpart (DImode, operand);
21994 gen_rtx_SET (VOIDmode,
21995 gen_rtx_MEM (DImode,
21996 gen_rtx_PRE_DEC (DImode,
21997 stack_pointer_rtx)),
22001 gcc_unreachable ();
22003 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit: DImode values are pushed as two SImode halves (high first,
   so the MEM at the final stack pointer sees them in order).  */
22012 split_di (&operand, 1, operands, operands + 1);
22014 gen_rtx_SET (VOIDmode,
22015 gen_rtx_MEM (SImode,
22016 gen_rtx_PRE_DEC (Pmode,
22017 stack_pointer_rtx)),
22020 gen_rtx_SET (VOIDmode,
22021 gen_rtx_MEM (SImode,
22022 gen_rtx_PRE_DEC (Pmode,
22023 stack_pointer_rtx)),
22028 /* Store HImodes as SImodes. */
22029 operand = gen_lowpart (SImode, operand);
22033 gen_rtx_SET (VOIDmode,
22034 gen_rtx_MEM (GET_MODE (operand),
22035 gen_rtx_PRE_DEC (SImode,
22036 stack_pointer_rtx)),
22040 gcc_unreachable ();
22042 result = gen_rtx_MEM (mode, stack_pointer_rtx);
22047 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: pop the stack slot that was pushed for a
   MODE-sized spill.  With a red zone nothing was pushed, so nothing is
   deallocated.  */
22049 ix86_free_from_memory (enum machine_mode mode)
22051 if (!TARGET_RED_ZONE)
/* DImode (or any mode on 64-bit) occupied a full 8-byte slot;
   the elided else branch presumably frees 4 bytes -- TODO confirm.  */
22055 if (mode == DImode || TARGET_64BIT)
22059 /* Use LEA to deallocate stack space. In peephole2 it will be converted
22060 to pop or add instruction if registers are available. */
22061 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22062 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
22067 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
22068 QImode must go into class Q_REGS.
22069 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
22070 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given a value X being reloaded
   into REGCLASS, return the subclass actually worth using (or NO_REGS
   to force the value to memory).  */
22072 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
22074 enum machine_mode mode = GET_MODE (x);
22076 /* We're only allowed to return a subclass of CLASS. Many of the
22077 following checks fail for NO_REGS, so eliminate that early. */
22078 if (regclass == NO_REGS)
22081 /* All classes can load zeros. */
22082 if (x == CONST0_RTX (mode))
22085 /* Force constants into memory if we are loading a (nonzero) constant into
22086 an MMX or SSE register. This is because there are no MMX/SSE instructions
22087 to load from a constant. */
22089 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
22092 /* Prefer SSE regs only, if we can use them for math. */
22093 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
22094 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
22096 /* Floating-point constants need more complex checks. */
22097 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
22099 /* General regs can load everything. */
22100 if (reg_class_subset_p (regclass, GENERAL_REGS))
22103 /* Floats can load 0 and 1 plus some others. Note that we eliminated
22104 zero above. We only want to wind up preferring 80387 registers if
22105 we plan on doing computation with them. */
22107 && standard_80387_constant_p (x))
22109 /* Limit class to non-sse. */
/* Narrow mixed x87/SSE classes to their pure-x87 counterparts so the
   constant can be materialized with fld1/fldz-style instructions.  */
22110 if (regclass == FLOAT_SSE_REGS)
22112 if (regclass == FP_TOP_SSE_REGS)
22114 if (regclass == FP_SECOND_SSE_REGS)
22115 return FP_SECOND_REG;
22116 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
22123 /* Generally when we see PLUS here, it's the function invariant
22124 (plus soft-fp const_int). Which can only be computed into general
22126 if (GET_CODE (x) == PLUS)
22127 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22129 /* QImode constants are easy to load, but non-constant QImode data
22130 must go into Q_REGS. */
22131 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22133 if (reg_class_subset_p (regclass, Q_REGS))
22135 if (reg_class_subset_p (Q_REGS, regclass))
22143 /* Discourage putting floating-point values in SSE registers unless
22144 SSE math is being used, and likewise for the 387 registers. */
/* Implements PREFERRED_OUTPUT_RELOAD_CLASS: like the input variant but
   for values being stored from REGCLASS.  */
22146 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22148 enum machine_mode mode = GET_MODE (x);
22150 /* Restrict the output reload class to the register bank that we are doing
22151 math on. If we would like not to return a subset of CLASS, reject this
22152 alternative: if reload cannot do this, it will still use its choice. */
/* NOTE(review): this re-assignment is redundant -- MODE was already
   initialized to GET_MODE (x) at its declaration above.  Harmless, but
   could be removed.  */
22153 mode = GET_MODE (x);
22154 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22155 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22157 if (X87_FLOAT_MODE_P (mode))
/* Narrow mixed x87/SSE classes to their pure-x87 counterparts.  */
22159 if (regclass == FP_TOP_SSE_REGS)
22161 else if (regclass == FP_SECOND_SSE_REGS)
22162 return FP_SECOND_REG;
22164 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implements TARGET_SECONDARY_RELOAD.  Returns the class of an
   intermediate register needed to move X (of mode MODE) in or out of
   CLASS, or NO_REGS (in elided code -- TODO confirm) when no
   intermediary is required.  */
22170 static enum reg_class
22171 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22172 enum machine_mode mode,
22173 secondary_reload_info *sri ATTRIBUTE_UNUSED)
22175 /* QImode spills from non-QI registers require
22176 intermediate register on 32bit targets. */
/* On 32-bit only %eax/%ebx/%ecx/%edx have byte subregisters, so a
   QImode store from a wider-only class needs help.  */
22177 if (!in_p && mode == QImode && !TARGET_64BIT
22178 && (class == GENERAL_REGS
22179 || class == LEGACY_REGS
22180 || class == INDEX_REGS))
/* Resolve pseudos and subregs to the underlying hard register.  */
22189 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22190 regno = true_regnum (x);
22192 /* Return Q_REGS if the operand is in memory. */
22200 /* If we are copying between general and FP registers, we need a memory
22201 location. The same is true for SSE and MMX registers.
22203 To optimize register_move_cost performance, allow inline variant.
22205 The macro can't work reliably when one of the CLASSES is class containing
22206 registers from multiple units (SSE, MMX, integer). We avoid this by never
22207 combining those units in single alternative in the machine description.
22208 Ensure that this constraint holds to avoid unexpected surprises.
22210 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22211 enforce these sanity checks. */
/* Returns nonzero when a move between CLASS1 and CLASS2 in MODE must go
   through memory.  */
22214 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22215 enum machine_mode mode, int strict)
/* Sanity check: neither class may mix units (x87/SSE/MMX with others);
   when STRICT, a mixed class is a backend bug.  */
22217 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22218 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22219 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22220 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22221 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22222 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22224 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
22228 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22231 /* ??? This is a lie. We do have moves between mmx/general, and for
22232 mmx/sse2. But by saying we need secondary memory we discourage the
22233 register allocator from using the mmx registers unless needed. */
22234 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22237 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22239 /* SSE1 doesn't have any direct moves from other classes. */
22243 /* If the target says that inter-unit moves are more expensive
22244 than moving through memory, then don't generate them. */
22245 if (!TARGET_INTER_UNIT_MOVES)
22248 /* Between SSE and general, we have moves no larger than word size. */
22249 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper around inline_secondary_memory_needed, used by
   the SECONDARY_MEMORY_NEEDED macro.  */
22257 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22258 enum machine_mode mode, int strict)
22260 return inline_secondary_memory_needed (class1, class2, mode, strict);
22263 /* Return true if the registers in CLASS cannot represent the change from
22264 modes FROM to TO. */
/* Implements CANNOT_CHANGE_MODE_CLASS.  */
22267 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22268 enum reg_class regclass)
22273 /* x87 registers can't do subreg at all, as all values are reformatted
22274 to extended precision. */
22275 if (MAYBE_FLOAT_CLASS_P (regclass))
22278 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22280 /* Vector registers do not support QI or HImode loads. If we don't
22281 disallow a change to these modes, reload will assume it's ok to
22282 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22283 the vec_dupv4hi pattern. */
22284 if (GET_MODE_SIZE (from) < 4)
22287 /* Vector registers do not support subreg with nonzero offsets, which
22288 are otherwise valid for integer registers. Since we can't see
22289 whether we have a nonzero offset from here, prohibit all
22290 nonparadoxical subregs changing size. */
22291 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22298 /* Return the cost of moving data of mode M between a
22299 register and memory. A value of 2 is the default; this cost is
22300 relative to those in `REGISTER_MOVE_COST'.
22302 This function is used extensively by register_move_cost that is used to
22303 build tables at startup. Make it inline in this case.
22304 When IN is 2, return maximum of in and out move cost.
22306 If moving between registers and memory is more expensive than
22307 between two registers, you should define this macro to express the
22310 Model also increased moving costs of QImode registers in non
22314 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 loads/stores: cost table indexed by an elided size-derived
   INDEX -- TODO(review): confirm index computation.  */
22318 if (FLOAT_CLASS_P (regclass))
22336 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22337 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE loads/stores, indexed by operand size.  */
22339 if (SSE_CLASS_P (regclass))
22342 switch (GET_MODE_SIZE (mode))
22357 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22358 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX loads/stores, indexed by operand size.  */
22360 if (MMX_CLASS_P (regclass))
22363 switch (GET_MODE_SIZE (mode))
22375 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22376 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: model partial-register penalties for byte ops.  */
22378 switch (GET_MODE_SIZE (mode))
22381 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22384 return ix86_cost->int_store[0];
/* Prefer movzbl over movb when partial register stalls matter.  */
22385 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22386 cost = ix86_cost->movzbl_load;
22388 cost = ix86_cost->int_load[0];
22390 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q classes on 32-bit: byte stores need an extra penalty (+4).  */
22396 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22398 return ix86_cost->movzbl_load;
22400 return ix86_cost->int_store[0] + 4;
22405 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22406 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22408 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22409 if (mode == TFmode)
22412 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22414 cost = ix86_cost->int_load[2];
22416 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
22417 return (cost * (((int) GET_MODE_SIZE (mode)
22418 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper around inline_memory_move_cost, used by the
   MEMORY_MOVE_COST macro.  */
22423 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22425 return inline_memory_move_cost (mode, regclass, in);
22429 /* Return the cost of moving data from a register in class CLASS1 to
22430 one in class CLASS2.
22432 It is not required that the cost always equal 2 when FROM is the same as TO;
22433 on some machines it is expensive to move between registers if they are not
22434 general registers. */
/* Implements REGISTER_MOVE_COST.  */
22437 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22438 enum reg_class class2)
22440 /* In case we require secondary memory, compute cost of the store followed
22441 by load. In order to avoid bad register allocation choices, we need
22442 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
/* STRICT == 0 here: skip the mixed-class sanity asserts.  */
22444 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 requests the max of the load and store costs.  */
22448 cost += inline_memory_move_cost (mode, class1, 2);
22449 cost += inline_memory_move_cost (mode, class2, 2);
22451 /* In case of copying from general_purpose_register we may emit multiple
22452 stores followed by single load causing memory size mismatch stall.
22453 Count this as arbitrarily high cost of 20. */
22454 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22457 /* In the case of FP/MMX moves, the registers actually overlap, and we
22458 have to switch modes in order to treat them differently. */
22459 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22460 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22466 /* Moves between SSE/MMX and integer unit are expensive. */
22467 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22468 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22470 /* ??? By keeping returned value relatively high, we limit the number
22471 of moves between integer and MMX/SSE registers for all targets.
22472 Additionally, high value prevents problem with x86_modes_tieable_p(),
22473 where integer modes in MMX/SSE registers are not tieable
22474 because of missing QImode and HImode moves to, from or between
22475 MMX/SSE registers. */
22476 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: use the tuned per-unit costs.  */
22478 if (MAYBE_FLOAT_CLASS_P (class1))
22479 return ix86_cost->fp_move;
22480 if (MAYBE_SSE_CLASS_P (class1))
22481 return ix86_cost->sse_move;
22482 if (MAYBE_MMX_CLASS_P (class1))
22483 return ix86_cost->mmx_move;
22487 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22490 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22492 /* Flags and only flags can only hold CCmode values. */
22493 if (CC_REGNO_P (regno))
22494 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC / random / partial-int modes are rejected for all other regs.  */
22495 if (GET_MODE_CLASS (mode) == MODE_CC
22496 || GET_MODE_CLASS (mode) == MODE_RANDOM
22497 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22499 if (FP_REGNO_P (regno))
22500 return VALID_FP_MODE_P (mode);
22501 if (SSE_REGNO_P (regno))
22503 /* We implement the move patterns for all vector modes into and
22504 out of SSE registers, even when no operation instructions
22506 return (VALID_SSE_REG_MODE (mode)
22507 || VALID_SSE2_REG_MODE (mode)
22508 || VALID_MMX_REG_MODE (mode)
22509 || VALID_MMX_REG_MODE_3DNOW (mode));
22511 if (MMX_REGNO_P (regno))
22513 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22514 so if the register is available at all, then we can move data of
22515 the given mode into or out of it. */
22516 return (VALID_MMX_REG_MODE (mode)
22517 || VALID_MMX_REG_MODE_3DNOW (mode));
/* From here down, REGNO is a general-purpose register.  */
22520 if (mode == QImode)
22522 /* Take care for QImode values - they can be in non-QI regs,
22523 but then they do cause partial register stalls. */
22524 if (regno < 4 || TARGET_64BIT)
22526 if (!TARGET_PARTIAL_REG_STALL)
/* During reload we must accept what reload chose.  */
22528 return reload_in_progress || reload_completed;
22530 /* We handle both integer and floats in the general purpose registers. */
22531 else if (VALID_INT_MODE_P (mode))
22533 else if (VALID_FP_MODE_P (mode))
22535 else if (VALID_DFP_MODE_P (mode))
22537 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22538 on to use that value in smaller contexts, this can easily force a
22539 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22540 supporting DImode, allow it. */
22541 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22547 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22548 tieable integer mode. */
22551 ix86_tieable_integer_mode_p (enum machine_mode mode)
/* QImode (presumably -- the case label is elided) is tieable unless
   partial-register stalls make byte regs special on 32-bit.  */
22560 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
/* 8-byte integer modes are only tieable when DImode fits in a GPR.  */
22563 return TARGET_64BIT;
22570 /* Return true if MODE1 is accessible in a register that can hold MODE2
22571 without copying. That is, all register classes that can hold MODE2
22572 can also hold MODE1. */
/* Implements MODES_TIEABLE_P.  */
22575 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22577 if (mode1 == mode2)
22580 if (ix86_tieable_integer_mode_p (mode1)
22581 && ix86_tieable_integer_mode_p (mode2))
22584 /* MODE2 being XFmode implies fp stack or general regs, which means we
22585 can tie any smaller floating point modes to it. Note that we do not
22586 tie this with TFmode. */
22587 if (mode2 == XFmode)
22588 return mode1 == SFmode || mode1 == DFmode;
22590 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22591 that we can tie it with SFmode. */
22592 if (mode2 == DFmode)
22593 return mode1 == SFmode;
22595 /* If MODE2 is only appropriate for an SSE register, then tie with
22596 any other mode acceptable to SSE registers. */
22597 if (GET_MODE_SIZE (mode2) == 16
22598 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22599 return (GET_MODE_SIZE (mode1) == 16
22600 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22602 /* If MODE2 is appropriate for an MMX register, then tie
22603 with any other mode acceptable to MMX registers. */
22604 if (GET_MODE_SIZE (mode2) == 8
22605 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22606 return (GET_MODE_SIZE (mode1) == 8
22607 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22612 /* Compute a (partial) cost for rtx X. Return true if the complete
22613 cost has been computed, and false if subexpressions should be
22614 scanned. In either case, *TOTAL contains the cost result. */
/* Implements TARGET_RTX_COSTS.  A big switch on GET_CODE (x); only the
   cases visible in this excerpt are commented.  */
22617 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22619 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22620 enum machine_mode mode = GET_MODE (x);
/* Constant cases: immediates that don't fit x86-64 encodings or need
   PIC fixups are more expensive.  */
22628 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22630 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" below compares the boolean
   !GET_CODE (x) against the enum value LABEL_REF, so the subterm is
   effectively always true; the intent was almost certainly
   "GET_CODE (x) != LABEL_REF".  Known upstream bug -- fix with care.  */
22632 else if (flag_pic && SYMBOLIC_CONST (x)
22634 || (!GET_CODE (x) != LABEL_REF
22635 && (GET_CODE (x) != SYMBOL_REF
22636 || !SYMBOL_REF_LOCAL_P (x)))))
22643 if (mode == VOIDmode)
/* FP constants loadable by fld1/fldz etc. are cheap.  */
22646 switch (standard_80387_constant_p (x))
22651 default: /* Other constants */
22656 /* Start with (MEM (SYMBOL_REF)), since that's where
22657 it'll probably end up. Add a penalty for size. */
22658 *total = (COSTS_N_INSNS (1)
22659 + (flag_pic != 0 && !TARGET_64BIT)
22660 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22666 /* The zero extensions is often completely free on x86_64, so make
22667 it as cheap as possible. */
22668 if (TARGET_64BIT && mode == DImode
22669 && GET_MODE (XEXP (x, 0)) == SImode)
22671 else if (TARGET_ZERO_EXTEND_WITH_AND)
22672 *total = ix86_cost->add;
22674 *total = ix86_cost->movzx;
22678 *total = ix86_cost->movsx;
/* Shift by a constant: small left shifts may be done with lea.  */
22682 if (CONST_INT_P (XEXP (x, 1))
22683 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22685 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22688 *total = ix86_cost->add;
22691 if ((value == 2 || value == 3)
22692 && ix86_cost->lea <= ix86_cost->shift_const)
22694 *total = ix86_cost->lea;
/* 32-bit DImode shifts are emulated with multi-insn sequences.  */
22704 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22706 if (CONST_INT_P (XEXP (x, 1)))
22708 if (INTVAL (XEXP (x, 1)) > 32)
22709 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22711 *total = ix86_cost->shift_const * 2;
22715 if (GET_CODE (XEXP (x, 1)) == AND)
22716 *total = ix86_cost->shift_var * 2;
22718 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22723 if (CONST_INT_P (XEXP (x, 1)))
22724 *total = ix86_cost->shift_const;
22726 *total = ix86_cost->shift_var;
/* MULT: FP multiplies cost fmul regardless of unit (SSE/x87/vector).  */
22731 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22733 /* ??? SSE scalar cost should be used here. */
22734 *total = ix86_cost->fmul;
22737 else if (X87_FLOAT_MODE_P (mode))
22739 *total = ix86_cost->fmul;
22742 else if (FLOAT_MODE_P (mode))
22744 /* ??? SSE vector cost should be used here. */
22745 *total = ix86_cost->fmul;
22750 rtx op0 = XEXP (x, 0);
22751 rtx op1 = XEXP (x, 1);
/* Multiply by constant: cost grows with the popcount of the value.  */
22753 if (CONST_INT_P (XEXP (x, 1)))
22755 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22756 for (nbits = 0; value != 0; value &= value - 1)
22760 /* This is arbitrary. */
22763 /* Compute costs correctly for widening multiplication. */
22764 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22765 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22766 == GET_MODE_SIZE (mode))
22768 int is_mulwiden = 0;
22769 enum machine_mode inner_mode = GET_MODE (op0);
22771 if (GET_CODE (op0) == GET_CODE (op1))
22772 is_mulwiden = 1, op1 = XEXP (op1, 0);
22773 else if (CONST_INT_P (op1))
22775 if (GET_CODE (op0) == SIGN_EXTEND)
22776 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22779 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* For a recognized widening multiply, cost at the narrower mode.  */
22783 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22786 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22787 + nbits * ix86_cost->mult_bit
22788 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
/* DIV/MOD: FP divides cost fdiv; integer uses the per-mode table.  */
22797 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22798 /* ??? SSE cost should be used here. */
22799 *total = ix86_cost->fdiv;
22800 else if (X87_FLOAT_MODE_P (mode))
22801 *total = ix86_cost->fdiv;
22802 else if (FLOAT_MODE_P (mode))
22803 /* ??? SSE vector cost should be used here. */
22804 *total = ix86_cost->fdiv;
22806 *total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize lea-shaped trees (base + index*scale + disp).  */
22810 if (GET_MODE_CLASS (mode) == MODE_INT
22811 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22813 if (GET_CODE (XEXP (x, 0)) == PLUS
22814 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22815 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22816 && CONSTANT_P (XEXP (x, 1)))
22818 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22819 if (val == 2 || val == 4 || val == 8)
22821 *total = ix86_cost->lea;
22822 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22823 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22825 *total += rtx_cost (XEXP (x, 1), outer_code);
22829 else if (GET_CODE (XEXP (x, 0)) == MULT
22830 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22832 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22833 if (val == 2 || val == 4 || val == 8)
22835 *total = ix86_cost->lea;
22836 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22837 *total += rtx_cost (XEXP (x, 1), outer_code);
22841 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22843 *total = ix86_cost->lea;
22844 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22845 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22846 *total += rtx_cost (XEXP (x, 1), outer_code);
/* PLUS/MINUS on FP: fadd for all units.  */
22853 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22855 /* ??? SSE cost should be used here. */
22856 *total = ix86_cost->fadd;
22859 else if (X87_FLOAT_MODE_P (mode))
22861 *total = ix86_cost->fadd;
22864 else if (FLOAT_MODE_P (mode))
22866 /* ??? SSE vector cost should be used here. */
22867 *total = ix86_cost->fadd;
/* Logical ops: 32-bit DImode needs two instructions per operand.  */
22875 if (!TARGET_64BIT && mode == DImode)
22877 *total = (ix86_cost->add * 2
22878 + (rtx_cost (XEXP (x, 0), outer_code)
22879 << (GET_MODE (XEXP (x, 0)) != DImode))
22880 + (rtx_cost (XEXP (x, 1), outer_code)
22881 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG on FP: fchs for all units.  */
22887 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22889 /* ??? SSE cost should be used here. */
22890 *total = ix86_cost->fchs;
22893 else if (X87_FLOAT_MODE_P (mode))
22895 *total = ix86_cost->fchs;
22898 else if (FLOAT_MODE_P (mode))
22900 /* ??? SSE vector cost should be used here. */
22901 *total = ix86_cost->fchs;
22907 if (!TARGET_64BIT && mode == DImode)
22908 *total = ix86_cost->add * 2;
22910 *total = ix86_cost->add;
/* COMPARE of a single extracted bit against zero maps to test[bwl].  */
22914 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
22915 && XEXP (XEXP (x, 0), 1) == const1_rtx
22916 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
22917 && XEXP (x, 1) == const0_rtx)
22919 /* This kind of construct is implemented using test[bwl].
22920 Treat it as if we had an AND. */
22921 *total = (ix86_cost->add
22922 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
22923 + rtx_cost (const1_rtx, outer_code));
22929 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS on FP: fabs for all units.  */
22934 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22935 /* ??? SSE cost should be used here. */
22936 *total = ix86_cost->fabs;
22937 else if (X87_FLOAT_MODE_P (mode))
22938 *total = ix86_cost->fabs;
22939 else if (FLOAT_MODE_P (mode))
22940 /* ??? SSE vector cost should be used here. */
22941 *total = ix86_cost->fabs;
/* SQRT on FP: fsqrt for all units.  */
22945 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22946 /* ??? SSE cost should be used here. */
22947 *total = ix86_cost->fsqrt;
22948 else if (X87_FLOAT_MODE_P (mode))
22949 *total = ix86_cost->fsqrt;
22950 else if (FLOAT_MODE_P (mode))
22951 /* ??? SSE vector cost should be used here. */
22952 *total = ix86_cost->fsqrt;
/* Thread-pointer UNSPEC is effectively free (segment-register read).  */
22956 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
22967 static int current_machopic_label_num;
22969 /* Given a symbol name and its associated stub, write out the
22970 definition of the stub. */
/* Darwin/Mach-O only (32-bit): emits the lazy-binding stub, the binder
   helper, and the lazy pointer for SYMB into their sections.  */
22973 machopic_output_stub (FILE *file, const char *symb, const char *stub)
22975 unsigned int length;
22976 char *binder_name, *symbol_name, lazy_ptr_name[32];
22977 int label = ++current_machopic_label_num;
22979 /* For 64-bit we shouldn't get here. */
22980 gcc_assert (!TARGET_64BIT);
22982 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
22983 symb = (*targetm.strip_name_encoding) (symb);
22985 length = strlen (stub);
22986 binder_name = alloca (length + 32);
22987 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
22989 length = strlen (symb);
22990 symbol_name = alloca (length + 32);
22991 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
22993 sprintf (lazy_ptr_name, "L%d$lz", label);
/* Pick the PIC or non-PIC stub section (selector elided here).  */
22996 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
22998 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
23000 fprintf (file, "%s:\n", stub);
23001 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: get own address via call/pop, then jump through the
   lazy pointer.  */
23005 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
23006 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
23007 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC stub: jump through the lazy pointer directly.  */
23010 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder: push the lazy pointer address and call dyld's helper.  */
23012 fprintf (file, "%s:\n", binder_name);
23016 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
23017 fprintf (file, "\tpushl\t%%eax\n");
23020 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23022 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* Lazy pointer: initially points at the binder; dyld patches it.  */
23024 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
23025 fprintf (file, "%s:\n", lazy_ptr_name);
23026 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23027 fprintf (file, "\t.long %s\n", binder_name);
/* TARGET_ASM_FILE_END hook for Darwin: delegate to the generic Darwin
   end-of-file handling.  */
23031 darwin_x86_file_end (void)
23033 darwin_file_end ();
23036 #endif /* TARGET_MACHO */
23038 /* Order the registers for register allocator. */
/* Fills reg_alloc_order[]: call-clobbered GPRs first, then call-saved
   GPRs, then x87 or SSE depending on which unit does FP math, then MMX.
   Unallocatable slots are zero-filled.  */
23041 x86_order_regs_for_local_alloc (void)
23046 /* First allocate the local general purpose registers. */
23047 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23048 if (GENERAL_REGNO_P (i) && call_used_regs[i])
23049 reg_alloc_order [pos++] = i;
23051 /* Global general purpose registers. */
23052 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23053 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
23054 reg_alloc_order [pos++] = i;
23056 /* x87 registers come first in case we are doing FP math
/* (i.e. when SSE math is disabled, prefer the x87 stack).  */
23058 if (!TARGET_SSE_MATH)
23059 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23060 reg_alloc_order [pos++] = i;
23062 /* SSE registers. */
23063 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23064 reg_alloc_order [pos++] = i;
23065 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23066 reg_alloc_order [pos++] = i;
23068 /* x87 registers. */
23069 if (TARGET_SSE_MATH)
23070 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23071 reg_alloc_order [pos++] = i;
23073 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23074 reg_alloc_order [pos++] = i;
23076 /* Initialize the rest of array as we do not allocate some registers
23078 while (pos < FIRST_PSEUDO_REGISTER)
23079 reg_alloc_order [pos++] = 0;
23082 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
23083 struct attribute_spec.handler. */
/* Validates that the attribute is applied to a struct/union type and
   that ms_struct/gcc_struct are not combined; otherwise warns and sets
   *NO_ADD_ATTRS so the attribute is dropped.  */
23085 ix86_handle_struct_attribute (tree *node, tree name,
23086 tree args ATTRIBUTE_UNUSED,
23087 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* For a TYPE_DECL, the attribute really targets the declared type.  */
23090 if (DECL_P (*node))
23092 if (TREE_CODE (*node) == TYPE_DECL)
23093 type = &TREE_TYPE (*node);
23098 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
23099 || TREE_CODE (*type) == UNION_TYPE)))
23101 warning (OPT_Wattributes, "%qs attribute ignored",
23102 IDENTIFIER_POINTER (name));
23103 *no_add_attrs = true;
/* Reject ms_struct on a gcc_struct type and vice versa.  */
23106 else if ((is_attribute_p ("ms_struct", name)
23107 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
23108 || ((is_attribute_p ("gcc_struct", name)
23109 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
23111 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
23112 IDENTIFIER_POINTER (name));
23113 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P hook: use MS bitfield layout when the
   target defaults to it (and the type is not marked gcc_struct), or
   when the type is explicitly marked ms_struct.  */
23120 ix86_ms_bitfield_layout_p (const_tree record_type)
23122 return (TARGET_MS_BITFIELD_LAYOUT &&
23123 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23124 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
23127 /* Returns an expression indicating where the this parameter is
23128 located on entry to the FUNCTION. */
/* Used by the MI-thunk emitter.  AGGR is true when the function returns
   an aggregate in memory, which shifts `this' one slot/register later.  */
23131 x86_this_parameter (tree function)
23133 tree type = TREE_TYPE (function);
23134 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
/* 64-bit: `this' is in the first or second integer parameter register
   depending on AGGR; the register set depends on the call ABI.  */
23139 const int *parm_regs;
23141 if (ix86_function_type_abi (type) == MS_ABI)
23142 parm_regs = x86_64_ms_abi_int_parameter_registers;
23144 parm_regs = x86_64_int_parameter_registers;
23145 return gen_rtx_REG (DImode, parm_regs[aggr]);
/* 32-bit with register parameters (regparm/fastcall).  */
23148 nregs = ix86_function_regparm (type, function);
23150 if (nregs > 0 && !stdarg_p (type))
23154 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23155 regno = aggr ? DX_REG : CX_REG;
/* `this' spilled past the available regs: it's on the stack.  */
23163 return gen_rtx_MEM (SImode,
23164 plus_constant (stack_pointer_rtx, 4));
23167 return gen_rtx_REG (SImode, regno);
/* Default 32-bit stack convention: first stack arg (skip return addr,
   and the aggregate return pointer when AGGR).  */
23170 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23173 /* Determine whether x86_output_mi_thunk can succeed. */
23176 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23177 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23178 HOST_WIDE_INT vcall_offset, const_tree function)
23180 /* 64-bit can handle anything. */
23184 /* For 32-bit, everything's fine if we have one free register. */
/* regparm < 3 leaves at least one of %eax/%ecx/%edx as scratch.  */
23185 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23188 /* Need a free register for vcall_offset. */
23192 /* Need a free register for GOT references. */
23193 if (flag_pic && !(*targetm.binds_local_p) (function))
23196 /* Otherwise ok. */
23200 /* Output the assembler code for a thunk function. THUNK_DECL is the
23201 declaration for the thunk function itself, FUNCTION is the decl for
23202 the target function. DELTA is an immediate constant offset to be
23203 added to THIS. If VCALL_OFFSET is nonzero, the word at
23204 *(*this + vcall_offset) should be added to THIS. */
/* Emits the thunk as raw text via output_asm_insn: adjust `this' by
   DELTA and optionally by a vtable-loaded offset, then tail-jump to
   FUNCTION.  */
23207 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23208 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23209 HOST_WIDE_INT vcall_offset, tree function)
23212 rtx this_param = x86_this_parameter (function);
23215 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23216 pull it in now and let DELTA benefit. */
23217 if (REG_P (this_param))
23218 this_reg = this_param;
23219 else if (vcall_offset)
23221 /* Put the this parameter into %eax. */
23222 xops[0] = this_param;
23223 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23225 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23227 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23230 this_reg = NULL_RTX;
23232 /* Adjust the this parameter by a fixed constant. */
23235 xops[0] = GEN_INT (delta);
23236 xops[1] = this_reg ? this_reg : this_param;
/* A 64-bit DELTA that isn't a valid immediate must be materialized
   in a scratch register (%r10) first.  */
23239 if (!x86_64_general_operand (xops[0], DImode))
23241 tmp = gen_rtx_REG (DImode, R10_REG);
23243 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23245 xops[1] = this_param;
23247 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23250 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23253 /* Adjust the this parameter by a value stored in the vtable. */
/* Pick a scratch register for the vtable pointer: %r10 on 64-bit;
   on 32-bit %ecx, or %eax for fastcall (where %ecx holds `this').  */
23257 tmp = gen_rtx_REG (DImode, R10_REG);
23260 int tmp_regno = CX_REG;
23261 if (lookup_attribute ("fastcall",
23262 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23263 tmp_regno = AX_REG;
23264 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer: tmp = *this.  */
23267 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23270 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23272 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
23274 /* Adjust the this parameter. */
23275 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
/* Out-of-range 64-bit displacement: form the address in %r11.  */
23276 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23278 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23279 xops[0] = GEN_INT (vcall_offset);
23281 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23282 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23284 xops[1] = this_reg;
23286 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23288 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23291 /* If necessary, drop THIS back to its stack slot. */
23292 if (this_reg && this_reg != this_param)
23294 xops[0] = this_reg;
23295 xops[1] = this_param;
23297 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23299 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
/* Finally, tail-jump to the target function.  */
23302 xops[0] = XEXP (DECL_RTL (function), 0);
23305 if (!flag_pic || (*targetm.binds_local_p) (function))
23306 output_asm_insn ("jmp\t%P0", xops);
23307 /* All thunks should be in the same object as their target,
23308 and thus binds_local_p should be true. */
23309 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
23310 gcc_unreachable ();
/* 64-bit PIC non-local target: indirect jump through the GOT.  */
23313 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
23314 tmp = gen_rtx_CONST (Pmode, tmp);
23315 tmp = gen_rtx_MEM (QImode, tmp);
23317 output_asm_insn ("jmp\t%A0", xops);
23322 if (!flag_pic || (*targetm.binds_local_p) (function))
23323 output_asm_insn ("jmp\t%P0", xops);
/* Darwin: jump through the symbol's lazy-binding stub.  */
23328 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23329 tmp = (gen_rtx_SYMBOL_REF
23331 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23332 tmp = gen_rtx_MEM (QImode, tmp);
23334 output_asm_insn ("jmp\t%0", xops);
23337 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT base in %ecx and jump via the GOT.  */
23339 tmp = gen_rtx_REG (SImode, CX_REG);
23340 output_set_got (tmp, NULL_RTX);
23343 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23344 output_asm_insn ("jmp\t{*}%1", xops);
/* Emit assembler directives at the start of the output file.
   Runs the generic hook, then adds i386-specific directives:
   a ".version" stamp, the "__fltused" global, and Intel-syntax
   mode when -masm=intel is in effect.
   NOTE(review): this copy of the file has interior lines elided
   (braces/conditionals around darwin_file_start are missing) —
   verify against the pristine source before editing.  */
23350 x86_file_start (void)
23352 default_file_start ();
/* presumably guarded by a TARGET_MACHO conditional in the original — TODO confirm */
23354 darwin_file_start ();
23356 if (X86_FILE_START_VERSION_DIRECTIVE)
23357 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23358 if (X86_FILE_START_FLTUSED)
23359 fputs ("\t.global\t__fltused\n", asm_out_file);
23360 if (ix86_asm_dialect == ASM_INTEL)
23361 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* Cap the alignment of a structure FIELD.  For doubles, complex
   doubles and integer modes the visible code limits the computed
   alignment to 32 bits (the classic ix86 ABI rule).
   NOTE(review): the early-return path for TARGET_64BIT /
   TARGET_ALIGN_DOUBLE and the final "return computed;" appear
   elided in this copy — confirm against the pristine source.  */
23365 x86_field_alignment (tree field, int computed)
23367 enum machine_mode mode;
23368 tree type = TREE_TYPE (field);
23370 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* Alignment decision is based on the element mode, arrays stripped.  */
23372 mode = TYPE_MODE (strip_array_types (type));
23373 if (mode == DFmode || mode == DCmode
23374 || GET_MODE_CLASS (mode) == MODE_INT
23375 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23376 return MIN (32, computed);
23380 /* Output assembler code to FILE to increment profiler label # LABELNO
23381    for profiling a function entry. */
/* Emits the mcount call sequence: a 64-bit RIP-relative form, a PIC
   GOT-based 32-bit form, and a plain direct-call form.
   NOTE(review): the #if TARGET_64BIT / #else / #endif scaffolding and
   the "#endif" partners of the NO_PROFILE_COUNTERS guards appear
   elided in this copy — verify before editing.  */
23383 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23387 #ifndef NO_PROFILE_COUNTERS
23388 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23391 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
23392 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23394 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
/* 32-bit PIC variant: counter address computed via @GOTOFF off %ebx.  */
23398 #ifndef NO_PROFILE_COUNTERS
23399 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23400 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23402 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
/* Non-PIC 32-bit variant: absolute counter address, direct call.  */
23406 #ifndef NO_PROFILE_COUNTERS
23407 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23408 PROFILE_COUNT_REGISTER);
23410 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23414 /* We don't have exact information about the insn sizes, but we may assume
23415    quite safely that we are informed about all 1 byte insns and memory
23416    address sizes.  This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on the encoded size of
   INSN, used by the K8 jump-padding pass below.
   NOTE(review): several return statements and conditions (the "0"
   returns, the CALL_P test, the tail of the function) appear elided
   in this copy — verify against the pristine source.  */
23420 min_insn_size (rtx insn)
23424 if (!INSN_P (insn) || !active_insn_p (insn))
23427 /* Discard alignments we've emit and jump instructions. */
23428 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23429 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23432 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23433 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23436 /* Important case - calls are always 5 bytes.
23437    It is common to have many calls in the row. */
23439 && symbolic_reference_mentioned_p (PATTERN (insn))
23440 && !SIBLING_CALL_P (insn))
23442 if (get_attr_length (insn) <= 1)
23445 /* For normal instructions we may rely on the sizes of addresses
23446    and the presence of symbol to require 4 bytes of encoding.
23447    This is not the case for jumps where references are PC relative. */
23448 if (!JUMP_P (insn))
23450 l = get_attr_length_address (insn);
23451 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23460 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Sliding-window pass: walks all insns keeping a window [START, INSN]
   whose estimated byte size is NBYTES and jump count is NJUMPS; when a
   4th jump would land in the same 16-byte window, an alignment insn is
   emitted before it so at most 3 jumps share a 16-byte page.
   NOTE(review): the JUMP_P test that increments njumps and several
   loop-control lines appear elided in this copy — verify before
   editing.  */
23464 ix86_avoid_jump_misspredicts (void)
23466 rtx insn, start = get_insns ();
23467 int nbytes = 0, njumps = 0;
23470 /* Look for all minimal intervals of instructions containing 4 jumps.
23471    The intervals are bounded by START and INSN.  NBYTES is the total
23472    size of instructions in the interval including INSN and not including
23473    START.  When the NBYTES is smaller than 16 bytes, it is possible
23474    that the end of START and INSN ends up in the same 16byte page.
23476    The smallest offset in the page INSN can start is the case where START
23477    ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
23478    We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23480 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23483 nbytes += min_insn_size (insn);
23485 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23486 INSN_UID (insn), min_insn_size (insn));
23488 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23489 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
/* Shrink the window from the left while it holds 4 jumps.  */
23497 start = NEXT_INSN (start);
23498 if ((JUMP_P (start)
23499 && GET_CODE (PATTERN (start)) != ADDR_VEC
23500 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23502 njumps--, isjump = 1;
23505 nbytes -= min_insn_size (start);
23507 gcc_assert (njumps >= 0);
23509 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23510 INSN_UID (start), INSN_UID (insn), nbytes);
23512 if (njumps == 3 && isjump && nbytes < 16)
/* 15 - nbytes + size(INSN): maximum skip that pushes INSN out of
   the current 16-byte window.  */
23514 int padsize = 15 - nbytes + min_insn_size (insn);
23517 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23518 INSN_UID (insn), padsize);
23519 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23524 /* AMD Athlon works faster
23525    when RET is not destination of conditional jump or directly preceded
23526    by other jump instruction.  We avoid the penalty by inserting NOP just
23527    before the RET instructions in such cases. */
/* Scans every predecessor edge of the exit block; when the RET ending
   that block is reached by a jump (or a label reached by non-fallthru
   edges), replaces it with the long-form return.
   NOTE(review): the "replace = true" assignments and the final
   if (replace) { delete/replace } lines appear elided in this copy —
   verify against the pristine source.  */
23529 ix86_pad_returns (void)
23534 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23536 basic_block bb = e->src;
23537 rtx ret = BB_END (bb);
23539 bool replace = false;
23541 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23542 || !maybe_hot_bb_p (bb))
/* Find the closest preceding active insn or label.  */
23544 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23545 if (active_insn_p (prev) || LABEL_P (prev))
23547 if (prev && LABEL_P (prev))
23552 FOR_EACH_EDGE (e, ei, bb->preds)
23553 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23554 && !(e->flags & EDGE_FALLTHRU))
23559 prev = prev_active_insn (ret);
23561 && ((JUMP_P (prev) && any_condjump_p (prev))
23564 /* Empty functions get branch mispredict even when the jump destination
23565    is not visible to us. */
23566 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23571 emit_insn_before (gen_return_internal_long (), ret);
23577 /* Implement machine specific optimizations.  We implement padding of returns
23578    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
/* NOTE(review): the function header line itself (ix86_reorg) appears
   elided in this copy; only the body dispatch to the two passes above
   is visible.  Both passes are skipped when optimizing for size.  */
23582 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23583 ix86_pad_returns ();
23584 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23585 ix86_avoid_jump_misspredicts ();
23588 /* Return nonzero when QImode register that must be represented via REX prefix
/* Uses the cached recog data: any operand register numbered >= 4 in
   QImode needs a REX prefix in 64-bit mode (only AL..BL are encodable
   without it).
   NOTE(review): the QImode-check part of the condition and the return
   statements appear elided in this copy — verify before editing.  */
23591 x86_extended_QIreg_mentioned_p (rtx insn)
23594 extract_insn_cached (insn);
23595 for (i = 0; i < recog_data.n_operands; i++)
23596 if (REG_P (recog_data.operand[i])
23597 && REGNO (recog_data.operand[i]) >= 4)
23602 /* Return nonzero when P points to register encoded via REX prefix.
23603    Called via for_each_rtx. */
/* NOTE(review): the REG_P (*p) guard before reading REGNO appears
   elided in this copy — verify before editing.  */
23605 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23607 unsigned int regno;
23610 regno = REGNO (*p);
/* True for R8-R15 and XMM8-XMM15, the REX-only register banks.  */
23611 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23614 /* Return true when INSN mentions register that must be encoded using REX
/* Thin wrapper: walks INSN's pattern with the per-rtx predicate
   above; for_each_rtx returns nonzero as soon as the callback does.  */
23617 x86_extended_reg_mentioned_p (rtx insn)
23619 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23622 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
23623    optabs would emit if we didn't have TFmode patterns.  */
/* operands[0] = FP destination, operands[1] = unsigned integer source.
   Non-negative inputs use a plain signed conversion; negative inputs
   (i.e. values with the top bit set) are halved with the low bit ORed
   back in (to keep rounding correct), converted, then doubled.  */
23626 x86_emit_floatuns (rtx operands[2])
23628 rtx neglab, donelab, i0, i1, f0, in, out;
23629 enum machine_mode mode, inmode;
23631 inmode = GET_MODE (operands[1]);
23632 gcc_assert (inmode == SImode || inmode == DImode);
23635 in = force_reg (inmode, operands[1]);
23636 mode = GET_MODE (out);
23637 neglab = gen_label_rtx ();
23638 donelab = gen_label_rtx ();
23639 f0 = gen_reg_rtx (mode);
/* Branch to the slow path when the value is "negative" as signed.  */
23641 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23643 expand_float (out, in, 0);
23645 emit_jump_insn (gen_jump (donelab));
23648 emit_label (neglab);
/* i0 = (in >> 1) | (in & 1); halves the value, preserving the sticky
   low bit so the final doubling rounds identically.  */
23650 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23652 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23654 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23656 expand_float (f0, i0, 0);
/* out = f0 + f0, undoing the halving in FP.  */
23658 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23660 emit_label (donelab);
23663 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
23664    with all elements equal to VAR.  Return true if successful. */
/* NOTE(review): the mode-dispatch switch, several case labels and the
   final "return" lines are elided in this copy; what remains are the
   per-mode expansion bodies.  Verify against the pristine source.  */
23667 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23668 rtx target, rtx val)
23670 enum machine_mode smode, wsmode, wvmode;
/* Simple case: a single VEC_DUPLICATE set suffices.  */
23685 val = force_reg (GET_MODE_INNER (mode), val);
23686 x = gen_rtx_VEC_DUPLICATE (mode, val);
23687 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* V4HI-style broadcast via truncate-of-SImode duplicate (pshufw).  */
23693 if (TARGET_SSE || TARGET_3DNOW_A)
23695 val = gen_lowpart (SImode, val);
23696 x = gen_rtx_TRUNCATE (HImode, val);
23697 x = gen_rtx_VEC_DUPLICATE (mode, x);
23698 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23720 /* Extend HImode to SImode using a paradoxical SUBREG. */
23721 tmp1 = gen_reg_rtx (SImode);
23722 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23723 /* Insert the SImode value as low element of V4SImode vector. */
23724 tmp2 = gen_reg_rtx (V4SImode);
23725 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23726 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23727 CONST0_RTX (V4SImode),
23729 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23730 /* Cast the V4SImode vector back to a V8HImode vector. */
23731 tmp1 = gen_reg_rtx (V8HImode);
23732 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23733 /* Duplicate the low short through the whole low SImode word. */
23734 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23735 /* Cast the V8HImode vector back to a V4SImode vector. */
23736 tmp2 = gen_reg_rtx (V4SImode);
23737 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23738 /* Replicate the low element of the V4SImode vector. */
23739 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23740 /* Cast the V2SImode back to V8HImode, and store in target. */
23741 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23752 /* Extend QImode to SImode using a paradoxical SUBREG. */
23753 tmp1 = gen_reg_rtx (SImode);
23754 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23755 /* Insert the SImode value as low element of V4SImode vector. */
23756 tmp2 = gen_reg_rtx (V4SImode);
23757 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23758 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23759 CONST0_RTX (V4SImode),
23761 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23762 /* Cast the V4SImode vector back to a V16QImode vector. */
23763 tmp1 = gen_reg_rtx (V16QImode);
23764 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23765 /* Duplicate the low byte through the whole low SImode word. */
23766 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23767 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23768 /* Cast the V16QImode vector back to a V4SImode vector. */
23769 tmp2 = gen_reg_rtx (V4SImode);
23770 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23771 /* Replicate the low element of the V4SImode vector. */
23772 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23773 /* Cast the V2SImode back to V16QImode, and store in target. */
23774 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23782 /* Replicate the value once into the next wider mode and recurse. */
23783 val = convert_modes (wsmode, smode, val, true);
23784 x = expand_simple_binop (wsmode, ASHIFT, val,
23785 GEN_INT (GET_MODE_BITSIZE (smode)),
23786 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23787 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23789 x = gen_reg_rtx (wvmode);
23790 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23791 gcc_unreachable ();
23792 emit_move_insn (target, gen_lowpart (mode, x));
23800 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
23801    whose ONE_VAR element is VAR, and other elements are zero.  Return true
/* NOTE(review): the mode-dispatch switch and several case/brace lines
   are elided in this copy; verify against the pristine source.  */
23805 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23806 rtx target, rtx var, int one_var)
23808 enum machine_mode vsimode;
23811 bool use_vector_set = false;
/* Per-mode choice of whether a direct vec_set on a zeroed vector
   is available (ISA-dependent).  */
23816 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
23821 use_vector_set = TARGET_SSE4_1;
23824 use_vector_set = TARGET_SSE2;
23827 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
23833 if (use_vector_set)
/* Zero the target, then insert VAR at position ONE_VAR.  */
23835 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
23836 var = force_reg (GET_MODE_INNER (mode), var);
23837 ix86_expand_vector_set (mmx_ok, target, var, one_var);
/* Two-element case: concatenate VAR with a zero scalar.  */
23853 var = force_reg (GET_MODE_INNER (mode), var);
23854 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
23855 emit_insn (gen_rtx_SET (VOIDmode, target, x));
/* Build (VAR, 0, 0, 0) via vec_merge into a fresh pseudo, then
   shuffle VAR into position ONE_VAR if needed.  */
23860 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
23861 new_target = gen_reg_rtx (mode);
23863 new_target = target;
23864 var = force_reg (GET_MODE_INNER (mode), var);
23865 x = gen_rtx_VEC_DUPLICATE (mode, var);
23866 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
23867 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
23870 /* We need to shuffle the value to the correct position, so
23871    create a new pseudo to store the intermediate result. */
23873 /* With SSE2, we can use the integer shuffle insns. */
23874 if (mode != V4SFmode && TARGET_SSE2)
23876 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
23878 GEN_INT (one_var == 1 ? 0 : 1),
23879 GEN_INT (one_var == 2 ? 0 : 1),
23880 GEN_INT (one_var == 3 ? 0 : 1)));
23881 if (target != new_target)
23882 emit_move_insn (target, new_target);
23886 /* Otherwise convert the intermediate result to V4SFmode and
23887    use the SSE1 shuffle instructions. */
23888 if (mode != V4SFmode)
23890 tmp = gen_reg_rtx (V4SFmode);
23891 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
23896 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
23898 GEN_INT (one_var == 1 ? 0 : 1),
23899 GEN_INT (one_var == 2 ? 0+4 : 1+4),
23900 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
23902 if (mode != V4SFmode)
23903 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
23904 else if (tmp != target)
23905 emit_move_insn (target, tmp);
23907 else if (target != new_target)
23908 emit_move_insn (target, new_target);
/* Narrow MMX/SSE integer modes: widen to the matching SImode vector
   mode and recurse.  */
23913 vsimode = V4SImode;
23919 vsimode = V2SImode;
23925 /* Zero extend the variable element to SImode and recurse. */
23926 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
23928 x = gen_reg_rtx (vsimode);
23929 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
23931 gcc_unreachable ();
23933 emit_move_insn (target, gen_lowpart (mode, x));
23941 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
23942    consisting of the values in VALS.  It is known that all elements
23943    except ONE_VAR are constants.  Return true if successful. */
/* Strategy: materialize the constant part from the pool (with the
   variable slot zeroed), then overwrite slot ONE_VAR with VAR.
   NOTE(review): the mode-dispatch switch and some branch lines are
   elided in this copy — verify against the pristine source.  */
23946 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
23947 rtx target, rtx vals, int one_var)
23949 rtx var = XVECEXP (vals, 0, one_var);
23950 enum machine_mode wmode;
/* Build the all-constant vector with the variable slot zeroed.  */
23953 const_vec = copy_rtx (vals);
23954 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
23955 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
23963 /* For the two element vectors, it's just as easy to use
23964    the general case. */
23982 /* There's no way to set one QImode entry easily.  Combine
23983    the variable value with its adjacent constant value, and
23984    promote to an HImode set. */
23985 x = XVECEXP (vals, 0, one_var ^ 1);
/* Pack (var, neighbor-const) into one HImode value; which byte goes
   high depends on whether ONE_VAR is the odd or even slot.  */
23988 var = convert_modes (HImode, QImode, var, true);
23989 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
23990 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23991 x = GEN_INT (INTVAL (x) & 0xff);
23995 var = convert_modes (HImode, QImode, var, true);
23996 x = gen_int_mode (INTVAL (x) << 8, HImode);
23998 if (x != const0_rtx)
23999 var = expand_simple_binop (HImode, IOR, var, x, var,
24000 1, OPTAB_LIB_WIDEN);
24002 x = gen_reg_rtx (wmode);
24003 emit_move_insn (x, gen_lowpart (wmode, const_vec));
24004 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
24006 emit_move_insn (target, gen_lowpart (mode, x));
/* General path: load the constant vector, then vec_set the slot.  */
24013 emit_move_insn (target, const_vec);
24014 ix86_expand_vector_set (mmx_ok, target, var, one_var);
24018 /* A subroutine of ix86_expand_vector_init_general.  Use vector
24019    concatenate to handle the most general case: all values variable,
24020    and none identical. */
/* Recursive doubling: for N==2 emit one VEC_CONCAT; for N==4/8 build
   half-width vectors and concatenate them pairwise.
   NOTE(review): the switch (n) / switch (mode) scaffolding and the
   cmode/hmode assignments are elided in this copy — verify before
   editing.  */
24023 ix86_expand_vector_init_concat (enum machine_mode mode,
24024 rtx target, rtx *ops, int n)
24026 enum machine_mode cmode, hmode = VOIDmode;
24027 rtx first[4], second[2];
24055 gcc_unreachable ();
/* N == 2: force both halves into registers and concatenate.  */
24058 if (!register_operand (ops[1], cmode))
24059 ops[1] = force_reg (cmode, ops[1]);
24060 if (!register_operand (ops[0], cmode))
24061 ops[0] = force_reg (cmode, ops[0]);
24062 emit_insn (gen_rtx_SET (VOIDmode, target,
24063 gen_rtx_VEC_CONCAT (mode, ops[0],
24077 gcc_unreachable ();
24082 /* FIXME: We process inputs backward to help RA.  PR 36222. */
24085 for (; i > 0; i -= 2, j--)
24087 first[j] = gen_reg_rtx (cmode);
24088 v = gen_rtvec (2, ops[i - 1], ops[i]);
24089 ix86_expand_vector_init (false, first[j],
24090 gen_rtx_PARALLEL (cmode, v));
/* N == 8 needs one more level of pairwise concatenation.  */
24096 gcc_assert (hmode != VOIDmode);
24097 for (i = j = 0; i < n; i += 2, j++)
24099 second[j] = gen_reg_rtx (hmode);
24100 ix86_expand_vector_init_concat (hmode, second [j],
24104 ix86_expand_vector_init_concat (mode, target, second, n);
24107 ix86_expand_vector_init_concat (mode, target, first, n);
24111 gcc_unreachable ();
24115 /* A subroutine of ix86_expand_vector_init_general.  Use vector
24116    interleave to handle the most general case: all values variable,
24117    and none identical. */
/* Pairs elements into registers via vec_set, then interleaves the
   low halves through successively wider integer modes until the full
   vector is assembled.  N is half the element count (pairs).
   NOTE(review): the switch (mode) case labels are elided in this
   copy — verify before editing.  */
24120 ix86_expand_vector_init_interleave (enum machine_mode mode,
24121 rtx target, rtx *ops, int n)
24123 enum machine_mode first_imode, second_imode, third_imode;
24126 rtx (*gen_load_even) (rtx, rtx, rtx);
24127 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
24128 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
/* V8HImode configuration.  */
24133 gen_load_even = gen_vec_setv8hi;
24134 gen_interleave_first_low = gen_vec_interleave_lowv4si;
24135 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24136 first_imode = V4SImode;
24137 second_imode = V2DImode;
24138 third_imode = VOIDmode;
/* V16QImode configuration (needs one extra interleave level).  */
24141 gen_load_even = gen_vec_setv16qi;
24142 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
24143 gen_interleave_second_low = gen_vec_interleave_lowv4si;
24144 first_imode = V8HImode;
24145 second_imode = V4SImode;
24146 third_imode = V2DImode;
24149 gcc_unreachable ();
24152 for (i = 0; i < n; i++)
24154 /* Extend the odd elment to SImode using a paradoxical SUBREG. */
24155 op0 = gen_reg_rtx (SImode);
24156 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
24158 /* Insert the SImode value as low element of V4SImode vector. */
24159 op1 = gen_reg_rtx (V4SImode);
24160 op0 = gen_rtx_VEC_MERGE (V4SImode,
24161 gen_rtx_VEC_DUPLICATE (V4SImode,
24163 CONST0_RTX (V4SImode),
24165 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
24167 /* Cast the V4SImode vector back to a vector in orignal mode. */
24168 op0 = gen_reg_rtx (mode);
24169 emit_move_insn (op0, gen_lowpart (mode, op1));
24171 /* Load even elements into the second positon. */
24172 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
24175 /* Cast vector to FIRST_IMODE vector. */
24176 ops[i] = gen_reg_rtx (first_imode);
24177 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
24180 /* Interleave low FIRST_IMODE vectors. */
24181 for (i = j = 0; i < n; i += 2, j++)
24183 op0 = gen_reg_rtx (first_imode);
24184 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
24186 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
24187 ops[j] = gen_reg_rtx (second_imode);
24188 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
24191 /* Interleave low SECOND_IMODE vectors. */
24192 switch (second_imode)
24195 for (i = j = 0; i < n / 2; i += 2, j++)
24197 op0 = gen_reg_rtx (second_imode);
24198 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
24201 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
24203 ops[j] = gen_reg_rtx (third_imode);
24204 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
/* Fall through to the V2DImode final interleave.  */
24206 second_imode = V2DImode;
24207 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24211 op0 = gen_reg_rtx (second_imode);
24212 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
24215 /* Cast the SECOND_IMODE vector back to a vector on original
24217 emit_insn (gen_rtx_SET (VOIDmode, target,
24218 gen_lowpart (mode, op0)));
24222 gcc_unreachable ();
24226 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
24227    all values variable, and none identical. */
/* Dispatches per mode: wide modes use concat, V8HI/V16QI with SSE4.1
   use interleave, and narrower integer modes fall back to packing
   elements into word_mode scalars with shift/IOR and assembling the
   vector from those words.
   NOTE(review): the switch (mode) scaffolding and a few branch lines
   are elided in this copy — verify before editing.  */
24230 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
24231 rtx target, rtx vals)
24240 if (!mmx_ok && !TARGET_SSE)
/* Concat path: hand all elements to the recursive concatenator.  */
24248 n = GET_MODE_NUNITS (mode);
24249 for (i = 0; i < n; i++)
24250 ops[i] = XVECEXP (vals, 0, i);
24251 ix86_expand_vector_init_concat (mode, target, ops, n);
24255 if (!TARGET_SSE4_1)
/* Interleave path: n >> 1 because the helper works on pairs.  */
24263 n = GET_MODE_NUNITS (mode);
24264 for (i = 0; i < n; i++)
24265 ops[i] = XVECEXP (vals, 0, i);
24266 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
24274 gcc_unreachable ();
/* Word-packing fallback.  */
24278 int i, j, n_elts, n_words, n_elt_per_word;
24279 enum machine_mode inner_mode;
24280 rtx words[4], shift;
24282 inner_mode = GET_MODE_INNER (mode);
24283 n_elts = GET_MODE_NUNITS (mode);
24284 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24285 n_elt_per_word = n_elts / n_words;
24286 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
24288 for (i = 0; i < n_words; ++i)
24290 rtx word = NULL_RTX;
24292 for (j = 0; j < n_elt_per_word; ++j)
/* Elements are consumed high-to-low within each word so the
   shift/IOR loop lays them out in memory order.  */
24294 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
24295 elt = convert_modes (word_mode, inner_mode, elt, true);
24301 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
24302 word, 1, OPTAB_LIB_WIDEN);
24303 word = expand_simple_binop (word_mode, IOR, word, elt,
24304 word, 1, OPTAB_LIB_WIDEN);
/* Assemble the vector from 1, 2 or 4 scalar words.  */
24312 emit_move_insn (target, gen_lowpart (mode, words[0]));
24313 else if (n_words == 2)
24315 rtx tmp = gen_reg_rtx (mode);
24316 emit_clobber (tmp);
24317 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
24318 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
24319 emit_move_insn (target, tmp);
24321 else if (n_words == 4)
24323 rtx tmp = gen_reg_rtx (V4SImode);
24324 gcc_assert (word_mode == SImode);
24325 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
24326 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
24327 emit_move_insn (target, gen_lowpart (mode, tmp));
24330 gcc_unreachable ();
24334 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
24335    instructions unless MMX_OK is true. */
/* Entry point: classifies VALS (all-constant, all-same, one variable
   element, fully general) and dispatches to the cheapest strategy.  */
24338 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
24340 enum machine_mode mode = GET_MODE (target);
24341 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24342 int n_elts = GET_MODE_NUNITS (mode);
24343 int n_var = 0, one_var = -1;
24344 bool all_same = true, all_const_zero = true;
/* Classify the elements.  */
24348 for (i = 0; i < n_elts; ++i)
24350 x = XVECEXP (vals, 0, i);
24351 if (!(CONST_INT_P (x)
24352 || GET_CODE (x) == CONST_DOUBLE
24353 || GET_CODE (x) == CONST_FIXED))
24354 n_var++, one_var = i;
24355 else if (x != CONST0_RTX (inner_mode))
24356 all_const_zero = false;
24357 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24361 /* Constants are best loaded from the constant pool. */
24364 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24368 /* If all values are identical, broadcast the value. */
24370 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24371 XVECEXP (vals, 0, 0)))
24374 /* Values where only one field is non-constant are best loaded from
24375    the pool and overwritten via move later. */
24379 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24380 XVECEXP (vals, 0, one_var),
24384 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
/* Everything else: the fully general expander.  */
24388 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Chooses between
   vec_merge (SSE4.1 or trivially mergeable modes), mode-specific
   shuffle sequences, and a stack-memory fallback.
   NOTE(review): the switch (mode) scaffolding, several case labels
   and the "if (use_vec_merge)" line are elided in this copy — verify
   before editing.  */
24392 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24394 enum machine_mode mode = GET_MODE (target);
24395 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24396 bool use_vec_merge = false;
/* Two-element case: extract the untouched element, re-concat.  */
24405 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24406 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24408 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24410 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24411 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24417 use_vec_merge = TARGET_SSE4_1;
24425 /* For the two element vectors, we implement a VEC_CONCAT with
24426    the extraction of the other element. */
24428 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24429 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
24432 op0 = val, op1 = tmp;
24434 op0 = tmp, op1 = val;
24436 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24437 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24442 use_vec_merge = TARGET_SSE4_1;
24449 use_vec_merge = true;
/* V4SFmode without SSE4.1: per-position shufps sequences; the shuffle
   immediates select which lanes come from TMP vs TARGET.  */
24453 /* tmp = target = A B C D */
24454 tmp = copy_to_reg (target);
24455 /* target = A A B B */
24456 emit_insn (gen_sse_unpcklps (target, target, target));
24457 /* target = X A B B */
24458 ix86_expand_vector_set (false, target, val, 0);
24459 /* target = A X C D */
24460 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24461 GEN_INT (1), GEN_INT (0),
24462 GEN_INT (2+4), GEN_INT (3+4)));
24466 /* tmp = target = A B C D */
24467 tmp = copy_to_reg (target);
24468 /* tmp = X B C D */
24469 ix86_expand_vector_set (false, tmp, val, 0);
24470 /* target = A B X D */
24471 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24472 GEN_INT (0), GEN_INT (1),
24473 GEN_INT (0+4), GEN_INT (3+4)));
24477 /* tmp = target = A B C D */
24478 tmp = copy_to_reg (target);
24479 /* tmp = X B C D */
24480 ix86_expand_vector_set (false, tmp, val, 0);
24481 /* target = A B X D */
24482 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24483 GEN_INT (0), GEN_INT (1),
24484 GEN_INT (2+4), GEN_INT (0+4)));
24488 gcc_unreachable ();
24493 use_vec_merge = TARGET_SSE4_1;
24497 /* Element 0 handled by vec_merge below. */
24500 use_vec_merge = true;
24506 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24507    store into element 0, then shuffle them back. */
24511 order[0] = GEN_INT (elt);
24512 order[1] = const1_rtx;
24513 order[2] = const2_rtx;
24514 order[3] = GEN_INT (3);
24515 order[elt] = const0_rtx;
24517 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24518 order[1], order[2], order[3]));
24520 ix86_expand_vector_set (false, target, val, 0);
/* ORDER is its own inverse (a transposition), so reapplying it
   restores the original element ordering.  */
24522 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24523 order[1], order[2], order[3]));
24527 /* For SSE1, we have to reuse the V4SF code. */
24528 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24529 gen_lowpart (SFmode, val), elt);
24534 use_vec_merge = TARGET_SSE2;
24537 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24541 use_vec_merge = TARGET_SSE4_1;
/* vec_merge path: duplicate VAL, merge with TARGET keeping only ELT.  */
24551 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24552 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24553 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to a stack slot, store the element, reload.  */
24557 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24559 emit_move_insn (mem, target);
24561 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24562 emit_move_insn (tmp, val);
24564 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Uses a direct
   vec_select when the ISA supports it, per-mode shuffle sequences
   otherwise, and a stack-memory fallback as last resort.
   NOTE(review): the switch (mode) scaffolding, case labels and the
   "if (use_vec_extr)" line are elided in this copy — verify before
   editing.  */
24569 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24571 enum machine_mode mode = GET_MODE (vec);
24572 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24573 bool use_vec_extr = false;
24586 use_vec_extr = true;
24590 use_vec_extr = TARGET_SSE4_1;
/* V4SF path for elt 1/2: shufps broadcasts the wanted lane to lane 0.  */
24602 tmp = gen_reg_rtx (mode);
24603 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24604 GEN_INT (elt), GEN_INT (elt),
24605 GEN_INT (elt+4), GEN_INT (elt+4)));
/* elt 3: unpckhps brings the high pair down.  */
24609 tmp = gen_reg_rtx (mode);
24610 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24614 gcc_unreachable ();
24617 use_vec_extr = true;
24622 use_vec_extr = TARGET_SSE4_1;
/* V4SI path: pshufd broadcasts the wanted lane to lane 0.  */
24636 tmp = gen_reg_rtx (mode);
24637 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24638 GEN_INT (elt), GEN_INT (elt),
24639 GEN_INT (elt), GEN_INT (elt)));
24643 tmp = gen_reg_rtx (mode);
24644 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24648 gcc_unreachable ();
24651 use_vec_extr = true;
24656 /* For SSE1, we have to reuse the V4SF code. */
24657 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24658 gen_lowpart (V4SFmode, vec), elt);
24664 use_vec_extr = TARGET_SSE2;
24667 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24671 use_vec_extr = TARGET_SSE4_1;
24675 /* ??? Could extract the appropriate HImode element and shift. */
/* vec_select path.  */
24682 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24683 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24685 /* Let the rtl optimizers know about the zero extension performed. */
24686 if (inner_mode == QImode || inner_mode == HImode)
24688 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24689 target = gen_lowpart (SImode, target);
24692 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill VEC to a stack slot and load the element.  */
24696 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24698 emit_move_insn (mem, vec);
24700 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24701 emit_move_insn (target, tmp);
24705 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
24706    pattern to reduce; DEST is the destination; IN is the input vector. */
/* Classic log2 reduction: movhlps folds the high pair onto the low
   pair, FN combines them, then a shufps pairs lanes 0 and 1 for the
   final FN application.  */
24709 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24711 rtx tmp1, tmp2, tmp3;
24713 tmp1 = gen_reg_rtx (V4SFmode);
24714 tmp2 = gen_reg_rtx (V4SFmode);
24715 tmp3 = gen_reg_rtx (V4SFmode);
24717 emit_insn (gen_sse_movhlps (tmp1, in, in));
24718 emit_insn (fn (tmp2, tmp1, in));
24720 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24721 GEN_INT (1), GEN_INT (1),
24722 GEN_INT (1+4), GEN_INT (1+4)));
24723 emit_insn (fn (dest, tmp2, tmp3));
24726 /* Target hook for scalar_mode_supported_p. */
/* Decimal FP is handled specially; TFmode (128-bit float) only in
   64-bit mode; everything else defers to the generic default.
   NOTE(review): the return value for the decimal-FP branch is on an
   elided line in this copy — verify before editing.  */
24728 ix86_scalar_mode_supported_p (enum machine_mode mode)
24730 if (DECIMAL_FLOAT_MODE_P (mode))
24732 else if (mode == TFmode)
24733 return TARGET_64BIT;
24735 return default_scalar_mode_supported_p (mode);
24738 /* Implements target hook vector_mode_supported_p. */
/* A vector mode is supported when any enabled ISA level (SSE, SSE2,
   MMX, 3DNow!) validates it; the "return true" lines after each test
   and the final "return false" appear elided in this copy.  */
24740 ix86_vector_mode_supported_p (enum machine_mode mode)
24742 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24744 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24746 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24748 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24753 /* Target hook for c_mode_for_suffix. */
/* Maps a literal suffix character to a machine mode ('q' and 'w'
   forms).  The returned modes and the fallback return appear on
   elided lines in this copy — verify before editing.  */
24754 static enum machine_mode
24755 ix86_c_mode_for_suffix (char suffix)
24757 if (TARGET_64BIT && suffix == 'q')
24759 if (TARGET_MMX && suffix == 'w')
24765 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24767    We do this in the new i386 backend to maintain source compatibility
24768    with the old cc0-based compiler. */
/* Implicitly adds "flags" and "fpsr" to every asm's clobber list so
   old asm statements that assumed cc0 semantics keep working.
   NOTE(review): the clobbers parameter declaration and the return
   line are elided in this copy.  */
24771 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24772 tree inputs ATTRIBUTE_UNUSED,
24775 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24777 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24782 /* Implements target vector targetm.asm.encode_section_info.  This
24783    is not used by netware. */
/* Runs the generic hook, then marks static/external variables living
   in the large data section with SYMBOL_FLAG_FAR_ADDR so addressing
   code knows they need far (64-bit) addresses.  */
24785 static void ATTRIBUTE_UNUSED
24786 ix86_encode_section_info (tree decl, rtx rtl, int first)
24788 default_encode_section_info (decl, rtl, first);
24790 if (TREE_CODE (decl) == VAR_DECL
24791 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24792 && ix86_in_large_data_p (decl))
24793 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24796 /* Worker function for REVERSE_CONDITION. */
/* Reverses comparison CODE for condition-code mode MODE.  FP compare
   modes must use the maybe-unordered variant so NaN operands keep
   their IEEE semantics under reversal.  */
24799 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24801 return (mode != CCFPmode && mode != CCFPUmode
24802 ? reverse_condition (code)
24803 : reverse_condition_maybe_unordered (code));
24806 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for the move; INSN is used to query
   REG_DEAD notes so a dying source can be popped off the x87 stack.
   NOTE(review): some lines (return type, braces, a fallback return)
   are elided in this view of the file.  */
24810 output_387_reg_move (rtx insn, rtx *operands)
24812 if (REG_P (operands[0]))
/* Register-to-register move where the source register dies here:
   pop it rather than copy it.  */
24814 if (REG_P (operands[1])
24815 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24817 if (REGNO (operands[0]) == FIRST_STACK_REG)
24818 return output_387_ffreep (operands, 0);
24819 return "fstp\t%y0";
/* Loading into the stack top is a plain fld.  */
24821 if (STACK_TOP_P (operands[0]))
24822 return "fld%z1\t%y1";
24825 else if (MEM_P (operands[0]))
24827 gcc_assert (REG_P (operands[1]));
/* Store to memory, popping if the source register dies.  */
24828 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
24829 return "fstp%z0\t%y0";
24832 /* There is no non-popping store to memory for XFmode.
24833 So if we need one, follow the store with a load. */
24834 if (GET_MODE (operands[0]) == XFmode)
24835 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
24837 return "fst%z0\t%y0";
24844 /* Output code to perform a conditional jump to LABEL, if C2 flag in
24845 FP status register is set. */
24848 ix86_emit_fp_unordered_jump (rtx label)
/* Read the x87 status word into a scratch register.  */
24850 rtx reg = gen_reg_rtx (HImode);
24853 emit_insn (gen_x86_fnstsw_1 (reg));
/* When SAHF is available (and profitable), copy the status word into
   EFLAGS and branch on the unordered condition directly.  */
24855 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
24857 emit_insn (gen_x86_sahf_1 (reg));
24859 temp = gen_rtx_REG (CCmode, FLAGS_REG);
24860 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* Otherwise test the C2 bit (0x04) of the status word explicitly.  */
24864 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
24866 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
24867 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
/* Build and emit the conditional jump to LABEL.  */
24870 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
24871 gen_rtx_LABEL_REF (VOIDmode, label),
24873 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
24875 emit_jump_insn (temp);
/* The unordered case is unlikely; predict the branch as 10% taken.  */
24876 predict_jump (REG_BR_PROB_BASE * 10 / 100);
24879 /* Output code to perform a log1p XFmode calculation. */
/* OP0 receives log1p (OP1).  fyl2xp1 is accurate only for small
   arguments, so the expansion branches on |OP1| against
   1 - sqrt(2)/2 and falls back to fyl2x (1 + OP1) otherwise.  */
24881 void ix86_emit_i387_log1p (rtx op0, rtx op1)
24883 rtx label1 = gen_label_rtx ();
24884 rtx label2 = gen_label_rtx ();
24886 rtx tmp = gen_reg_rtx (XFmode);
24887 rtx tmp2 = gen_reg_rtx (XFmode);
/* if (|op1| >= 1 - sqrt(2)/2) goto label1 (the fallback path).  */
24889 emit_insn (gen_absxf2 (tmp, op1));
24890 emit_insn (gen_cmpxf (tmp,
24891 CONST_DOUBLE_FROM_REAL_VALUE (
24892 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
24894 emit_jump_insn (gen_bge (label1));
/* Small-argument path: log1p(x) = ln2 * fyl2xp1(x).  */
24896 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24897 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
24898 emit_jump (label2);
/* Fallback path: log1p(x) = ln2 * fyl2x(1 + x).  */
24900 emit_label (label1);
24901 emit_move_insn (tmp, CONST1_RTX (XFmode));
24902 emit_insn (gen_addxf3 (tmp, op1, tmp));
24903 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
24904 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
24906 emit_label (label2);
24909 /* Output code to perform a Newton-Rhapson approximation of a single precision
24910 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* RES = A / B computed via a reciprocal estimate plus one
   Newton-Raphson refinement step; MODE may be scalar SF or a vector
   float mode.  */
24912 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
24914 rtx x0, x1, e0, e1, two;
24916 x0 = gen_reg_rtx (mode);
24917 e0 = gen_reg_rtx (mode);
24918 e1 = gen_reg_rtx (mode);
24919 x1 = gen_reg_rtx (mode);
/* The constant 2.0, broadcast to a vector when MODE is a vector.  */
24921 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
24923 if (VECTOR_MODE_P (mode))
24924 two = ix86_build_const_vector (SFmode, true, two);
24926 two = force_reg (mode, two);
24928 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
24930 /* x0 = rcp(b) estimate */
24931 emit_insn (gen_rtx_SET (VOIDmode, x0,
24932 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = x0 * b */
24935 emit_insn (gen_rtx_SET (VOIDmode, e0,
24936 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0 */
24938 emit_insn (gen_rtx_SET (VOIDmode, e1,
24939 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1: refined reciprocal of b.  */
24941 emit_insn (gen_rtx_SET (VOIDmode, x1,
24942 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1 */
24944 emit_insn (gen_rtx_SET (VOIDmode, res,
24945 gen_rtx_MULT (mode, a, x1)));
24948 /* Output code to perform a Newton-Rhapson approximation of a
24949 single precision floating point [reciprocal] square root. */
/* RES = sqrt(A) or rsqrt(A) via the rsqrtss estimate plus one
   refinement step.  NOTE(review): the boolean selecting sqrt vs rsqrt
   and the branches around the two "e3" assignments are elided in this
   view of the file.  */
24951 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
24954 rtx x0, e0, e1, e2, e3, mthree, mhalf;
24957 x0 = gen_reg_rtx (mode);
24958 e0 = gen_reg_rtx (mode);
24959 e1 = gen_reg_rtx (mode);
24960 e2 = gen_reg_rtx (mode);
24961 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 used by the refinement formula.  */
24963 real_from_integer (&r, VOIDmode, -3, -1, 0);
24964 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24966 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
24967 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
24969 if (VECTOR_MODE_P (mode))
24971 mthree = ix86_build_const_vector (SFmode, true, mthree);
24972 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
24975 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
24976 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
24978 /* x0 = rsqrt(a) estimate */
24979 emit_insn (gen_rtx_SET (VOIDmode, x0,
24980 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
24983 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
/* Mask is all-ones where a != 0; ANDing it into x0 zeroes the
   estimate at zero inputs so sqrt(0) comes out as 0, not NaN.  */
24988 zero = gen_reg_rtx (mode);
24989 mask = gen_reg_rtx (mode);
24991 zero = force_reg (mode, CONST0_RTX(mode));
24992 emit_insn (gen_rtx_SET (VOIDmode, mask,
24993 gen_rtx_NE (mode, zero, a)));
24995 emit_insn (gen_rtx_SET (VOIDmode, x0,
24996 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a */
25000 emit_insn (gen_rtx_SET (VOIDmode, e0,
25001 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0 = a * x0^2 */
25003 emit_insn (gen_rtx_SET (VOIDmode, e1,
25004 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0 */
25007 mthree = force_reg (mode, mthree);
25008 emit_insn (gen_rtx_SET (VOIDmode, e2,
25009 gen_rtx_PLUS (mode, e1, mthree)));
25011 mhalf = force_reg (mode, mhalf);
25013 /* e3 = -.5 * x0 */
25014 emit_insn (gen_rtx_SET (VOIDmode, e3,
25015 gen_rtx_MULT (mode, x0, mhalf)));
25017 /* e3 = -.5 * e0 */
25018 emit_insn (gen_rtx_SET (VOIDmode, e3,
25019 gen_rtx_MULT (mode, e0, mhalf)));
25020 /* ret = e2 * e3 */
25021 emit_insn (gen_rtx_SET (VOIDmode, res,
25022 gen_rtx_MULT (mode, e2, e3)));
25025 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
25027 static void ATTRIBUTE_UNUSED
25028 i386_solaris_elf_named_section (const char *name, unsigned int flags,
25031 /* With Binutils 2.15, the "@unwind" marker must be specified on
25032 every occurrence of the ".eh_frame" section, not just the first
/* NOTE(review): the condition guarding this special case is partially
   elided in this view of the file.  */
25035 && strcmp (name, ".eh_frame") == 0)
25037 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25038 flags & SECTION_WRITE ? "aw" : "a");
/* All other sections use the generic ELF section directive.  */
25041 default_elf_asm_named_section (name, flags, decl);
25044 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* NOTE(review): the return statements for each case of the switch are
   elided in this view of the file.  */
25046 static const char *
25047 ix86_mangle_type (const_tree type)
/* Strip qualifiers; mangling is decided on the main variant.  */
25049 type = TYPE_MAIN_VARIANT (type);
/* Only fundamental scalar types get a target-specific mangling.  */
25051 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25052 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25055 switch (TYPE_MODE (type))
25058 /* __float128 is "g". */
25061 /* "long double" or __float80 is "e". */
25068 /* For 32-bit code we can save PIC register setup by using
25069 __stack_chk_fail_local hidden function instead of calling
25070 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25071 register, so it is better to call __stack_chk_fail directly. */
/* Returns the tree for the call emitted when the stack-protector
   canary check fails.  */
25074 ix86_stack_protect_fail (void)
25076 return TARGET_64BIT
25077 ? default_external_stack_protect_fail ()
25078 : default_hidden_stack_protect_fail ();
25081 /* Select a format to encode pointers in exception handling data. CODE
25082 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25083 true if the symbol may be affected by dynamic relocations.
25085 ??? All x86 object file formats are capable of representing this.
25086 After all, the relocation needed is the same as for the call insn.
25087 Whether or not a particular assembler allows us to enter such, I
25088 guess we'll have to see. */
/* NOTE(review): the enclosing condition (presumably a PIC check) above
   the sdata8 default is elided in this view of the file.  */
25090 asm_preferred_eh_data_format (int code, int global)
/* PIC case: pc-relative, indirect for global symbols; small code
   models can use 4-byte offsets instead of 8.  */
25094 int type = DW_EH_PE_sdata8;
25096 || ix86_cmodel == CM_SMALL_PIC
25097 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25098 type = DW_EH_PE_sdata4;
25099 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
/* Non-PIC: absolute pointers, shrunk to 4 bytes when the code model
   guarantees they fit.  */
25101 if (ix86_cmodel == CM_SMALL
25102 || (ix86_cmodel == CM_MEDIUM && code))
25103 return DW_EH_PE_udata4;
25104 return DW_EH_PE_absptr;
25107 /* Expand copysign from SIGN to the positive value ABS_VALUE
25108 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* RESULT = ABS_VALUE with the sign bit of SIGN copied in.  MASK, when
   given, must already select the sign bit.  */
25111 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
25113 enum machine_mode mode = GET_MODE (sign);
25114 rtx sgn = gen_reg_rtx (mode);
25115 if (mask == NULL_RTX)
/* Build a fresh sign-bit mask when the caller did not supply one.  */
25117 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
25118 if (!VECTOR_MODE_P (mode))
25120 /* We need to generate a scalar mode mask in this case. */
25121 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25122 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25123 mask = gen_reg_rtx (mode);
25124 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* NOTE(review): an else-branch line appears elided here; the NOT below
   inverts a caller-supplied mask before extracting the sign.  */
25128 mask = gen_rtx_NOT (mode, mask);
/* sgn = sign bit of SIGN; result = ABS_VALUE | sgn.  */
25129 emit_insn (gen_rtx_SET (VOIDmode, sgn,
25130 gen_rtx_AND (mode, mask, sign)));
25131 emit_insn (gen_rtx_SET (VOIDmode, result,
25132 gen_rtx_IOR (mode, abs_value, sgn)));
25135 /* Expand fabs (OP0) and return a new rtx that holds the result. The
25136 mask for masking out the sign-bit is stored in *SMASK, if that is
/* NOTE(review): the return statement and the *SMASK store are elided
   in this view of the file.  */
25139 ix86_expand_sse_fabs (rtx op0, rtx *smask)
25141 enum machine_mode mode = GET_MODE (op0);
25144 xa = gen_reg_rtx (mode);
/* Mask with all bits set except the sign bit (invert=true).  */
25145 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
25146 if (!VECTOR_MODE_P (mode))
25148 /* We need to generate a scalar mode mask in this case. */
25149 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25150 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25151 mask = gen_reg_rtx (mode);
25152 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* xa = op0 & ~signbit, i.e. fabs (op0).  */
25154 emit_insn (gen_rtx_SET (VOIDmode, xa,
25155 gen_rtx_AND (mode, op0, mask)));
25163 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
25164 swapping the operands if SWAP_OPERANDS is true. The expanded
25165 code is a forward jump to a newly created label in case the
25166 comparison is true. The generated label rtx is returned. */
/* NOTE(review): the operand-swapping code and the final return are
   elided in this view of the file.  */
25168 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
25169 bool swap_operands)
/* Emit an FP compare in CCFPUmode (unordered-aware) and branch on
   CODE to the fresh label.  */
25180 label = gen_label_rtx ();
25181 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
25182 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25183 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
25184 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
25185 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25186 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
25187 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
/* Record the jump target so later passes can track label uses.  */
25188 JUMP_LABEL (tmp) = label;
25193 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25194 using comparison code CODE. Operands are swapped for the comparison if
25195 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* NOTE(review): the operand-swapping code and the final return are
   elided in this view of the file.  */
25197 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25198 bool swap_operands)
25200 enum machine_mode mode = GET_MODE (op0);
25201 rtx mask = gen_reg_rtx (mode);
/* cmpsd for doubles, cmpss for floats; result is an all-ones /
   all-zeros mask per the comparison.  */
25210 if (mode == DFmode)
25211 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25212 gen_rtx_fmt_ee (code, mode, op0, op1)));
25214 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25215 gen_rtx_fmt_ee (code, mode, op0, op1)));
25220 /* Generate and return a rtx of mode MODE for 2**n where n is the number
25221 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2**52 for DFmode, 2**23 for SFmode: adding then subtracting this
   constant rounds a value to an integer in FP arithmetic.  */
25223 ix86_gen_TWO52 (enum machine_mode mode)
25225 REAL_VALUE_TYPE TWO52r;
25228 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25229 TWO52 = const_double_from_real_value (TWO52r, mode);
/* Force into a register so it can be reused by the callers' binops.  */
25230 TWO52 = force_reg (mode, TWO52);
25235 /* Expand SSE sequence for computing lround from OP1 storing
/* OP0 (integer) = lround (OP1): add a biased 0.5 with OP1's sign,
   then truncate.  nextafter(0.5, 0) is used instead of exactly 0.5 so
   values like 0.49999999... do not round up.  */
25238 ix86_expand_lround (rtx op0, rtx op1)
25240 /* C code for the stuff we're doing below:
25241 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
25244 enum machine_mode mode = GET_MODE (op1);
25245 const struct real_format *fmt;
25246 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25249 /* load nextafter (0.5, 0.0) */
/* pred_half = 0.5 - 2**(-p-1), the largest value below 0.5.  */
25250 fmt = REAL_MODE_FORMAT (mode);
25251 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25252 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25254 /* adj = copysign (0.5, op1) */
25255 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
25256 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
25258 /* adj = op1 + adj */
25259 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
25261 /* op0 = (imode)adj */
/* expand_fix truncates toward zero, completing the rounding.  */
25262 expand_fix (op0, adj, 0);
25265 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* OP0 (integer) = lfloor or lceil of OP1, selected by DO_FLOOR:
   truncate to integer, then correct by one when the truncation went
   the wrong way.  */
25268 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
25270 /* C code for the stuff we're doing below (for do_floor):
25272 xi -= (double)xi > op1 ? 1 : 0;
25275 enum machine_mode fmode = GET_MODE (op1);
25276 enum machine_mode imode = GET_MODE (op0);
25277 rtx ireg, freg, label, tmp;
25279 /* reg = (long)op1 */
25280 ireg = gen_reg_rtx (imode);
25281 expand_fix (ireg, op1, 0);
25283 /* freg = (double)reg */
25284 freg = gen_reg_rtx (fmode);
25285 expand_float (freg, ireg, 0);
25287 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* UNLE with swapped operands for ceil; jumps past the adjustment when
   no correction is needed.  */
25288 label = ix86_expand_sse_compare_and_jump (UNLE,
25289 freg, op1, !do_floor);
25290 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
25291 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
25292 emit_move_insn (ireg, tmp);
25294 emit_label (label);
25295 LABEL_NUSES (label) = 1;
25297 emit_move_insn (op0, ireg);
25300 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
25301 result in OPERAND0. */
25303 ix86_expand_rint (rtx operand0, rtx operand1)
25305 /* C code for the stuff we're doing below:
25306 xa = fabs (operand1);
25307 if (!isless (xa, 2**52))
25309 xa = xa + 2**52 - 2**52;
25310 return copysign (xa, operand1);
25312 enum machine_mode mode = GET_MODE (operand0);
25313 rtx res, xa, label, TWO52, mask;
25315 res = gen_reg_rtx (mode);
25316 emit_move_insn (res, operand1);
25318 /* xa = abs (operand1) */
25319 xa = ix86_expand_sse_fabs (res, &mask);
25321 /* if (!isless (xa, TWO52)) goto label; */
/* Values >= 2**52 (or NaN) are already integral; skip the rounding.  */
25322 TWO52 = ix86_gen_TWO52 (mode);
25323 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
/* xa + 2**52 - 2**52 rounds to nearest in the current FP mode.  */
25325 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25326 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
/* Restore the original sign (preserves -0.0).  */
25328 ix86_sse_copysign_to_positive (res, xa, res, mask);
25330 emit_label (label);
25331 LABEL_NUSES (label) = 1;
25333 emit_move_insn (operand0, res);
25336 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* Variant of floor/ceil expansion that avoids DImode conversions, for
   use when 64-bit cvttsd2siq is unavailable (32-bit targets).  */
25339 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
25341 /* C code for the stuff we expand below.
25342 double xa = fabs (x), x2;
25343 if (!isless (xa, TWO52))
25345 xa = xa + TWO52 - TWO52;
25346 x2 = copysign (xa, x);
25355 enum machine_mode mode = GET_MODE (operand0);
25356 rtx xa, TWO52, tmp, label, one, res, mask;
25358 TWO52 = ix86_gen_TWO52 (mode);
25360 /* Temporary for holding the result, initialized to the input
25361 operand to ease control flow. */
25362 res = gen_reg_rtx (mode);
25363 emit_move_insn (res, operand1);
25365 /* xa = abs (operand1) */
25366 xa = ix86_expand_sse_fabs (res, &mask);
25368 /* if (!isless (xa, TWO52)) goto label; */
25369 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25371 /* xa = xa + TWO52 - TWO52; */
/* Round to nearest integer via the 2**52 addition trick.  */
25372 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25373 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25375 /* xa = copysign (xa, operand1) */
25376 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25378 /* generate 1.0 or -1.0 */
/* -1.0 for ceil so the subtraction below adds one instead.  */
25379 one = force_reg (mode,
25380 const_double_from_real_value (do_floor
25381 ? dconst1 : dconstm1, mode));
25383 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25384 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25385 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25386 gen_rtx_AND (mode, one, tmp)));
25387 /* We always need to subtract here to preserve signed zero. */
25388 tmp = expand_simple_binop (mode, MINUS,
25389 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25390 emit_move_insn (res, tmp);
25392 emit_label (label);
25393 LABEL_NUSES (label) = 1;
25395 emit_move_insn (operand0, res);
25398 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* floor/ceil expansion via integer truncation; requires the
   FP-to-integer conversion for the full mantissa (DImode on DFmode),
   unlike the *df_32 variant above.  */
25401 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25403 /* C code for the stuff we expand below.
25404 double xa = fabs (x), x2;
25405 if (!isless (xa, TWO52))
25407 x2 = (double)(long)x;
25414 if (HONOR_SIGNED_ZEROS (mode))
25415 return copysign (x2, x);
25418 enum machine_mode mode = GET_MODE (operand0);
25419 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25421 TWO52 = ix86_gen_TWO52 (mode);
25423 /* Temporary for holding the result, initialized to the input
25424 operand to ease control flow. */
25425 res = gen_reg_rtx (mode);
25426 emit_move_insn (res, operand1);
25428 /* xa = abs (operand1) */
25429 xa = ix86_expand_sse_fabs (res, &mask);
25431 /* if (!isless (xa, TWO52)) goto label; */
25432 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25434 /* xa = (double)(long)x */
/* Round toward zero through the integer domain, then back.  */
25435 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25436 expand_fix (xi, res, 0);
25437 expand_float (xa, xi, 0);
25440 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25442 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25443 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25444 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25445 gen_rtx_AND (mode, one, tmp)));
25446 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25447 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25448 emit_move_insn (res, tmp);
/* Copy the input's sign back so floor(-0.0) stays -0.0.  */
25450 if (HONOR_SIGNED_ZEROS (mode))
25451 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25453 emit_label (label);
25454 LABEL_NUSES (label) = 1;
25456 emit_move_insn (operand0, res);
25459 /* Expand SSE sequence for computing round from OPERAND1 storing
25460 into OPERAND0. Sequence that works without relying on DImode truncation
25461 via cvttsd2siq that is only available on 64bit targets. */
25463 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25465 /* C code for the stuff we expand below.
25466 double xa = fabs (x), xa2, x2;
25467 if (!isless (xa, TWO52))
25469 Using the absolute value and copying back sign makes
25470 -0.0 -> -0.0 correct.
25471 xa2 = xa + TWO52 - TWO52;
25476 else if (dxa > 0.5)
25478 x2 = copysign (xa2, x);
25481 enum machine_mode mode = GET_MODE (operand0);
25482 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25484 TWO52 = ix86_gen_TWO52 (mode);
25486 /* Temporary for holding the result, initialized to the input
25487 operand to ease control flow. */
25488 res = gen_reg_rtx (mode);
25489 emit_move_insn (res, operand1);
25491 /* xa = abs (operand1) */
25492 xa = ix86_expand_sse_fabs (res, &mask);
25494 /* if (!isless (xa, TWO52)) goto label; */
25495 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25497 /* xa2 = xa + TWO52 - TWO52; */
/* Round-to-nearest-even via the 2**52 trick; dxa below measures how
   far that rounding moved the value so ties can be fixed up to
   round-half-away-from-zero (the C round() semantics).  */
25498 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25499 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25501 /* dxa = xa2 - xa; */
25502 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25504 /* generate 0.5, 1.0 and -0.5 */
25505 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25506 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25507 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25511 tmp = gen_reg_rtx (mode);
25512 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25513 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25514 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25515 gen_rtx_AND (mode, one, tmp)));
25516 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25517 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25518 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25519 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25520 gen_rtx_AND (mode, one, tmp)));
25521 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25523 /* res = copysign (xa2, operand1) */
25524 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25526 emit_label (label);
25527 LABEL_NUSES (label) = 1;
25529 emit_move_insn (operand0, res);
25532 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* trunc() via FP->integer->FP round trip; values >= 2**52 are already
   integral and are passed through unchanged.  */
25535 ix86_expand_trunc (rtx operand0, rtx operand1)
25537 /* C code for SSE variant we expand below.
25538 double xa = fabs (x), x2;
25539 if (!isless (xa, TWO52))
25541 x2 = (double)(long)x;
25542 if (HONOR_SIGNED_ZEROS (mode))
25543 return copysign (x2, x);
25546 enum machine_mode mode = GET_MODE (operand0);
25547 rtx xa, xi, TWO52, label, res, mask;
25549 TWO52 = ix86_gen_TWO52 (mode);
25551 /* Temporary for holding the result, initialized to the input
25552 operand to ease control flow. */
25553 res = gen_reg_rtx (mode);
25554 emit_move_insn (res, operand1);
25556 /* xa = abs (operand1) */
25557 xa = ix86_expand_sse_fabs (res, &mask);
25559 /* if (!isless (xa, TWO52)) goto label; */
25560 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25562 /* x = (double)(long)x */
/* expand_fix truncates toward zero, which is exactly trunc().  */
25563 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25564 expand_fix (xi, res, 0);
25565 expand_float (res, xi, 0);
/* Restore the sign so trunc(-0.x) yields -0.0 when that matters.  */
25567 if (HONOR_SIGNED_ZEROS (mode))
25568 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25570 emit_label (label);
25571 LABEL_NUSES (label) = 1;
25573 emit_move_insn (operand0, res);
25576 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* trunc() variant that avoids DImode conversion (for 32-bit targets):
   round with the 2**52 trick, then subtract one where the rounding
   overshot the magnitude.  */
25579 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25581 enum machine_mode mode = GET_MODE (operand0);
25582 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25584 /* C code for SSE variant we expand below.
25585 double xa = fabs (x), x2;
25586 if (!isless (xa, TWO52))
25588 xa2 = xa + TWO52 - TWO52;
25592 x2 = copysign (xa2, x);
25596 TWO52 = ix86_gen_TWO52 (mode);
25598 /* Temporary for holding the result, initialized to the input
25599 operand to ease control flow. */
25600 res = gen_reg_rtx (mode);
25601 emit_move_insn (res, operand1);
25603 /* xa = abs (operand1) */
25604 xa = ix86_expand_sse_fabs (res, &smask);
25606 /* if (!isless (xa, TWO52)) goto label; */
25607 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25609 /* res = xa + TWO52 - TWO52; */
25610 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25611 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25612 emit_move_insn (res, tmp);
25615 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25617 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
/* If rounding went up in magnitude, pull it back toward zero.  */
25618 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25619 emit_insn (gen_rtx_SET (VOIDmode, mask,
25620 gen_rtx_AND (mode, mask, one)));
25621 tmp = expand_simple_binop (mode, MINUS,
25622 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25623 emit_move_insn (res, tmp);
25625 /* res = copysign (res, operand1) */
25626 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25628 emit_label (label);
25629 LABEL_NUSES (label) = 1;
25631 emit_move_insn (operand0, res);
25634 /* Expand SSE sequence for computing round from OPERAND1 storing
/* round() (half away from zero) on |x|: add just-under-0.5 and
   truncate through the integer domain, then restore the sign.  */
25637 ix86_expand_round (rtx operand0, rtx operand1)
25639 /* C code for the stuff we're doing below:
25640 double xa = fabs (x);
25641 if (!isless (xa, TWO52))
25643 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25644 return copysign (xa, x);
25646 enum machine_mode mode = GET_MODE (operand0);
25647 rtx res, TWO52, xa, label, xi, half, mask;
25648 const struct real_format *fmt;
25649 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25651 /* Temporary for holding the result, initialized to the input
25652 operand to ease control flow. */
25653 res = gen_reg_rtx (mode);
25654 emit_move_insn (res, operand1);
25656 TWO52 = ix86_gen_TWO52 (mode);
25657 xa = ix86_expand_sse_fabs (res, &mask);
25658 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25660 /* load nextafter (0.5, 0.0) */
/* Just below 0.5, so exact .5 ties round away from zero but values
   slightly below .5 do not round up.  */
25661 fmt = REAL_MODE_FORMAT (mode);
25662 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25663 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25665 /* xa = xa + 0.5 */
25666 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25667 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25669 /* xa = (double)(int64_t)xa */
25670 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25671 expand_fix (xi, xa, 0);
25672 expand_float (xa, xi, 0);
25674 /* res = copysign (xa, operand1) */
25675 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25677 emit_label (label);
25678 LABEL_NUSES (label) = 1;
25680 emit_move_insn (operand0, res);
25684 /* Validate whether a SSE5 instruction is valid or not.
25685 OPERANDS is the array of operands.
25686 NUM is the number of operands.
25687 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25688 NUM_MEMORY is the maximum number of memory operands to accept. */
/* Returns whether the operand combination is representable by the
   hardware.  NOTE(review): several lines (return statements, counter
   increments, braces) are elided in this view of the file; only the
   visible logic is annotated.  */
25691 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
25692 bool uses_oc0, int num_memory)
25698 /* Count the number of memory arguments */
25701 for (i = 0; i < num; i++)
25703 enum machine_mode mode = GET_MODE (operands[i]);
25704 if (register_operand (operands[i], mode))
25707 else if (memory_operand (operands[i], mode))
/* Record which operand slots are memory in a bitmask.  */
25709 mem_mask |= (1 << i);
25715 rtx pattern = PATTERN (insn);
25717 /* allow 0 for pcmov */
25718 if (GET_CODE (pattern) != SET
25719 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25721 || operands[i] != CONST0_RTX (mode))
25726 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
25727 a memory operation. */
/* A negative NUM_MEMORY encodes "last operand may be memory without
   counting against the limit".  */
25728 if (num_memory < 0)
25730 num_memory = -num_memory;
25731 if ((mem_mask & (1 << (num-1))) != 0)
25733 mem_mask &= ~(1 << (num-1));
25738 /* If there were no memory operations, allow the insn */
25742 /* Do not allow the destination register to be a memory operand. */
25743 else if (mem_mask & (1 << 0))
25746 /* If there are too many memory operations, disallow the instruction. While
25747 the hardware only allows 1 memory reference, before register allocation
25748 for some insns, we allow two memory operations sometimes in order to allow
25749 code like the following to be optimized:
25751 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25753 or similar cases that are vectorized into using the fmaddss
25755 else if (mem_count > num_memory)
25758 /* Don't allow more than one memory operation if not optimizing. */
25759 else if (mem_count > 1 && !optimize)
25762 else if (num == 4 && mem_count == 1)
25764 /* formats (destination is the first argument), example fmaddss:
25765 xmm1, xmm1, xmm2, xmm3/mem
25766 xmm1, xmm1, xmm2/mem, xmm3
25767 xmm1, xmm2, xmm3/mem, xmm1
25768 xmm1, xmm2/mem, xmm3, xmm1 */
/* With OC0 the single memory operand may be in slot 1, 2 or 3.  */
25770 return ((mem_mask == (1 << 1))
25771 || (mem_mask == (1 << 2))
25772 || (mem_mask == (1 << 3)));
25774 /* format, example pmacsdd:
25775 xmm1, xmm2, xmm3/mem, xmm1 */
/* Without OC0 only slot 2 may be memory.  */
25777 return (mem_mask == (1 << 2));
25780 else if (num == 4 && num_memory == 2)
25782 /* If there are two memory operations, we can load one of the memory ops
25783 into the destination register. This is for optimizing the
25784 multiply/add ops, which the combiner has optimized both the multiply
25785 and the add insns to have a memory operation. We have to be careful
25786 that the destination doesn't overlap with the inputs. */
25787 rtx op0 = operands[0];
25789 if (reg_mentioned_p (op0, operands[1])
25790 || reg_mentioned_p (op0, operands[2])
25791 || reg_mentioned_p (op0, operands[3]))
25794 /* formats (destination is the first argument), example fmaddss:
25795 xmm1, xmm1, xmm2, xmm3/mem
25796 xmm1, xmm1, xmm2/mem, xmm3
25797 xmm1, xmm2, xmm3/mem, xmm1
25798 xmm1, xmm2/mem, xmm3, xmm1
25800 For the oc0 case, we will load either operands[1] or operands[3] into
25801 operands[0], so any combination of 2 memory operands is ok. */
25805 /* format, example pmacsdd:
25806 xmm1, xmm2, xmm3/mem, xmm1
25808 For the integer multiply/add instructions be more restrictive and
25809 require operands[2] and operands[3] to be the memory operands. */
25811 return (mem_mask == ((1 << 2) | (1 << 3)));
25814 else if (num == 3 && num_memory == 1)
25816 /* formats, example protb:
25817 xmm1, xmm2, xmm3/mem
25818 xmm1, xmm2/mem, xmm3 */
25820 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
25822 /* format, example comeq:
25823 xmm1, xmm2, xmm3/mem */
25825 return (mem_mask == (1 << 2));
/* Any other operand-count/memory combination is a backend bug.  */
25829 gcc_unreachable ();
25835 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
25836 hardware will allow by using the destination register to load one of the
25837 memory operations. Presently this is used by the multiply/add routines to
25838 allow 2 memory references. */
/* NOTE(review): the assignments replacing operands[1]/operands[3] with
   OP0 after each move appear elided in this view of the file.  */
25841 ix86_expand_sse5_multiple_memory (rtx operands[],
25843 enum machine_mode mode)
25845 rtx op0 = operands[0];
/* The destination must be a register disjoint from all inputs, or the
   load below would clobber a value still needed.  */
25847 || memory_operand (op0, mode)
25848 || reg_mentioned_p (op0, operands[1])
25849 || reg_mentioned_p (op0, operands[2])
25850 || reg_mentioned_p (op0, operands[3]))
25851 gcc_unreachable ();
25853 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
25854 the destination register. */
25855 if (memory_operand (operands[1], mode))
25857 emit_move_insn (op0, operands[1]);
25860 else if (memory_operand (operands[3], mode))
25862 emit_move_insn (op0, operands[3]);
/* Caller promised two memory operands; anything else is a bug.  */
25866 gcc_unreachable ();
25872 /* Table of valid machine attributes. */
/* Each entry: attribute name, argument count range, and whether it
   applies to declarations, types, or function types, plus its
   validation handler.  Terminated by the NULL sentinel entry.  */
25873 static const struct attribute_spec ix86_attribute_table[] =
25875 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
25876 /* Stdcall attribute says callee is responsible for popping arguments
25877 if they are not variable. */
25878 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25879 /* Fastcall attribute says callee is responsible for popping arguments
25880 if they are not variable. */
25881 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25882 /* Cdecl attribute says the callee is a normal C declaration */
25883 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25884 /* Regparm attribute specifies how many integer arguments are to be
25885 passed in registers. */
25886 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
25887 /* Sseregparm attribute says we are using x86_64 calling conventions
25888 for FP arguments. */
25889 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
25890 /* force_align_arg_pointer says this function realigns the stack at entry. */
/* The attribute name is held in a run-time string, hence the cast.  */
25891 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
25892 false, true, true, ix86_handle_cconv_attribute },
25893 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25894 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
25895 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
25896 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
25898 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25899 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
25900 #ifdef SUBTARGET_ATTRIBUTE_TABLE
25901 SUBTARGET_ATTRIBUTE_TABLE,
25903 { NULL, 0, 0, false, false, false, NULL }
25906 /* Implement targetm.vectorize.builtin_vectorization_cost. */
25908 x86_builtin_vectorization_cost (bool runtime_test)
25910 /* If the branch of the runtime test is taken - i.e. - the vectorized
25911 version is skipped - this incurs a misprediction cost (because the
25912 vectorized version is expected to be the fall-through). So we subtract
25913 the latency of a mispredicted branch from the costs that are incured
25914 when the vectorized version is executed.
25916 TODO: The values in individual target tables have to be tuned or new
25917 fields may be needed. For eg. on K8, the default branch path is the
25918 not-taken path. If the taken path is predicted correctly, the minimum
25919 penalty of going down the taken-path is 1 cycle. If the taken-path is
25920 not predicted correctly, then the minimum penalty is 10 cycles. */
25924 return (-(ix86_cost->cond_taken_branch_cost));
25930 /* Initialize the GCC target structure. */
25931 #undef TARGET_RETURN_IN_MEMORY
25932 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
25934 #undef TARGET_ATTRIBUTE_TABLE
25935 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
25936 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
25937 # undef TARGET_MERGE_DECL_ATTRIBUTES
25938 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
25941 #undef TARGET_COMP_TYPE_ATTRIBUTES
25942 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
25944 #undef TARGET_INIT_BUILTINS
25945 #define TARGET_INIT_BUILTINS ix86_init_builtins
25946 #undef TARGET_EXPAND_BUILTIN
25947 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
25949 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
25950 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
25951 ix86_builtin_vectorized_function
25953 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
25954 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
25956 #undef TARGET_BUILTIN_RECIPROCAL
25957 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
25959 #undef TARGET_ASM_FUNCTION_EPILOGUE
25960 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
25962 #undef TARGET_ENCODE_SECTION_INFO
25963 #ifndef SUBTARGET_ENCODE_SECTION_INFO
25964 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
25966 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
25969 #undef TARGET_ASM_OPEN_PAREN
25970 #define TARGET_ASM_OPEN_PAREN ""
25971 #undef TARGET_ASM_CLOSE_PAREN
25972 #define TARGET_ASM_CLOSE_PAREN ""
25974 #undef TARGET_ASM_ALIGNED_HI_OP
25975 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
25976 #undef TARGET_ASM_ALIGNED_SI_OP
25977 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
25979 #undef TARGET_ASM_ALIGNED_DI_OP
25980 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
25983 #undef TARGET_ASM_UNALIGNED_HI_OP
25984 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
25985 #undef TARGET_ASM_UNALIGNED_SI_OP
25986 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
25987 #undef TARGET_ASM_UNALIGNED_DI_OP
25988 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
25990 #undef TARGET_SCHED_ADJUST_COST
25991 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
25992 #undef TARGET_SCHED_ISSUE_RATE
25993 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
25994 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
25995 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
25996 ia32_multipass_dfa_lookahead
25998 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
25999 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26002 #undef TARGET_HAVE_TLS
26003 #define TARGET_HAVE_TLS true
26005 #undef TARGET_CANNOT_FORCE_CONST_MEM
26006 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26007 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26008 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26010 #undef TARGET_DELEGITIMIZE_ADDRESS
26011 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26013 #undef TARGET_MS_BITFIELD_LAYOUT_P
26014 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26017 #undef TARGET_BINDS_LOCAL_P
26018 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26020 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26021 #undef TARGET_BINDS_LOCAL_P
26022 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26025 #undef TARGET_ASM_OUTPUT_MI_THUNK
26026 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26027 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26028 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26030 #undef TARGET_ASM_FILE_START
26031 #define TARGET_ASM_FILE_START x86_file_start
26033 #undef TARGET_DEFAULT_TARGET_FLAGS
26034 #define TARGET_DEFAULT_TARGET_FLAGS \
26036 | TARGET_SUBTARGET_DEFAULT \
26037 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
26039 #undef TARGET_HANDLE_OPTION
26040 #define TARGET_HANDLE_OPTION ix86_handle_option
26042 #undef TARGET_RTX_COSTS
26043 #define TARGET_RTX_COSTS ix86_rtx_costs
26044 #undef TARGET_ADDRESS_COST
26045 #define TARGET_ADDRESS_COST ix86_address_cost
26047 #undef TARGET_FIXED_CONDITION_CODE_REGS
26048 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26049 #undef TARGET_CC_MODES_COMPATIBLE
26050 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26052 #undef TARGET_MACHINE_DEPENDENT_REORG
26053 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26055 #undef TARGET_BUILD_BUILTIN_VA_LIST
26056 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26058 #undef TARGET_EXPAND_BUILTIN_VA_START
26059 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26061 #undef TARGET_MD_ASM_CLOBBERS
26062 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
26064 #undef TARGET_PROMOTE_PROTOTYPES
26065 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26066 #undef TARGET_STRUCT_VALUE_RTX
26067 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
26068 #undef TARGET_SETUP_INCOMING_VARARGS
26069 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26070 #undef TARGET_MUST_PASS_IN_STACK
26071 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26072 #undef TARGET_PASS_BY_REFERENCE
26073 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26074 #undef TARGET_INTERNAL_ARG_POINTER
26075 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26076 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
26077 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
26078 #undef TARGET_STRICT_ARGUMENT_NAMING
26079 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26081 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26082 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26084 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26085 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26087 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26088 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26090 #undef TARGET_C_MODE_FOR_SUFFIX
26091 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26094 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26095 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26098 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26099 #undef TARGET_INSERT_ATTRIBUTES
26100 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26103 #undef TARGET_MANGLE_TYPE
26104 #define TARGET_MANGLE_TYPE ix86_mangle_type
26106 #undef TARGET_STACK_PROTECT_FAIL
26107 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26109 #undef TARGET_FUNCTION_VALUE
26110 #define TARGET_FUNCTION_VALUE ix86_function_value
26112 #undef TARGET_SECONDARY_RELOAD
26113 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26115 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26116 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
26118 struct gcc_target targetm = TARGET_INITIALIZER;
26120 #include "gt-i386.h"