1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
45 #include "basic-block.h"
48 #include "target-def.h"
49 #include "langhooks.h"
51 #include "tree-gimple.h"
54 #include "tm-constrs.h"
/* Forward declarations for static helpers defined later in this file.  */
static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);
/* Threshold used when probing the stack; -1 means "no limit".
   Target headers may override this before we get here, hence the
   #ifndef guard.  The conditional was left unterminated (the matching
   #endif was lost); restore it so the preprocessor block is closed.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, anything
   else -> 4 ("other").  The trailing default arm and closing paren
   were lost from the macro; restore them so the conditional
   expression is complete.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.
   COSTS_N_BYTES therefore expresses a size cost on the same scale that
   COSTS_N_INSNS uses for speed costs.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy: always fall back to the library call.
   Used for the 64-bit slot of 32-bit-only cost tables (and vice versa).  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
/* NOTE(review): positional aggregate initializer -- entry order must match
   the struct processor_costs declaration exactly.  Several entries (e.g.
   "large insn", MOVE_RATIO, branch cost) and the closing "};" are not
   visible in this view of the file -- verify against the master copy
   before editing.  */
struct processor_costs size_cost = {	/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),			/* cost of FABS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction.  */
  /* memcpy / memset strategies: when tuning for size a plain
     rep-prefixed byte move is smallest, for both 32- and 64-bit.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* Processor costs (relative to an add) */
/* NOTE(review): positional initializer; some entries (MOVE_RATIO, branch
   cost) and the closing "};" are not visible here -- verify against the
   master copy.  */
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* NOTE(review): positional initializer; some entries and the closing "};"
   are not visible here -- verify against the master copy.  */
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* NOTE(review): positional initializer; some entries and the closing "};"
   are not visible here -- verify against the master copy.  */
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* NOTE(review): positional initializer; some entries and the closing "};"
   are not visible here -- verify against the master copy.  */
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache.  */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
     the alignment).  For small blocks inline loop is still a noticeable win, for bigger
     blocks either rep movsl or rep movsb is way to go.  Rep movsb has apparently
     more expensive startup time in CPU, but after 4K the difference is down in the
     noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* NOTE(review): positional initializer; some entries and the closing "};"
   are not visible here -- verify against the master copy.  */
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  128,					/* size of l2 cache.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* NOTE(review): positional initializer; some entries and the closing "};"
   are not visible here -- verify against the master copy.  */
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache.  */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* NOTE(review): positional initializer; some entries and the closing "};"
   are not visible here -- verify against the master copy.  */
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  256,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
/* NOTE(review): positional initializer; some entries and the closing "};"
   are not visible here -- verify against the master copy.  */
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache.  */
  512,					/* size of l2 cache.  */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very small
     blocks it is better to use loop.  For large blocks, libcall can do
     nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
in SFmode, DFmode and XFmode */
995 2, /* cost of moving MMX register */
996 {6, 6}, /* cost of loading MMX registers
997 in SImode and DImode */
998 {4, 4}, /* cost of storing MMX registers
999 in SImode and DImode */
1000 2, /* cost of moving SSE register */
1001 {6, 6, 6}, /* cost of loading SSE registers
1002 in SImode, DImode and TImode */
1003 {4, 4, 4}, /* cost of storing SSE registers
1004 in SImode, DImode and TImode */
1005 2, /* MMX or SSE register to integer */
1006 32, /* size of l1 cache. */
1007 2048, /* size of l2 cache. */
1008 128, /* size of prefetch block */
1009 8, /* number of parallel prefetches */
1010 3, /* Branch cost */
1011 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1012 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1013 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1014 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1015 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1016 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1017 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1018 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1019 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1020 {{libcall, {{8, loop}, {15, unrolled_loop},
1021 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1022 {libcall, {{24, loop}, {32, unrolled_loop},
1023 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1024 1, /* scalar_stmt_cost. */
1025 1, /* scalar load_cost. */
1026 1, /* scalar_store_cost. */
1027 1, /* vec_stmt_cost. */
1028 1, /* vec_to_scalar_cost. */
1029 1, /* scalar_to_vec_cost. */
1030 1, /* vec_align_load_cost. */
1031 2, /* vec_unalign_load_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1037 /* Generic64 should produce code tuned for Nocona and K8. */
1039 struct processor_costs generic64_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 /* On all chips taken into consideration lea is 2 cycles and more. With
1042 this cost however our current implementation of synth_mult results in
1043 use of unnecessary temporary registers causing regression on several
1044 SPECfp benchmarks. */
1045 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1046 COSTS_N_INSNS (1), /* variable shift costs */
1047 COSTS_N_INSNS (1), /* constant shift costs */
1048 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1049 COSTS_N_INSNS (4), /* HI */
1050 COSTS_N_INSNS (3), /* SI */
1051 COSTS_N_INSNS (4), /* DI */
1052 COSTS_N_INSNS (2)}, /* other */
1053 0, /* cost of multiply per each bit set */
1054 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1055 COSTS_N_INSNS (26), /* HI */
1056 COSTS_N_INSNS (42), /* SI */
1057 COSTS_N_INSNS (74), /* DI */
1058 COSTS_N_INSNS (74)}, /* other */
1059 COSTS_N_INSNS (1), /* cost of movsx */
1060 COSTS_N_INSNS (1), /* cost of movzx */
1061 8, /* "large" insn */
1062 17, /* MOVE_RATIO */
1063 4, /* cost for loading QImode using movzbl */
1064 {4, 4, 4}, /* cost of loading integer registers
1065 in QImode, HImode and SImode.
1066 Relative to reg-reg move (2). */
1067 {4, 4, 4}, /* cost of storing integer registers */
1068 4, /* cost of reg,reg fld/fst */
1069 {12, 12, 12}, /* cost of loading fp registers
1070 in SFmode, DFmode and XFmode */
1071 {6, 6, 8}, /* cost of storing fp registers
1072 in SFmode, DFmode and XFmode */
1073 2, /* cost of moving MMX register */
1074 {8, 8}, /* cost of loading MMX registers
1075 in SImode and DImode */
1076 {8, 8}, /* cost of storing MMX registers
1077 in SImode and DImode */
1078 2, /* cost of moving SSE register */
1079 {8, 8, 8}, /* cost of loading SSE registers
1080 in SImode, DImode and TImode */
1081 {8, 8, 8}, /* cost of storing SSE registers
1082 in SImode, DImode and TImode */
1083 5, /* MMX or SSE register to integer */
1084 32, /* size of l1 cache. */
1085 512, /* size of l2 cache. */
1086 64, /* size of prefetch block */
1087 6, /* number of parallel prefetches */
1088 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1089 is increased to perhaps more appropriate value of 5. */
1090 3, /* Branch cost */
1091 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1092 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1093 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1094 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1095 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1096 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1097 {DUMMY_STRINGOP_ALGS,
1098 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1, /* scalar_stmt_cost. */
1102 1, /* scalar load_cost. */
1103 1, /* scalar_store_cost. */
1104 1, /* vec_stmt_cost. */
1105 1, /* vec_to_scalar_cost. */
1106 1, /* scalar_to_vec_cost. */
1107 1, /* vec_align_load_cost. */
1108 2, /* vec_unalign_load_cost. */
1109 1, /* vec_store_cost. */
1110 3, /* cond_taken_branch_cost. */
1111 1, /* cond_not_taken_branch_cost. */
1114 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1116 struct processor_costs generic32_cost = {
1117 COSTS_N_INSNS (1), /* cost of an add instruction */
1118 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1119 COSTS_N_INSNS (1), /* variable shift costs */
1120 COSTS_N_INSNS (1), /* constant shift costs */
1121 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1122 COSTS_N_INSNS (4), /* HI */
1123 COSTS_N_INSNS (3), /* SI */
1124 COSTS_N_INSNS (4), /* DI */
1125 COSTS_N_INSNS (2)}, /* other */
1126 0, /* cost of multiply per each bit set */
1127 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1128 COSTS_N_INSNS (26), /* HI */
1129 COSTS_N_INSNS (42), /* SI */
1130 COSTS_N_INSNS (74), /* DI */
1131 COSTS_N_INSNS (74)}, /* other */
1132 COSTS_N_INSNS (1), /* cost of movsx */
1133 COSTS_N_INSNS (1), /* cost of movzx */
1134 8, /* "large" insn */
1135 17, /* MOVE_RATIO */
1136 4, /* cost for loading QImode using movzbl */
1137 {4, 4, 4}, /* cost of loading integer registers
1138 in QImode, HImode and SImode.
1139 Relative to reg-reg move (2). */
1140 {4, 4, 4}, /* cost of storing integer registers */
1141 4, /* cost of reg,reg fld/fst */
1142 {12, 12, 12}, /* cost of loading fp registers
1143 in SFmode, DFmode and XFmode */
1144 {6, 6, 8}, /* cost of storing fp registers
1145 in SFmode, DFmode and XFmode */
1146 2, /* cost of moving MMX register */
1147 {8, 8}, /* cost of loading MMX registers
1148 in SImode and DImode */
1149 {8, 8}, /* cost of storing MMX registers
1150 in SImode and DImode */
1151 2, /* cost of moving SSE register */
1152 {8, 8, 8}, /* cost of loading SSE registers
1153 in SImode, DImode and TImode */
1154 {8, 8, 8}, /* cost of storing SSE registers
1155 in SImode, DImode and TImode */
1156 5, /* MMX or SSE register to integer */
1157 32, /* size of l1 cache. */
1158 256, /* size of l2 cache. */
1159 64, /* size of prefetch block */
1160 6, /* number of parallel prefetches */
1161 3, /* Branch cost */
1162 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1163 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1164 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1165 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1166 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1167 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1168 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1169 DUMMY_STRINGOP_ALGS},
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 1, /* scalar_stmt_cost. */
1173 1, /* scalar load_cost. */
1174 1, /* scalar_store_cost. */
1175 1, /* vec_stmt_cost. */
1176 1, /* vec_to_scalar_cost. */
1177 1, /* scalar_to_vec_cost. */
1178 1, /* vec_align_load_cost. */
1179 2, /* vec_unalign_load_cost. */
1180 1, /* vec_store_cost. */
1181 3, /* cond_taken_branch_cost. */
1182 1, /* cond_not_taken_branch_cost. */
/* Cost table in effect for the current compilation; starts out pointing
   at the Pentium table.  NOTE(review): presumably re-pointed at the table
   selected by -mtune during option processing — confirm at the use site.  */
1185 const struct processor_costs *ix86_cost = &pentium_cost;
1187 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; these masks index into the
   ix86_tune_features / ix86_arch_features tables below.  */
1188 #define m_386 (1<<PROCESSOR_I386)
1189 #define m_486 (1<<PROCESSOR_I486)
1190 #define m_PENT (1<<PROCESSOR_PENTIUM)
1191 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1192 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1193 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1194 #define m_CORE2 (1<<PROCESSOR_CORE2)
1196 #define m_GEODE (1<<PROCESSOR_GEODE)
1197 #define m_K6 (1<<PROCESSOR_K6)
/* Convenience unions of closely related processors.  */
1198 #define m_K6_GEODE (m_K6 | m_GEODE)
1199 #define m_K8 (1<<PROCESSOR_K8)
1200 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1201 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1202 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
/* All AMD cores that share tuning decisions in this file.  */
1203 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1205 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1206 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1208 /* Generic instruction choice should be common subset of supported CPUs
1209 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1210 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1212 /* Feature tests against the various tunings. */
1213 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1214 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1215 negatively, so enabling for Generic64 seems like good code size
1216 tradeoff. We can't enable it for 32bit generic because it does not
1217 work well with PPro base chips. */
1218 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1220 /* X86_TUNE_PUSH_MEMORY */
1221 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1222 | m_NOCONA | m_CORE2 | m_GENERIC,
1224 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1227 /* X86_TUNE_USE_BIT_TEST */
1230 /* X86_TUNE_UNROLL_STRLEN */
1231 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1233 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1234 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1236 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1237 on simulation result. But after P4 was made, no performance benefit
1238 was observed with branch hints. It also increases the code size.
1239 As a result, icc never generates branch hints. */
1242 /* X86_TUNE_DOUBLE_WITH_ADD */
1245 /* X86_TUNE_USE_SAHF */
1246 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1247 | m_NOCONA | m_CORE2 | m_GENERIC,
1249 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1250 partial dependencies. */
1251 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1252 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1254 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1255 register stalls on Generic32 compilation setting as well. However
1256 in current implementation the partial register stalls are not eliminated
1257 very well - they can be introduced via subregs synthesized by combine
1258 and can happen in caller/callee saving sequences. Because this option
1259 pays back little on PPro based chips and is in conflict with partial reg
1260 dependencies used by Athlon/P4 based chips, it is better to leave it off
1261 for generic32 for now. */
1264 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1265 m_CORE2 | m_GENERIC,
1267 /* X86_TUNE_USE_HIMODE_FIOP */
1268 m_386 | m_486 | m_K6_GEODE,
1270 /* X86_TUNE_USE_SIMODE_FIOP */
1271 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1273 /* X86_TUNE_USE_MOV0 */
1276 /* X86_TUNE_USE_CLTD */
1277 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1279 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1282 /* X86_TUNE_SPLIT_LONG_MOVES */
1285 /* X86_TUNE_READ_MODIFY_WRITE */
1288 /* X86_TUNE_READ_MODIFY */
1291 /* X86_TUNE_PROMOTE_QIMODE */
1292 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1293 | m_GENERIC /* | m_PENT4 ? */,
1295 /* X86_TUNE_FAST_PREFIX */
1296 ~(m_PENT | m_486 | m_386),
1298 /* X86_TUNE_SINGLE_STRINGOP */
1299 m_386 | m_PENT4 | m_NOCONA,
1301 /* X86_TUNE_QIMODE_MATH */
1304 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1305 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1306 might be considered for Generic32 if our scheme for avoiding partial
1307 stalls was more effective. */
1310 /* X86_TUNE_PROMOTE_QI_REGS */
1313 /* X86_TUNE_PROMOTE_HI_REGS */
1316 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1317 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1319 /* X86_TUNE_ADD_ESP_8 */
1320 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1321 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1323 /* X86_TUNE_SUB_ESP_4 */
1324 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_SUB_ESP_8 */
1327 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1328 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1330 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1331 for DFmode copies */
1332 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1333 | m_GENERIC | m_GEODE),
1335 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1336 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1338 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1339 conflict here in between PPro/Pentium4 based chips that thread 128bit
1340 SSE registers as single units versus K8 based chips that divide SSE
1341 registers to two 64bit halves. This knob promotes all store destinations
1342 to be 128bit to allow register renaming on 128bit SSE units, but usually
1343 results in one extra microop on 64bit SSE units. Experimental results
1344 shows that disabling this option on P4 brings over 20% SPECfp regression,
1345 while enabling it on K8 brings roughly 2.4% regression that can be partly
1346 masked by careful scheduling of moves. */
1347 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1349 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1352 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1353 are resolved on SSE register parts instead of whole registers, so we may
1354 maintain just lower part of scalar values in proper format leaving the
1355 upper part undefined. */
1358 /* X86_TUNE_SSE_TYPELESS_STORES */
1361 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1362 m_PPRO | m_PENT4 | m_NOCONA,
1364 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1365 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1367 /* X86_TUNE_PROLOGUE_USING_MOVE */
1368 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1370 /* X86_TUNE_EPILOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1373 /* X86_TUNE_SHIFT1 */
1376 /* X86_TUNE_USE_FFREEP */
1379 /* X86_TUNE_INTER_UNIT_MOVES */
1380 ~(m_AMD_MULTIPLE | m_GENERIC),
1382 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1385 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1386 than 4 branch instructions in the 16 byte window. */
1387 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1389 /* X86_TUNE_SCHEDULE */
1390 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1392 /* X86_TUNE_USE_BT */
1393 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1395 /* X86_TUNE_USE_INCDEC */
1396 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1398 /* X86_TUNE_PAD_RETURNS */
1399 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_EXT_80387_CONSTANTS */
1402 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1404 /* X86_TUNE_SHORTEN_X87_SSE */
1407 /* X86_TUNE_AVOID_VECTOR_DECODE */
1410 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1411 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1414 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1415 vector path on AMD machines. */
1416 m_K8 | m_GENERIC64 | m_AMDFAM10,
1418 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
machines.  */
1420 m_K8 | m_GENERIC64 | m_AMDFAM10,
1422 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1427 but one byte longer. */
1430 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1431 operand that cannot be represented using a modRM byte. The XOR
1432 replacement is long decoded, so this split helps here as well. */
1435 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1436 from integer to FP. */
1439 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1440 with a subsequent conditional jump instruction into a single
1441 compare-and-branch uop. */
1445 /* Feature tests against the various architecture variations. */
1446 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1447 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1448 ~(m_386 | m_486 | m_PENT | m_K6),
1450 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1453 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1456 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1459 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Tunings handled individually rather than through ix86_tune_features.  */

/* CPUs for which accumulating outgoing arguments is preferred.
   NOTE(review): inferred from the name only — confirm at the use site.  */
1463 static const unsigned int x86_accumulate_outgoing_args
1464 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

/* CPUs on which the 80387 "fancy" math instructions are usable by
   default.  NOTE(review): inferred from the name — verify against
   option handling.  */
1466 static const unsigned int x86_arch_always_fancy_math_387
1467 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1468 | m_NOCONA | m_CORE2 | m_GENERIC;

/* String-operation algorithm forced by the user; no_stringop means no
   override (choose from the cost tables above).  */
1470 static enum stringop_alg stringop_alg = no_stringop;
1472 /* In case the average insn count for single function invocation is
1473 lower than this constant, emit fast (but longer) prologue and
epilogue code.  */
1475 #define FAST_PROLOGUE_INSN_COUNT 20
1477 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
/* The initializer macros are supplied by the target header (i386.h).  */
1478 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1479 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1480 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1482 /* Array of the smallest class containing reg number REGNO, indexed by
1483 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1485 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1487 /* ax, dx, cx, bx */
1488 AREG, DREG, CREG, BREG,
1489 /* si, di, bp, sp */
1490 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1492 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1493 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1496 /* flags, fpsr, fpcr, frame */
1497 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1499 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1502 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1505 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1506 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1507 /* SSE REX registers */
1508 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1512 /* The "default" register map used in 32bit mode. */
1514 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1516 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1517 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1518 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1519 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1520 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1521 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1522 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1525 static int const x86_64_int_parameter_registers[6] =
1527 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1528 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1531 static int const x86_64_ms_abi_int_parameter_registers[4] =
1533 2 /*RCX*/, 1 /*RDX*/,
1534 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1537 static int const x86_64_int_return_registers[4] =
1539 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1542 /* The "default" register map used in 64bit mode. */
1543 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1545 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1546 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1547 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1548 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1549 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1550 8,9,10,11,12,13,14,15, /* extended integer registers */
1551 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1554 /* Define the register numbers to be used in Dwarf debugging information.
1555 The SVR4 reference port C compiler uses the following register numbers
1556 in its Dwarf output code:
1557 0 for %eax (gcc regno = 0)
1558 1 for %ecx (gcc regno = 2)
1559 2 for %edx (gcc regno = 1)
1560 3 for %ebx (gcc regno = 3)
1561 4 for %esp (gcc regno = 7)
1562 5 for %ebp (gcc regno = 6)
1563 6 for %esi (gcc regno = 4)
1564 7 for %edi (gcc regno = 5)
1565 The following three DWARF register numbers are never generated by
1566 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1567 believes these numbers have these meanings.
1568 8 for %eip (no gcc equivalent)
1569 9 for %eflags (gcc regno = 17)
1570 10 for %trapno (no gcc equivalent)
1571 It is not at all clear how we should number the FP stack registers
1572 for the x86 architecture. If the version of SDB on x86/svr4 were
1573 a bit less brain dead with respect to floating-point then we would
1574 have a precedent to follow with respect to DWARF register numbers
1575 for x86 FP registers, but the SDB on x86/svr4 is so completely
1576 broken with respect to FP registers that it is hardly worth thinking
1577 of it as something to strive for compatibility with.
1578 The version of x86/svr4 SDB I have at the moment does (partially)
1579 seem to believe that DWARF register number 11 is associated with
1580 the x86 register %st(0), but that's about all. Higher DWARF
1581 register numbers don't seem to be associated with anything in
1582 particular, and even for DWARF regno 11, SDB only seems to under-
1583 stand that it should say that a variable lives in %st(0) (when
1584 asked via an `=' command) if we said it was in DWARF regno 11,
1585 but SDB still prints garbage when asked for the value of the
1586 variable in question (via a `/' command).
1587 (Also note that the labels SDB prints for various FP stack regs
1588 when doing an `x' command are all wrong.)
1589 Note that these problems generally don't affect the native SVR4
1590 C compiler because it doesn't allow the use of -O with -g and
1591 because when it is *not* optimizing, it allocates a memory
1592 location for each floating-point variable, and the memory
1593 location is what gets described in the DWARF AT_location
1594 attribute for the variable in question.
1595 Regardless of the severe mental illness of the x86/svr4 SDB, we
1596 do something sensible here and we use the following DWARF
1597 register numbers. Note that these are all stack-top-relative
1599 11 for %st(0) (gcc regno = 8)
1600 12 for %st(1) (gcc regno = 9)
1601 13 for %st(2) (gcc regno = 10)
1602 14 for %st(3) (gcc regno = 11)
1603 15 for %st(4) (gcc regno = 12)
1604 16 for %st(5) (gcc regno = 13)
1605 17 for %st(6) (gcc regno = 14)
1606 18 for %st(7) (gcc regno = 15)
1608 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1610 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1611 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1612 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1613 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1614 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1615 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1616 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1619 /* Test and compare insns in i386.md store the information needed to
1620 generate branch and scc insns here. */
/* Operands of the pending comparison.  */
1622 rtx ix86_compare_op0 = NULL_RTX;
1623 rtx ix86_compare_op1 = NULL_RTX;
/* NOTE(review): presumably non-NULL when the compare insn has already
   been emitted — confirm against the i386.md expanders.  */
1624 rtx ix86_compare_emitted = NULL_RTX;
1626 /* Size of the register save area. */
/* One word per integer argument register plus 16 bytes per SSE
   argument register, as the expression shows.  */
1627 #define X86_64_VARARGS_SIZE (X86_64_REGPARM_MAX * UNITS_PER_WORD + X86_64_SSE_REGPARM_MAX * 16)
1629 /* Define the structure for the machine field in struct function. */
1631 struct stack_local_entry GTY(())
1633 unsigned short mode;
1636 struct stack_local_entry *next;
1639 /* Structure describing stack frame layout.
1640 Stack grows downward:
1646 saved frame pointer if frame_pointer_needed
1647 <- HARD_FRAME_POINTER
1652 [va_arg registers] (
1653 > to_allocate <- FRAME_POINTER
1663 HOST_WIDE_INT frame;
1665 int outgoing_arguments_size;
1668 HOST_WIDE_INT to_allocate;
1669 /* The offsets relative to ARG_POINTER. */
1670 HOST_WIDE_INT frame_pointer_offset;
1671 HOST_WIDE_INT hard_frame_pointer_offset;
1672 HOST_WIDE_INT stack_pointer_offset;
1674 /* When save_regs_using_mov is set, emit prologue using
1675 move instead of push instructions. */
1676 bool save_regs_using_mov;
1679 /* Code model option. */
1680 enum cmodel ix86_cmodel;
/* Assembler dialect to emit; defaults to AT&T syntax.  */
1682 enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS access model; defaults to the GNU dialect.  */
1684 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1686 /* Which unit we are generating floating point math for. */
1687 enum fpmath_unit ix86_fpmath;
1689 /* Which cpu are we scheduling for. */
1690 enum processor_type ix86_tune;
1692 /* Which instruction set architecture to use. */
1693 enum processor_type ix86_arch;
1695 /* true if sse prefetch instruction is not NOOP. */
1696 int x86_prefetch_sse;
1698 /* ix86_regparm_string as a number */
1699 static int ix86_regparm;
1701 /* -mstackrealign option */
1702 extern int ix86_force_align_arg_pointer;
/* Name of the function attribute corresponding to -mstackrealign.  */
1703 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
/* Insn-pattern generators selected once per compilation.
   NOTE(review): presumably chosen between 32-bit and 64-bit pattern
   variants during option processing — confirm where these are set.  */
1705 static rtx (*ix86_gen_leave) (void);
1706 static rtx (*ix86_gen_pop1) (rtx);
1707 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1708 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1709 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1710 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1711 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1713 /* Preferred alignment for stack boundary in bits. */
1714 unsigned int ix86_preferred_stack_boundary;
1716 /* Values 1-5: see jump.c */
1717 int ix86_branch_cost;
1719 /* Calling abi specific va_list type nodes. */
1720 static GTY(()) tree sysv_va_list_type_node;
1721 static GTY(()) tree ms_va_list_type_node;
1723 /* Variables which are this size or smaller are put in the data/bss
1724 or ldata/lbss sections. */
1726 int ix86_section_threshold = 65536;
1728 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1729 char internal_label_prefix[16];
/* Length of internal_label_prefix, cached alongside it.  */
1730 int internal_label_prefix_len;
1732 /* Fence to use after loop using movnt. */
1735 /* Register class used for passing given 64bit part of the argument.
1736 These represent classes as documented by the PS ABI, with the exception
1737 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1738 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1740 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1741 whenever possible (upper half does contain padding). */
1742 enum x86_64_reg_class
1745 X86_64_INTEGER_CLASS,
1746 X86_64_INTEGERSI_CLASS,
1753 X86_64_COMPLEX_X87_CLASS,
1756 static const char * const x86_64_reg_class_name[] =
1758 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1759 "sseup", "x87", "x87up", "cplx87", "no"
1762 #define MAX_CLASSES 4
1764 /* Table of constants used by fldpi, fldln2, etc.... */
1765 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1766 static bool ext_80387_constants_init = 0;
1769 static struct machine_function * ix86_init_machine_status (void);
1770 static rtx ix86_function_value (const_tree, const_tree, bool);
1771 static int ix86_function_regparm (const_tree, const_tree);
1772 static void ix86_compute_frame_layout (struct ix86_frame *);
1773 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1777 /* The svr4 ABI for the i386 says that records and unions are returned
1779 #ifndef DEFAULT_PCC_STRUCT_RETURN
1780 #define DEFAULT_PCC_STRUCT_RETURN 1
1783 /* Bit flags that specify the ISA we are compiling for. */
1784 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1786 /* A mask of ix86_isa_flags that includes bit X if X
1787 was set or cleared on the command line. */
1788 static int ix86_isa_flags_explicit;
1790 /* Define a set of ISAs which are available when a given ISA is
1791 enabled. MMX and SSE ISAs are handled separately. */
1793 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1794 #define OPTION_MASK_ISA_3DNOW_SET \
1795 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1797 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1798 #define OPTION_MASK_ISA_SSE2_SET \
1799 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1800 #define OPTION_MASK_ISA_SSE3_SET \
1801 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1802 #define OPTION_MASK_ISA_SSSE3_SET \
1803 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1804 #define OPTION_MASK_ISA_SSE4_1_SET \
1805 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1806 #define OPTION_MASK_ISA_SSE4_2_SET \
1807 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1809 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1811 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1813 #define OPTION_MASK_ISA_SSE4A_SET \
1814 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1815 #define OPTION_MASK_ISA_SSE5_SET \
1816 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1818 /* Define a set of ISAs which aren't available when a given ISA is
1819 disabled. MMX and SSE ISAs are handled separately. */
1821 #define OPTION_MASK_ISA_MMX_UNSET \
1822 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1823 #define OPTION_MASK_ISA_3DNOW_UNSET \
1824 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1825 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1827 #define OPTION_MASK_ISA_SSE_UNSET \
1828 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1829 #define OPTION_MASK_ISA_SSE2_UNSET \
1830 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1831 #define OPTION_MASK_ISA_SSE3_UNSET \
1832 (OPTION_MASK_ISA_SSE3 \
1833 | OPTION_MASK_ISA_SSSE3_UNSET \
1834 | OPTION_MASK_ISA_SSE4A_UNSET )
1835 #define OPTION_MASK_ISA_SSSE3_UNSET \
1836 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1837 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1838 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1839 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4_2
1841 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1843 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1845 #define OPTION_MASK_ISA_SSE4A_UNSET \
1846 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1848 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1850 /* Vectorization library interface and handlers. */
1851 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1852 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1853 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1855 /* Implement TARGET_HANDLE_OPTION. */
/* NOTE(review): the enclosing switch/case structure is elided in this
   view.  Every visible option handler follows one pattern, grounded by
   the OPTION_MASK_ISA_*_SET/_UNSET macros defined above:
     - enabling an ISA ORs in its ..._SET mask (the ISA plus all ISAs
       it implies);
     - disabling it clears its ..._UNSET mask (the ISA plus all ISAs
       that depend on it).
   In both directions the affected bits are also recorded in
   ix86_isa_flags_explicit, so later defaulting code (e.g. in
   override_options) will not override an explicit user choice.  */
1858 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
/* -mmmx / -mno-mmx.  */
1865 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
1866 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
1870 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1871 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
/* -m3dnow / -mno-3dnow.  */
1878 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
1879 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
1883 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1884 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
/* -msse / -mno-sse.  */
1894 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
1895 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
1899 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1900 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
/* -msse2 / -mno-sse2.  */
1907 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
1908 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
1912 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1913 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
/* -msse3 / -mno-sse3.  */
1920 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
1921 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
1925 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1926 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
/* -mssse3 / -mno-ssse3.  */
1933 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
1934 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
1938 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1939 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
/* -msse4.1 / -mno-sse4.1.  */
1946 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
1947 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
1951 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1952 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
/* -msse4.2 / -mno-sse4.2.  */
1959 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
1960 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
1964 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1965 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
/* -msse4 / -mno-sse4 (umbrella: SET implies 4.1+4.2, UNSET clears both).  */
1970 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
1971 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
1975 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1976 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
/* -msse4a / -mno-sse4a.  */
1982 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
1983 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
1987 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1988 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
/* -msse5 / -mno-sse5.  */
1995 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
1996 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2000 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2001 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2010 /* Sometimes certain combinations of command options do not make
2011 sense on a particular target machine. You can define a macro
2012 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2013 defined, is executed once just after all the command options have
2016 Don't use this macro to turn on various extra optimizations for
2017 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2020 override_options (void)
2023 int ix86_tune_defaulted = 0;
2024 int ix86_arch_specified = 0;
2025 unsigned int ix86_arch_mask, ix86_tune_mask;
2027 /* Comes from final.c -- no real reason to change it. */
2028 #define MAX_CODE_ALIGN 16
2032 const struct processor_costs *cost; /* Processor costs */
2033 const int align_loop; /* Default alignments. */
2034 const int align_loop_max_skip;
2035 const int align_jump;
2036 const int align_jump_max_skip;
2037 const int align_func;
2039 const processor_target_table[PROCESSOR_max] =
2041 {&i386_cost, 4, 3, 4, 3, 4},
2042 {&i486_cost, 16, 15, 16, 15, 16},
2043 {&pentium_cost, 16, 7, 16, 7, 16},
2044 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2045 {&geode_cost, 0, 0, 0, 0, 0},
2046 {&k6_cost, 32, 7, 32, 7, 32},
2047 {&athlon_cost, 16, 7, 16, 7, 16},
2048 {&pentium4_cost, 0, 0, 0, 0, 0},
2049 {&k8_cost, 16, 7, 16, 7, 16},
2050 {&nocona_cost, 0, 0, 0, 0, 0},
2051 {&core2_cost, 16, 10, 16, 10, 16},
2052 {&generic32_cost, 16, 7, 16, 7, 16},
2053 {&generic64_cost, 16, 10, 16, 10, 16},
2054 {&amdfam10_cost, 32, 24, 32, 7, 32}
2057 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2088 PTA_PREFETCH_SSE = 1 << 4,
2090 PTA_3DNOW_A = 1 << 6,
2094 PTA_POPCNT = 1 << 10,
2096 PTA_SSE4A = 1 << 12,
2097 PTA_NO_SAHF = 1 << 13,
2098 PTA_SSE4_1 = 1 << 14,
2099 PTA_SSE4_2 = 1 << 15,
2102 PTA_PCLMUL = 1 << 18
2107 const char *const name; /* processor name or nickname. */
2108 const enum processor_type processor;
2109 const unsigned /*enum pta_flags*/ flags;
2111 const processor_alias_table[] =
2113 {"i386", PROCESSOR_I386, 0},
2114 {"i486", PROCESSOR_I486, 0},
2115 {"i586", PROCESSOR_PENTIUM, 0},
2116 {"pentium", PROCESSOR_PENTIUM, 0},
2117 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
2118 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
2119 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2120 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
2121 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2122 {"i686", PROCESSOR_PENTIUMPRO, 0},
2123 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
2124 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
2125 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2126 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
2127 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
2128 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
2129 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
2130 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2131 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
2132 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2133 | PTA_CX16 | PTA_NO_SAHF)},
2134 {"core2", PROCESSOR_CORE2, (PTA_64BIT
2135 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2138 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2139 |PTA_PREFETCH_SSE)},
2140 {"k6", PROCESSOR_K6, PTA_MMX},
2141 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2142 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
2143 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2144 | PTA_PREFETCH_SSE)},
2145 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2146 | PTA_PREFETCH_SSE)},
2147 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2149 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2151 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2153 {"x86-64", PROCESSOR_K8, (PTA_64BIT
2154 | PTA_MMX | PTA_SSE | PTA_SSE2
2156 {"k8", PROCESSOR_K8, (PTA_64BIT
2157 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2158 | PTA_SSE | PTA_SSE2
2160 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
2161 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2162 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2164 {"opteron", PROCESSOR_K8, (PTA_64BIT
2165 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2166 | PTA_SSE | PTA_SSE2
2168 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
2169 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2170 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2172 {"athlon64", PROCESSOR_K8, (PTA_64BIT
2173 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2174 | PTA_SSE | PTA_SSE2
2176 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
2177 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2178 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2180 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
2181 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2182 | PTA_SSE | PTA_SSE2
2184 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
2185 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2186 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2188 | PTA_CX16 | PTA_ABM)},
2189 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
2190 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
2191 | PTA_SSE | PTA_SSE2 | PTA_SSE3
2193 | PTA_CX16 | PTA_ABM)},
2194 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
2195 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
2198 int const pta_size = ARRAY_SIZE (processor_alias_table);
2200 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2201 SUBTARGET_OVERRIDE_OPTIONS;
2204 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2205 SUBSUBTARGET_OVERRIDE_OPTIONS;
2208 /* -fPIC is the default for x86_64. */
2209 if (TARGET_MACHO && TARGET_64BIT)
2212 /* Set the default values for switches whose default depends on TARGET_64BIT
2213 in case they weren't overwritten by command line options. */
2216 /* Mach-O doesn't support omitting the frame pointer for now. */
2217 if (flag_omit_frame_pointer == 2)
2218 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2219 if (flag_asynchronous_unwind_tables == 2)
2220 flag_asynchronous_unwind_tables = 1;
2221 if (flag_pcc_struct_return == 2)
2222 flag_pcc_struct_return = 0;
2226 if (flag_omit_frame_pointer == 2)
2227 flag_omit_frame_pointer = 0;
2228 if (flag_asynchronous_unwind_tables == 2)
2229 flag_asynchronous_unwind_tables = 0;
2230 if (flag_pcc_struct_return == 2)
2231 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2234 /* Need to check -mtune=generic first. */
2235 if (ix86_tune_string)
2237 if (!strcmp (ix86_tune_string, "generic")
2238 || !strcmp (ix86_tune_string, "i686")
2239 /* As special support for cross compilers we read -mtune=native
2240 as -mtune=generic. With native compilers we won't see the
2241 -mtune=native, as it was changed by the driver. */
2242 || !strcmp (ix86_tune_string, "native"))
2245 ix86_tune_string = "generic64";
2247 ix86_tune_string = "generic32";
2249 else if (!strncmp (ix86_tune_string, "generic", 7))
2250 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2254 if (ix86_arch_string)
2255 ix86_tune_string = ix86_arch_string;
2256 if (!ix86_tune_string)
2258 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2259 ix86_tune_defaulted = 1;
2262 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2263 need to use a sensible tune option. */
2264 if (!strcmp (ix86_tune_string, "generic")
2265 || !strcmp (ix86_tune_string, "x86-64")
2266 || !strcmp (ix86_tune_string, "i686"))
2269 ix86_tune_string = "generic64";
2271 ix86_tune_string = "generic32";
2274 if (ix86_stringop_string)
2276 if (!strcmp (ix86_stringop_string, "rep_byte"))
2277 stringop_alg = rep_prefix_1_byte;
2278 else if (!strcmp (ix86_stringop_string, "libcall"))
2279 stringop_alg = libcall;
2280 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2281 stringop_alg = rep_prefix_4_byte;
2282 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2283 stringop_alg = rep_prefix_8_byte;
2284 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2285 stringop_alg = loop_1_byte;
2286 else if (!strcmp (ix86_stringop_string, "loop"))
2287 stringop_alg = loop;
2288 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2289 stringop_alg = unrolled_loop;
2291 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2293 if (!strcmp (ix86_tune_string, "x86-64"))
2294 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2295 "-mtune=generic instead as appropriate.");
2297 if (!ix86_arch_string)
2298 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2300 ix86_arch_specified = 1;
2302 if (!strcmp (ix86_arch_string, "generic"))
2303 error ("generic CPU can be used only for -mtune= switch");
2304 if (!strncmp (ix86_arch_string, "generic", 7))
2305 error ("bad value (%s) for -march= switch", ix86_arch_string);
2307 if (ix86_cmodel_string != 0)
2309 if (!strcmp (ix86_cmodel_string, "small"))
2310 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2311 else if (!strcmp (ix86_cmodel_string, "medium"))
2312 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2313 else if (!strcmp (ix86_cmodel_string, "large"))
2314 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2316 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2317 else if (!strcmp (ix86_cmodel_string, "32"))
2318 ix86_cmodel = CM_32;
2319 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2320 ix86_cmodel = CM_KERNEL;
2322 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2326 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2327 use of rip-relative addressing. This eliminates fixups that
2328 would otherwise be needed if this object is to be placed in a
2329 DLL, and is essentially just as efficient as direct addressing. */
2330 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2331 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2332 else if (TARGET_64BIT)
2333 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2335 ix86_cmodel = CM_32;
2337 if (ix86_asm_string != 0)
2340 && !strcmp (ix86_asm_string, "intel"))
2341 ix86_asm_dialect = ASM_INTEL;
2342 else if (!strcmp (ix86_asm_string, "att"))
2343 ix86_asm_dialect = ASM_ATT;
2345 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2347 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2348 error ("code model %qs not supported in the %s bit mode",
2349 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2350 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2351 sorry ("%i-bit mode not compiled in",
2352 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2354 for (i = 0; i < pta_size; i++)
2355 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2357 ix86_arch = processor_alias_table[i].processor;
2358 /* Default cpu tuning to the architecture. */
2359 ix86_tune = ix86_arch;
2361 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2362 error ("CPU you selected does not support x86-64 "
2365 if (processor_alias_table[i].flags & PTA_MMX
2366 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2367 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2368 if (processor_alias_table[i].flags & PTA_3DNOW
2369 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2370 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2371 if (processor_alias_table[i].flags & PTA_3DNOW_A
2372 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2373 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2374 if (processor_alias_table[i].flags & PTA_SSE
2375 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2376 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2377 if (processor_alias_table[i].flags & PTA_SSE2
2378 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2379 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2380 if (processor_alias_table[i].flags & PTA_SSE3
2381 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2382 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2383 if (processor_alias_table[i].flags & PTA_SSSE3
2384 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2385 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2386 if (processor_alias_table[i].flags & PTA_SSE4_1
2387 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2388 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2389 if (processor_alias_table[i].flags & PTA_SSE4_2
2390 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2391 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2392 if (processor_alias_table[i].flags & PTA_SSE4A
2393 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2394 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2395 if (processor_alias_table[i].flags & PTA_SSE5
2396 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2397 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2399 if (processor_alias_table[i].flags & PTA_ABM)
2401 if (processor_alias_table[i].flags & PTA_CX16)
2402 x86_cmpxchg16b = true;
2403 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2405 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2406 x86_prefetch_sse = true;
2407 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2409 if (processor_alias_table[i].flags & PTA_AES)
2411 if (processor_alias_table[i].flags & PTA_PCLMUL)
2418 error ("bad value (%s) for -march= switch", ix86_arch_string);
2420 ix86_arch_mask = 1u << ix86_arch;
2421 for (i = 0; i < X86_ARCH_LAST; ++i)
2422 ix86_arch_features[i] &= ix86_arch_mask;
2424 for (i = 0; i < pta_size; i++)
2425 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2427 ix86_tune = processor_alias_table[i].processor;
2428 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2430 if (ix86_tune_defaulted)
2432 ix86_tune_string = "x86-64";
2433 for (i = 0; i < pta_size; i++)
2434 if (! strcmp (ix86_tune_string,
2435 processor_alias_table[i].name))
2437 ix86_tune = processor_alias_table[i].processor;
2440 error ("CPU you selected does not support x86-64 "
2443 /* Intel CPUs have always interpreted SSE prefetch instructions as
2444 NOPs; so, we can enable SSE prefetch instructions even when
2445 -mtune (rather than -march) points us to a processor that has them.
2446 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2447 higher processors. */
2449 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2450 x86_prefetch_sse = true;
2454 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2456 /* Enable SSE2 if AES or PCLMUL is enabled. */
2457 if ((x86_aes || x86_pclmul)
2458 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2460 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2461 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2464 ix86_tune_mask = 1u << ix86_tune;
2465 for (i = 0; i < X86_TUNE_LAST; ++i)
2466 ix86_tune_features[i] &= ix86_tune_mask;
2469 ix86_cost = &size_cost;
2471 ix86_cost = processor_target_table[ix86_tune].cost;
2473 /* Arrange to set up i386_stack_locals for all functions. */
2474 init_machine_status = ix86_init_machine_status;
2476 /* Validate -mregparm= value. */
2477 if (ix86_regparm_string)
2480 warning (0, "-mregparm is ignored in 64-bit mode");
2481 i = atoi (ix86_regparm_string);
2482 if (i < 0 || i > REGPARM_MAX)
2483 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2488 ix86_regparm = REGPARM_MAX;
2490 /* If the user has provided any of the -malign-* options,
2491 warn and use that value only if -falign-* is not set.
2492 Remove this code in GCC 3.2 or later. */
2493 if (ix86_align_loops_string)
2495 warning (0, "-malign-loops is obsolete, use -falign-loops");
2496 if (align_loops == 0)
2498 i = atoi (ix86_align_loops_string);
2499 if (i < 0 || i > MAX_CODE_ALIGN)
2500 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2502 align_loops = 1 << i;
2506 if (ix86_align_jumps_string)
2508 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2509 if (align_jumps == 0)
2511 i = atoi (ix86_align_jumps_string);
2512 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this diagnostic previously named
   -malign-loops (copied from the loop-alignment handler above);
   report the option actually being validated.  */
2513 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2515 align_jumps = 1 << i;
2519 if (ix86_align_funcs_string)
2521 warning (0, "-malign-functions is obsolete, use -falign-functions");
2522 if (align_functions == 0)
2524 i = atoi (ix86_align_funcs_string);
2525 if (i < 0 || i > MAX_CODE_ALIGN)
/* Fixed copy-paste bug: this diagnostic previously named
   -malign-loops (copied from the loop-alignment handler above);
   report the option actually being validated.  */
2526 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2528 align_functions = 1 << i;
2532 /* Default align_* from the processor table. */
2533 if (align_loops == 0)
2535 align_loops = processor_target_table[ix86_tune].align_loop;
2536 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2538 if (align_jumps == 0)
2540 align_jumps = processor_target_table[ix86_tune].align_jump;
2541 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2543 if (align_functions == 0)
2545 align_functions = processor_target_table[ix86_tune].align_func;
2548 /* Validate -mbranch-cost= value, or provide default. */
2549 ix86_branch_cost = ix86_cost->branch_cost;
2550 if (ix86_branch_cost_string)
2552 i = atoi (ix86_branch_cost_string);
2554 error ("-mbranch-cost=%d is not between 0 and 5", i);
2556 ix86_branch_cost = i;
2558 if (ix86_section_threshold_string)
2560 i = atoi (ix86_section_threshold_string);
2562 error ("-mlarge-data-threshold=%d is negative", i);
2564 ix86_section_threshold = i;
2567 if (ix86_tls_dialect_string)
2569 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2570 ix86_tls_dialect = TLS_DIALECT_GNU;
2571 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2572 ix86_tls_dialect = TLS_DIALECT_GNU2;
2573 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2574 ix86_tls_dialect = TLS_DIALECT_SUN;
2576 error ("bad value (%s) for -mtls-dialect= switch",
2577 ix86_tls_dialect_string);
2580 if (ix87_precision_string)
2582 i = atoi (ix87_precision_string);
2583 if (i != 32 && i != 64 && i != 80)
2584 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2589 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2591 /* Enable by default the SSE and MMX builtins. Do allow the user to
2592 explicitly disable any of these. In particular, disabling SSE and
2593 MMX for kernel code is extremely useful. */
2594 if (!ix86_arch_specified)
2596 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2597 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2600 warning (0, "-mrtd is ignored in 64bit mode");
2604 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2606 if (!ix86_arch_specified)
2608 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2610 /* i386 ABI does not specify red zone. It still makes sense to use it
2611 when the programmer takes care to keep the stack from being destroyed. */
2612 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2613 target_flags |= MASK_NO_RED_ZONE;
2616 /* Keep nonleaf frame pointers. */
2617 if (flag_omit_frame_pointer)
2618 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2619 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2620 flag_omit_frame_pointer = 1;
2622 /* If we're doing fast math, we don't care about comparison order
2623 wrt NaNs. This lets us use a shorter comparison sequence. */
2624 if (flag_finite_math_only)
2625 target_flags &= ~MASK_IEEE_FP;
2627 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2628 since the insns won't need emulation. */
2629 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2630 target_flags &= ~MASK_NO_FANCY_MATH_387;
2632 /* Likewise, if the target doesn't have a 387, or we've specified
2633 software floating point, don't use 387 inline intrinsics. */
2635 target_flags |= MASK_NO_FANCY_MATH_387;
2637 /* Turn on MMX builtins for -msse. */
2640 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2641 x86_prefetch_sse = true;
2644 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2645 if (TARGET_SSE4_2 || TARGET_ABM)
2648 /* Validate -mpreferred-stack-boundary= value, or provide default.
2649 The default of 128 bits is for Pentium III's SSE __m128. We can't
2650 change it because of optimize_size. Otherwise, we can't mix object
2651 files compiled with -Os and -On. */
2652 ix86_preferred_stack_boundary = 128;
2653 if (ix86_preferred_stack_boundary_string)
2655 i = atoi (ix86_preferred_stack_boundary_string);
2656 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2657 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2658 TARGET_64BIT ? 4 : 2);
2660 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2663 /* Accept -msseregparm only if at least SSE support is enabled. */
2664 if (TARGET_SSEREGPARM
2666 error ("-msseregparm used without SSE enabled");
2668 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2669 if (ix86_fpmath_string != 0)
2671 if (! strcmp (ix86_fpmath_string, "387"))
2672 ix86_fpmath = FPMATH_387;
2673 else if (! strcmp (ix86_fpmath_string, "sse"))
2677 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2678 ix86_fpmath = FPMATH_387;
2681 ix86_fpmath = FPMATH_SSE;
2683 else if (! strcmp (ix86_fpmath_string, "387,sse")
2684 || ! strcmp (ix86_fpmath_string, "sse,387"))
2688 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2689 ix86_fpmath = FPMATH_387;
2691 else if (!TARGET_80387)
2693 warning (0, "387 instruction set disabled, using SSE arithmetics");
2694 ix86_fpmath = FPMATH_SSE;
2697 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2700 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2703 /* If the i387 is disabled, then do not return values in it. */
2705 target_flags &= ~MASK_FLOAT_RETURNS;
2707 /* Use external vectorized library in vectorizing intrinsics. */
2708 if (ix86_veclibabi_string)
2710 if (strcmp (ix86_veclibabi_string, "svml") == 0)
2711 ix86_veclib_handler = ix86_veclibabi_svml;
2712 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
2713 ix86_veclib_handler = ix86_veclibabi_acml;
2715 error ("unknown vectorization library ABI type (%s) for "
2716 "-mveclibabi= switch", ix86_veclibabi_string);
2719 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2720 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2722 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2724 /* ??? Unwind info is not correct around the CFG unless either a frame
2725 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2726 unwind info generation to be aware of the CFG and propagating states
2728 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2729 || flag_exceptions || flag_non_call_exceptions)
2730 && flag_omit_frame_pointer
2731 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2733 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2734 warning (0, "unwind tables currently require either a frame pointer "
2735 "or -maccumulate-outgoing-args for correctness");
2736 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2739 /* If stack probes are required, the space used for large function
2740 arguments on the stack must also be probed, so enable
2741 -maccumulate-outgoing-args so this happens in the prologue. */
2742 if (TARGET_STACK_PROBE
2743 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2745 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2746 warning (0, "stack probing requires -maccumulate-outgoing-args "
2748 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2751 /* For sane SSE instruction set generation we need fcomi instruction.
2752 It is safe to enable all CMOVE instructions. */
2756 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2759 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2760 p = strchr (internal_label_prefix, 'X');
2761 internal_label_prefix_len = p - internal_label_prefix;
2765 /* When scheduling description is not available, disable scheduler pass
2766 so it won't slow down the compilation and make x87 code slower. */
2767 if (!TARGET_SCHEDULE)
2768 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2770 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2771 set_param_value ("simultaneous-prefetches",
2772 ix86_cost->simultaneous_prefetches);
2773 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2774 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2775 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2776 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2777 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2778 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2780 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
2781 can be optimized to ap = __builtin_next_arg (0). */
2783 targetm.expand_builtin_va_start = NULL;
2787 ix86_gen_leave = gen_leave_rex64;
2788 ix86_gen_pop1 = gen_popdi1;
2789 ix86_gen_add3 = gen_adddi3;
2790 ix86_gen_sub3 = gen_subdi3;
2791 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
2792 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
2793 ix86_gen_monitor = gen_sse3_monitor64;
2797 ix86_gen_leave = gen_leave;
2798 ix86_gen_pop1 = gen_popsi1;
2799 ix86_gen_add3 = gen_addsi3;
2800 ix86_gen_sub3 = gen_subsi3;
2801 ix86_gen_sub3_carry = gen_subsi3_carry;
2802 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
2803 ix86_gen_monitor = gen_sse3_monitor;
2807 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
2809 target_flags |= MASK_CLD & ~target_flags_explicit;
2813 /* Return true if this goes in large data/bss. */
/* NOTE(review): elided chunk -- the return type line, braces and the
   individual return statements of this predicate are not visible here.
   From the visible tests it decides by: code model, decl kind, an
   explicit .ldata/.lbss section attribute, and object size versus
   ix86_section_threshold.  Confirm against the full source.  */
2816 ix86_in_large_data_p (tree exp)
/* Only the medium code models place anything in large data/bss.  */
2818 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2821 /* Functions are never large data. */
2822 if (TREE_CODE (exp) == FUNCTION_DECL)
/* An explicit section attribute naming .ldata/.lbss forces large data.  */
2825 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2827 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2828 if (strcmp (section, ".ldata") == 0
2829 || strcmp (section, ".lbss") == 0)
2835 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2837 /* If this is an incomplete type with size 0, then we can't put it
2838 in data because it might be too big when completed. */
2839 if (!size || size > ix86_section_threshold)
2846 /* Switch to the appropriate section for output of DECL.
2847 DECL is either a `VAR_DECL' node or a constant of some sort.
2848 RELOC indicates whether forming the initial value of DECL requires
2849 link-time relocations. */
/* NOTE(review): elided chunk -- break statements, several case labels,
   gcc_unreachable () defaults and closing braces are missing from this
   view; do not infer fall-through behavior from what is shown.  */
2851 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2855 x86_64_elf_select_section (tree decl, int reloc,
2856 unsigned HOST_WIDE_INT align)
/* Large objects under the medium code models get .l*-prefixed sections;
   everything else falls through to the generic ELF selection below.  */
2858 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2859 && ix86_in_large_data_p (decl))
2861 const char *sname = NULL;
2862 unsigned int flags = SECTION_WRITE;
2863 switch (categorize_decl_for_section (decl, reloc))
2868 case SECCAT_DATA_REL:
2869 sname = ".ldata.rel";
2871 case SECCAT_DATA_REL_LOCAL:
2872 sname = ".ldata.rel.local";
2874 case SECCAT_DATA_REL_RO:
2875 sname = ".ldata.rel.ro";
2877 case SECCAT_DATA_REL_RO_LOCAL:
2878 sname = ".ldata.rel.ro.local";
2882 flags |= SECTION_BSS;
2885 case SECCAT_RODATA_MERGE_STR:
2886 case SECCAT_RODATA_MERGE_STR_INIT:
2887 case SECCAT_RODATA_MERGE_CONST:
2891 case SECCAT_SRODATA:
2898 /* We don't split these for medium model. Place them into
2899 default sections and hope for best. */
2901 case SECCAT_EMUTLS_VAR:
2902 case SECCAT_EMUTLS_TMPL:
2907 /* We might get called with string constants, but get_named_section
2908 doesn't like them as they are not DECLs. Also, we need to set
2909 flags in that case. */
2911 return get_section (sname, flags, NULL);
2912 return get_named_section (decl, sname, reloc);
2915 return default_elf_select_section (decl, reloc, align);
2918 /* Build up a unique section name, expressed as a
2919 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2920 RELOC indicates whether the initial value of EXP requires
2921 link-time relocations. */
/* NOTE(review): elided chunk -- break statements, some case labels and
   closing braces are missing from this view.  */
2923 static void ATTRIBUTE_UNUSED
2924 x86_64_elf_unique_section (tree decl, int reloc)
/* Large objects under the medium code models get a .l*-prefixed unique
   section; everything else defers to default_unique_section.  */
2926 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2927 && ix86_in_large_data_p (decl))
2929 const char *prefix = NULL;
2930 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2931 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2933 switch (categorize_decl_for_section (decl, reloc))
2936 case SECCAT_DATA_REL:
2937 case SECCAT_DATA_REL_LOCAL:
2938 case SECCAT_DATA_REL_RO:
2939 case SECCAT_DATA_REL_RO_LOCAL:
2940 prefix = one_only ? ".ld" : ".ldata";
2943 prefix = one_only ? ".lb" : ".lbss";
2946 case SECCAT_RODATA_MERGE_STR:
2947 case SECCAT_RODATA_MERGE_STR_INIT:
2948 case SECCAT_RODATA_MERGE_CONST:
2949 prefix = one_only ? ".lr" : ".lrodata";
2951 case SECCAT_SRODATA:
2958 /* We don't split these for medium model. Place them into
2959 default sections and hope for best. */
2961 case SECCAT_EMUTLS_VAR:
2962 prefix = targetm.emutls.var_section;
2964 case SECCAT_EMUTLS_TMPL:
2965 prefix = targetm.emutls.tmpl_section;
2970 const char *name, *linkonce;
/* Build "<linkonce><prefix>.<stripped assembler name>".  */
2973 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2974 name = targetm.strip_name_encoding (name);
2976 /* If we're using one_only, then there needs to be a .gnu.linkonce
2977 prefix to the section name. */
2978 linkonce = one_only ? ".gnu.linkonce" : "";
2980 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
2982 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
2986 default_unique_section (decl, reloc);
2989 #ifdef COMMON_ASM_OP
2990 /* This says how to output assembler code to declare an
2991 uninitialized external linkage data object.
2993 For medium model x86-64 we need to use .largecomm opcode for
/* NOTE(review): elided chunk -- the return type, an `else', and the
   closing brace are missing from this view.  The visible logic emits
   .largecomm for medium-model objects above ix86_section_threshold,
   otherwise the target's COMMON_ASM_OP, followed by "name,size,align"
   with ALIGN converted from bits to bytes.  */
2996 x86_elf_aligned_common (FILE *file,
2997 const char *name, unsigned HOST_WIDE_INT size,
3000 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3001 && size > (unsigned int)ix86_section_threshold)
3002 fprintf (file, ".largecomm\t");
3004 fprintf (file, "%s", COMMON_ASM_OP);
3005 assemble_name (file, name);
3006 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
3007 size, align / BITS_PER_UNIT);
3011 /* Utility function for targets to use in implementing
3012 ASM_OUTPUT_ALIGNED_BSS. */
/* NOTE(review): elided chunk -- return type, an `else', an `#else' and
   the closing brace are missing from this view.  Large medium-model
   objects go to .lbss, others to the regular bss section; then the
   label and SIZE bytes of skip are emitted.  */
3015 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
3016 const char *name, unsigned HOST_WIDE_INT size,
3019 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3020 && size > (unsigned int)ix86_section_threshold)
3021 switch_to_section (get_named_section (decl, ".lbss", 0));
3023 switch_to_section (bss_section);
3024 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
3025 #ifdef ASM_DECLARE_OBJECT_NAME
3026 last_assemble_variable_decl = decl;
3027 ASM_DECLARE_OBJECT_NAME (file, name, decl);
3029 /* Standard thing is just output label for the object. */
3030 ASM_OUTPUT_LABEL (file, name);
3031 #endif /* ASM_DECLARE_OBJECT_NAME */
/* Emit at least one byte so the object has a nonzero extent.  */
3032 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Set i386-specific optimization defaults for -O LEVEL.  NOTE(review):
   elided chunk -- the return type line, the guarding conditionals for
   the flag assignments, `#endif's and the closing brace are missing
   from this view.  */
3036 optimization_options (int level, int size ATTRIBUTE_UNUSED)
3038 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
3039 make the problem with not enough registers even worse. */
3040 #ifdef INSN_SCHEDULING
3042 flag_schedule_insns = 0;
3046 /* The Darwin libraries never set errno, so we might as well
3047 avoid calling them when that's the only reason we would. */
3048 flag_errno_math = 0;
3050 /* The default values of these switches depend on the TARGET_64BIT
3051 that is not known at this moment. Mark these values with 2 and
3052 let the user override these. In case there is no command line option
3053 specifying them, we will set the defaults in override_options. */
3055 flag_omit_frame_pointer = 2;
3056 flag_pcc_struct_return = 2;
3057 flag_asynchronous_unwind_tables = 2;
3058 flag_vect_cost_model = 1;
3059 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
3060 SUBTARGET_OPTIMIZATION_OPTIONS;
3064 /* Decide whether we can make a sibling call to a function. DECL is the
3065 declaration of the function being targeted by the call and EXP is the
3066 CALL_EXPR representing the call. */
/* NOTE(review): elided chunk -- the return type, local declarations
   (a, b, func), `return false/true' statements and braces are missing
   from this view; each visible `if' presumably rejects the sibcall.  */
3069 ix86_function_ok_for_sibcall (tree decl, tree exp)
3074 /* If we are generating position-independent code, we cannot sibcall
3075 optimize any indirect call, or a direct call to a global function,
3076 as the PLT requires %ebx be live. */
3077 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
/* Strip the pointer type to reach the called function's type.  */
3084 func = TREE_TYPE (CALL_EXPR_FN (exp));
3085 if (POINTER_TYPE_P (func))
3086 func = TREE_TYPE (func);
3089 /* Check that the return value locations are the same. Like
3090 if we are returning floats on the 80387 register stack, we cannot
3091 make a sibcall from a function that doesn't return a float to a
3092 function that does or, conversely, from a function that does return
3093 a float to a function that doesn't; the necessary stack adjustment
3094 would not be executed. This is also the place we notice
3095 differences in the return value ABI. Note that it is ok for one
3096 of the functions to have void return type as long as the return
3097 value of the other is passed in a register. */
3098 a = ix86_function_value (TREE_TYPE (exp), func, false);
3099 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
3101 if (STACK_REG_P (a) || STACK_REG_P (b))
3103 if (!rtx_equal_p (a, b))
3106 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
3108 else if (!rtx_equal_p (a, b))
3111 /* If this call is indirect, we'll need to be able to use a call-clobbered
3112 register for the address of the target function. Make sure that all
3113 such registers are not used for passing parameters. */
3114 if (!decl && !TARGET_64BIT)
3118 /* We're looking at the CALL_EXPR, we need the type of the function. */
3119 type = CALL_EXPR_FN (exp); /* pointer expression */
3120 type = TREE_TYPE (type); /* pointer type */
3121 type = TREE_TYPE (type); /* function type */
3123 if (ix86_function_regparm (type, NULL) >= 3)
3125 /* ??? Need to count the actual number of registers to be used,
3126 not the possible number of registers. Fix later. */
3131 /* Dllimport'd functions are also called indirectly. */
3132 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
3133 && decl && DECL_DLLIMPORT_P (decl)
3134 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
3137 /* If we forced aligned the stack, then sibcalling would unalign the
3138 stack, which may break the called function. */
3139 if (cfun->machine->force_align_arg_pointer)
3142 /* Otherwise okay. That also includes certain types of indirect calls. */
3146 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
3147 calling convention attributes;
3148 arguments as in struct attribute_spec.handler. */
/* NOTE(review): elided chunk -- the return type, several parameters,
   TARGET_64BIT guards, `return NULL_TREE' statements and many braces
   are missing from this view.  */
3151 ix86_handle_cconv_attribute (tree *node, tree name,
3153 int flags ATTRIBUTE_UNUSED,
/* Reject the attribute on anything that is not function-like.  */
3156 if (TREE_CODE (*node) != FUNCTION_TYPE
3157 && TREE_CODE (*node) != METHOD_TYPE
3158 && TREE_CODE (*node) != FIELD_DECL
3159 && TREE_CODE (*node) != TYPE_DECL)
3161 warning (OPT_Wattributes, "%qs attribute only applies to functions",
3162 IDENTIFIER_POINTER (name));
3163 *no_add_attrs = true;
3167 /* Can combine regparm with all attributes but fastcall. */
3168 if (is_attribute_p ("regparm", name))
3172 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3174 error ("fastcall and regparm attributes are not compatible");
/* Validate the regparm count: integer constant, <= REGPARM_MAX, and
   one less when the stack-realignment attribute is also present.  */
3177 cst = TREE_VALUE (args);
3178 if (TREE_CODE (cst) != INTEGER_CST)
3180 warning (OPT_Wattributes,
3181 "%qs attribute requires an integer constant argument",
3182 IDENTIFIER_POINTER (name));
3183 *no_add_attrs = true;
3185 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
3187 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
3188 IDENTIFIER_POINTER (name), REGPARM_MAX);
3189 *no_add_attrs = true;
3193 && lookup_attribute (ix86_force_align_arg_pointer_string,
3194 TYPE_ATTRIBUTES (*node))
3195 && compare_tree_int (cst, REGPARM_MAX-1))
3197 error ("%s functions limited to %d register parameters",
3198 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
3206 /* Do not warn when emulating the MS ABI. */
3207 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
3208 warning (OPT_Wattributes, "%qs attribute ignored",
3209 IDENTIFIER_POINTER (name));
3210 *no_add_attrs = true;
3214 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
3215 if (is_attribute_p ("fastcall", name))
3217 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3219 error ("fastcall and cdecl attributes are not compatible");
3221 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3223 error ("fastcall and stdcall attributes are not compatible");
3225 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
3227 error ("fastcall and regparm attributes are not compatible");
3231 /* Can combine stdcall with fastcall (redundant), regparm and
3233 else if (is_attribute_p ("stdcall", name))
3235 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
3237 error ("stdcall and cdecl attributes are not compatible");
3239 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3241 error ("stdcall and fastcall attributes are not compatible");
3245 /* Can combine cdecl with regparm and sseregparm. */
3246 else if (is_attribute_p ("cdecl", name))
3248 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
3250 error ("stdcall and cdecl attributes are not compatible");
3252 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
3254 error ("fastcall and cdecl attributes are not compatible");
3258 /* Can combine sseregparm with all attributes. */
3263 /* Return 0 if the attributes for two types are incompatible, 1 if they
3264 are compatible, and 2 if they are nearly compatible (which causes a
3265 warning to be generated). */
/* NOTE(review): elided chunk -- the return type and the `return 0/1'
   statements after each mismatch check are missing from this view.  */
3268 ix86_comp_type_attributes (const_tree type1, const_tree type2)
3270 /* Check for mismatch of non-default calling convention. */
3271 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
/* Attribute comparison only applies to function-like types.  */
3273 if (TREE_CODE (type1) != FUNCTION_TYPE
3274 && TREE_CODE (type1) != METHOD_TYPE)
3277 /* Check for mismatched fastcall/regparm types. */
3278 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
3279 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
3280 || (ix86_function_regparm (type1, NULL)
3281 != ix86_function_regparm (type2, NULL)))
3284 /* Check for mismatched sseregparm types. */
3285 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
3286 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
3289 /* Check for mismatched return types (cdecl vs stdcall). */
3290 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
3291 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
3297 /* Return the regparm value for a function with the indicated TYPE and DECL.
3298 DECL may be NULL when calling function indirectly
3299 or considering a libcall. */
/* NOTE(review): elided chunk -- the return type, TARGET_64BIT guards,
   several braces and the final `return regparm' are missing from this
   view.  */
3302 ix86_function_regparm (const_tree type, const_tree decl)
3305 int regparm = ix86_regparm;
/* Issue the nested-function diagnostic at most once per compilation.  */
3307 static bool error_issued;
3311 if (ix86_function_type_abi (type) == DEFAULT_ABI)
3313 return DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* An explicit regparm attribute overrides the -mregparm default.  */
3316 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3320 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3322 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
3324 /* We can't use regparm(3) for nested functions because
3325 these pass static chain pointer in %ecx register. */
3326 if (!error_issued && regparm == 3
3327 && decl_function_context (decl)
3328 && !DECL_NO_STATIC_CHAIN (decl))
3330 error ("nested functions are limited to 2 register parameters");
3331 error_issued = true;
3339 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3342 /* Use register calling convention for local functions when possible. */
3343 if (decl && TREE_CODE (decl) == FUNCTION_DECL
3344 && flag_unit_at_a_time && !profile_flag)
3346 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3347 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3350 int local_regparm, globals = 0, regno;
3353 /* Make sure no regparm register is taken by a
3354 fixed register variable. */
3355 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
3356 if (fixed_regs[local_regparm])
3359 /* We can't use regparm(3) for nested functions as these use
3360 static chain pointer in third argument. */
3361 if (local_regparm == 3
3362 && (decl_function_context (decl)
3363 || ix86_force_align_arg_pointer)
3364 && !DECL_NO_STATIC_CHAIN (decl))
3367 /* If the function realigns its stackpointer, the prologue will
3368 clobber %ecx. If we've already generated code for the callee,
3369 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3370 scanning the attributes for the self-realigning property. */
3371 f = DECL_STRUCT_FUNCTION (decl);
3372 if (local_regparm == 3
3373 && (f ? !!f->machine->force_align_arg_pointer
3374 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3375 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3378 /* Each fixed register usage increases register pressure,
3379 so less registers should be used for argument passing.
3380 This functionality can be overriden by an explicit
3382 for (regno = 0; regno <= DI_REG; regno++)
3383 if (fixed_regs[regno])
3387 = globals < local_regparm ? local_regparm - globals : 0;
3389 if (local_regparm > regparm)
3390 regparm = local_regparm;
3397 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3398 DFmode (2) arguments in SSE registers for a function with the
3399 indicated TYPE and DECL. DECL may be NULL when calling function
3400 indirectly or considering a libcall. Otherwise return 0. */
/* NOTE(review): elided chunk -- the return type, the TARGET_SSE checks
   guarding the error/warning branches, `return' statements and braces
   are missing from this view.  */
3403 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
3405 gcc_assert (!TARGET_64BIT);
3407 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3408 by the sseregparm attribute. */
3409 if (TARGET_SSEREGPARM
3410 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
/* WARN selects between the decl-based and type-based diagnostics.  */
3417 error ("Calling %qD with attribute sseregparm without "
3418 "SSE/SSE2 enabled", decl);
3420 error ("Calling %qT with attribute sseregparm without "
3421 "SSE/SSE2 enabled", type);
3429 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3430 (and DFmode for SSE2) arguments in SSE registers. */
3431 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3433 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3434 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3436 return TARGET_SSE2 ? 2 : 1;
3442 /* Return true if EAX is live at the start of the function. Used by
3443 ix86_expand_prologue to determine if we need special help before
3444 calling allocate_stack_worker. */
/* NOTE(review): the return type line is elided from this view.  */
3447 ix86_eax_live_at_start_p (void)
3449 /* Cheat. Don't bother working forward from ix86_function_regparm
3450 to the function type to whether an actual argument is located in
3451 eax. Instead just look at cfg info, which is still close enough
3452 to correct at this point. This gives false positives for broken
3453 functions that might use uninitialized data that happens to be
3454 allocated in eax, but who cares? */
/* Register 0 is %eax on this target.  */
3455 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3458 /* Value is the number of bytes of arguments automatically
3459 popped when returning from a subroutine call.
3460 FUNDECL is the declaration node of the function (as a tree),
3461 FUNTYPE is the data type of the function (as a tree),
3462 or for a library call it is an identifier node for the subroutine name.
3463 SIZE is the number of bytes of arguments passed on the stack.
3465 On the 80386, the RTD insn may be used to pop them if the number
3466 of args is fixed, but if the number is variable then the caller
3467 must pop them all. RTD can't be used for library calls now
3468 because the library is compiled with the Unix compiler.
3469 Use of RTD is a selectable option, since it is incompatible with
3470 standard Unix calling sequences. If the option is not selected,
3471 the caller must always pop the args.
3473 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): elided chunk -- the return type, `return size'/`return 0'
   statements and braces are missing from this view.  */
3476 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3480 /* None of the 64-bit ABIs pop arguments. */
/* RTD never applies to library calls (identifier-node FUNDECL).  */
3484 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3486 /* Cdecl functions override -mrtd, and never pop the stack. */
3487 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3489 /* Stdcall and fastcall functions will pop the stack if not
3491 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3492 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3495 if (rtd && ! stdarg_p (funtype))
3499 /* Lose any fake structure return argument if it is passed on the stack. */
3500 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3501 && !KEEP_AGGREGATE_RETURN_POINTER)
3503 int nregs = ix86_function_regparm (funtype, fundecl);
3505 return GET_MODE_SIZE (Pmode);
3511 /* Argument support functions. */
3513 /* Return true when register may be used to pass function parameters. */
/* NOTE(review): elided chunk -- the return type, the TARGET_64BIT
   branching that separates the 32-bit and 64-bit cases, `return'
   statements and braces are missing from this view.  */
3515 ix86_function_arg_regno_p (int regno)
3518 const int *parm_regs;
3523 return (regno < REGPARM_MAX
3524 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3526 return (regno < REGPARM_MAX
3527 || (TARGET_MMX && MMX_REGNO_P (regno)
3528 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3529 || (TARGET_SSE && SSE_REGNO_P (regno)
3530 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3535 if (SSE_REGNO_P (regno) && TARGET_SSE)
3540 if (TARGET_SSE && SSE_REGNO_P (regno)
3541 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3545 /* TODO: The function should depend on current function ABI but
3546 builtins.c would need updating then. Therefore we use the
3549 /* RAX is used as hidden argument to va_arg functions. */
3550 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
/* Scan the integer parameter register table of the default ABI.  */
3553 if (DEFAULT_ABI == MS_ABI)
3554 parm_regs = x86_64_ms_abi_int_parameter_registers;
3556 parm_regs = x86_64_int_parameter_registers;
3557 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
3558 : X86_64_REGPARM_MAX); i++)
3559 if (regno == parm_regs[i])
3564 /* Return if we do not know how to pass TYPE solely in registers. */
/* NOTE(review): the return type line and the `return true' after the
   first check are elided from this view.  */
3567 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3569 if (must_pass_in_stack_var_size_or_pad (mode, type))
3572 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3573 The layout_type routine is crafty and tries to trick us into passing
3574 currently unsupported vector types on the stack by using TImode. */
3575 return (!TARGET_64BIT && mode == TImode
3576 && type && TREE_CODE (type) != VECTOR_TYPE);
3579 /* It returns the size, in bytes, of the area reserved for arguments passed
3580 in registers for the function represented by fndecl dependent to the used
/* NOTE(review): elided chunk -- the return type, the `if (fndecl)' that
   selects between the two assignments, and the final return are missing
   from this view.  */
3583 ix86_reg_parm_stack_space (const_tree fndecl)
3586 /* For libcalls it is possible that there is no fndecl at hand.
3587 Therefore assume for this case the default abi of the target. */
3589 call_abi = DEFAULT_ABI;
3591 call_abi = ix86_function_abi (fndecl);
3597 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* NOTE(review): elided chunk -- the return type, an `abi' declaration,
   the return statements and the non-64-bit fallback are missing from
   this view.  The ms_abi/sysv_abi attributes flip the ABI away from
   the configured default.  */
3600 ix86_function_type_abi (const_tree fntype)
3602 if (TARGET_64BIT && fntype != NULL)
3605 if (DEFAULT_ABI == SYSV_ABI)
3606 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
3608 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
/* Return the calling ABI for FNDECL by delegating to the type-based
   query.  NOTE(review): the return type line and a null-FNDECL guard
   are elided from this view.  */
3616 ix86_function_abi (const_tree fndecl)
3620 return ix86_function_type_abi (TREE_TYPE (fndecl));
3623 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* NOTE(review): elided chunk -- the return type and the default-ABI
   return taken when cfun is absent or not 64-bit are missing from
   this view.  */
3626 ix86_cfun_abi (void)
3628 if (! cfun || ! TARGET_64BIT)
3630 return cfun->machine->call_abi;
3634 extern void init_regs (void);
3636 /* Implementation of call abi switching target hook. Specific to FNDECL
3637 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
3639 To prevent redudant calls of costy function init_regs (), it checks not to
3640 reset register usage for default abi. */
/* NOTE(review): elided chunk -- the return type, an `else' before the
   type-based assignment, the init_regs () calls inside each branch,
   and closing braces are missing from this view.  */
3642 ix86_call_abi_override (const_tree fndecl)
3644 if (fndecl == NULL_TREE)
3645 cfun->machine->call_abi = DEFAULT_ABI;
3647 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* MS ABI treats RSI/RDI as callee-saved; SysV treats them as
   call-clobbered.  Only touch the tables when they would change.  */
3648 if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
3650 if (call_used_regs[4 /*RSI*/] != 0 || call_used_regs[5 /*RDI*/] != 0)
3652 call_used_regs[4 /*RSI*/] = 0;
3653 call_used_regs[5 /*RDI*/] = 0;
3657 else if (TARGET_64BIT)
3659 if (call_used_regs[4 /*RSI*/] != 1 || call_used_regs[5 /*RDI*/] != 1)
3661 call_used_regs[4 /*RSI*/] = 1;
3662 call_used_regs[5 /*RDI*/] = 1;
3668 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3669 for a call to a function whose data type is FNTYPE.
3670 For a library call, FNTYPE is 0. */
/* NOTE(review): elided chunk -- the return type, the FNDECL parameter
   line, TARGET_64BIT/TARGET_SSE/TARGET_MMX guards around the register
   count setup, and many braces are missing from this view.  */
3673 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3674 tree fntype, /* tree ptr for function decl */
3675 rtx libname, /* SYMBOL_REF of library name or 0 */
3678 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
3679 memset (cum, 0, sizeof (*cum));
3681 cum->call_abi = ix86_function_type_abi (fntype);
3682 /* Set up the number of registers to use for passing arguments. */
3683 cum->nregs = ix86_regparm;
/* A non-default ABI uses that ABI's own register-parameter limits.  */
3686 if (cum->call_abi != DEFAULT_ABI)
3687 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
3692 cum->sse_nregs = SSE_REGPARM_MAX;
3695 if (cum->call_abi != DEFAULT_ABI)
3696 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
3697 : X64_SSE_REGPARM_MAX;
3701 cum->mmx_nregs = MMX_REGPARM_MAX;
3702 cum->warn_sse = true;
3703 cum->warn_mmx = true;
3705 /* Because type might mismatch in between caller and callee, we need to
3706 use actual type of function for local calls.
3707 FIXME: cgraph_analyze can be told to actually record if function uses
3708 va_start so for local functions maybe_vaarg can be made aggressive
3710 FIXME: once typesytem is fixed, we won't need this code anymore. */
3712 fntype = TREE_TYPE (fndecl);
3713 cum->maybe_vaarg = (fntype
3714 ? (!prototype_p (fntype) || stdarg_p (fntype))
3719 /* If there are variable arguments, then we won't pass anything
3720 in registers in 32-bit mode. */
3721 if (stdarg_p (fntype))
3731 /* Use ecx and edx registers if function has fastcall attribute,
3732 else look for regparm information. */
3735 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3741 cum->nregs = ix86_function_regparm (fntype, fndecl);
3744 /* Set up the number of SSE registers used for passing SFmode
3745 and DFmode arguments. Warn for mismatching ABI. */
3746 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
3750 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3751 But in the case of vector types, it is some vector mode.
3753 When we have only some of our vector isa extensions enabled, then there
3754 are some modes for which vector_mode_supported_p is false. For these
3755 modes, the generic vector support in gcc will choose some non-vector mode
3756 in order to implement the type. By computing the natural mode, we'll
3757 select the proper ABI location for the operand and not depend on whatever
3758 the middle-end decides to do with these vector types. */
/* NOTE(review): elided chunk -- the `return mode' at the end of the
   wider-mode search and closing braces are missing from this view.  */
3760 static enum machine_mode
3761 type_natural_mode (const_tree type)
3763 enum machine_mode mode = TYPE_MODE (type);
3765 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3767 HOST_WIDE_INT size = int_size_in_bytes (type);
/* Only 8- and 16-byte vectors map onto real vector modes here.  */
3768 if ((size == 8 || size == 16)
3769 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3770 && TYPE_VECTOR_SUBPARTS (type) > 1)
3772 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3774 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3775 mode = MIN_MODE_VECTOR_FLOAT;
3777 mode = MIN_MODE_VECTOR_INT;
3779 /* Get the mode which has this inner mode and number of units. */
3780 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3781 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3782 && GET_MODE_INNER (mode) == innermode)
3792 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3793 this may not agree with the mode that the type system has chosen for the
3794 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3795 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* NOTE(review): elided chunk -- the return type, the REGNO parameter
   line, the `tmp' declaration, `return tmp' and braces are missing
   from this view.  */
3798 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3803 if (orig_mode != BLKmode)
3804 tmp = gen_rtx_REG (orig_mode, regno)
3815 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3816 of this code is to classify each 8bytes of incoming argument by the register
3817 class and assign registers accordingly. */
3819 /* Return the union class of CLASS1 and CLASS2.
3820 See the x86-64 PS ABI for details. */
/* NOTE(review): elided chunk -- the `return class1/class2' statements
   following rules #1 and #2 are missing from this view.  The rule
   numbering follows the psABI's post-merger classification.  */
3822 static enum x86_64_reg_class
3823 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3825 /* Rule #1: If both classes are equal, this is the resulting class. */
3826 if (class1 == class2)
3829 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3831 if (class1 == X86_64_NO_CLASS)
3833 if (class2 == X86_64_NO_CLASS)
3836 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3837 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3838 return X86_64_MEMORY_CLASS;
3840 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* A 32-bit integer half merged with a 32-bit SSE float stays in the
   narrower INTEGERSI class.  */
3841 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3842 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3843 return X86_64_INTEGERSI_CLASS;
3844 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3845 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3846 return X86_64_INTEGER_CLASS;
3848 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3850 if (class1 == X86_64_X87_CLASS
3851 || class1 == X86_64_X87UP_CLASS
3852 || class1 == X86_64_COMPLEX_X87_CLASS
3853 || class2 == X86_64_X87_CLASS
3854 || class2 == X86_64_X87UP_CLASS
3855 || class2 == X86_64_COMPLEX_X87_CLASS)
3856 return X86_64_MEMORY_CLASS;
3858 /* Rule #6: Otherwise class SSE is used. */
3859 return X86_64_SSE_CLASS;
3862 /* Classify the argument of type TYPE and mode MODE.
3863 CLASSES will be filled by the register class used to pass each word
3864 of the operand. The number of words is returned. In case the parameter
3865 should be passed in memory, 0 is returned. As a special case for zero
3866 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3868 BIT_OFFSET is used internally for handling records and specifies offset
3869 of the offset in bits modulo 256 to avoid overflow cases.
3871 See the x86-64 PS ABI for details.
3875 classify_argument (enum machine_mode mode, const_tree type,
3876 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3878 HOST_WIDE_INT bytes =
3879 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3880 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3882 /* Variable sized entities are always passed/returned in memory. */
3886 if (mode != VOIDmode
3887 && targetm.calls.must_pass_in_stack (mode, type))
3890 if (type && AGGREGATE_TYPE_P (type))
3894 enum x86_64_reg_class subclasses[MAX_CLASSES];
3896 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3900 for (i = 0; i < words; i++)
3901 classes[i] = X86_64_NO_CLASS;
3903 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3904 signalize memory class, so handle it as special case. */
3907 classes[0] = X86_64_NO_CLASS;
3911 /* Classify each field of record and merge classes. */
3912 switch (TREE_CODE (type))
3915 /* And now merge the fields of structure. */
3916 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3918 if (TREE_CODE (field) == FIELD_DECL)
3922 if (TREE_TYPE (field) == error_mark_node)
3925 /* Bitfields are always classified as integer. Handle them
3926 early, since later code would consider them to be
3927 misaligned integers. */
3928 if (DECL_BIT_FIELD (field))
3930 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3931 i < ((int_bit_position (field) + (bit_offset % 64))
3932 + tree_low_cst (DECL_SIZE (field), 0)
3935 merge_classes (X86_64_INTEGER_CLASS,
3940 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3941 TREE_TYPE (field), subclasses,
3942 (int_bit_position (field)
3943 + bit_offset) % 256);
3946 for (i = 0; i < num; i++)
3949 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3951 merge_classes (subclasses[i], classes[i + pos]);
3959 /* Arrays are handled as small records. */
3962 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3963 TREE_TYPE (type), subclasses, bit_offset);
3967 /* The partial classes are now full classes. */
3968 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3969 subclasses[0] = X86_64_SSE_CLASS;
3970 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3971 subclasses[0] = X86_64_INTEGER_CLASS;
3973 for (i = 0; i < words; i++)
3974 classes[i] = subclasses[i % num];
3979 case QUAL_UNION_TYPE:
3980 /* Unions are similar to RECORD_TYPE but offset is always 0.
3982 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3984 if (TREE_CODE (field) == FIELD_DECL)
3988 if (TREE_TYPE (field) == error_mark_node)
3991 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3992 TREE_TYPE (field), subclasses,
3996 for (i = 0; i < num; i++)
3997 classes[i] = merge_classes (subclasses[i], classes[i]);
4006 /* Final merger cleanup. */
4007 for (i = 0; i < words; i++)
4009 /* If one class is MEMORY, everything should be passed in
4011 if (classes[i] == X86_64_MEMORY_CLASS)
4014 /* The X86_64_SSEUP_CLASS should be always preceded by
4015 X86_64_SSE_CLASS. */
4016 if (classes[i] == X86_64_SSEUP_CLASS
4017 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
4018 classes[i] = X86_64_SSE_CLASS;
4020 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
4021 if (classes[i] == X86_64_X87UP_CLASS
4022 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
4023 classes[i] = X86_64_SSE_CLASS;
4028 /* Compute alignment needed. We align all types to natural boundaries with
4029 exception of XFmode that is aligned to 64bits. */
4030 if (mode != VOIDmode && mode != BLKmode)
4032 int mode_alignment = GET_MODE_BITSIZE (mode);
4035 mode_alignment = 128;
4036 else if (mode == XCmode)
4037 mode_alignment = 256;
4038 if (COMPLEX_MODE_P (mode))
4039 mode_alignment /= 2;
4040 /* Misaligned fields are always returned in memory. */
4041 if (bit_offset % mode_alignment)
4045 /* for V1xx modes, just use the base mode */
4046 if (VECTOR_MODE_P (mode) && mode != V1DImode
4047 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
4048 mode = GET_MODE_INNER (mode);
4050 /* Classification of atomic types. */
4055 classes[0] = X86_64_SSE_CLASS;
4058 classes[0] = X86_64_SSE_CLASS;
4059 classes[1] = X86_64_SSEUP_CLASS;
4068 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4069 classes[0] = X86_64_INTEGERSI_CLASS;
4071 classes[0] = X86_64_INTEGER_CLASS;
4075 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
4080 if (!(bit_offset % 64))
4081 classes[0] = X86_64_SSESF_CLASS;
4083 classes[0] = X86_64_SSE_CLASS;
4086 classes[0] = X86_64_SSEDF_CLASS;
4089 classes[0] = X86_64_X87_CLASS;
4090 classes[1] = X86_64_X87UP_CLASS;
4093 classes[0] = X86_64_SSE_CLASS;
4094 classes[1] = X86_64_SSEUP_CLASS;
4097 classes[0] = X86_64_SSE_CLASS;
4100 classes[0] = X86_64_SSEDF_CLASS;
4101 classes[1] = X86_64_SSEDF_CLASS;
4104 classes[0] = X86_64_COMPLEX_X87_CLASS;
4107 /* This modes is larger than 16 bytes. */
4115 classes[0] = X86_64_SSE_CLASS;
4116 classes[1] = X86_64_SSEUP_CLASS;
4123 classes[0] = X86_64_SSE_CLASS;
4129 gcc_assert (VECTOR_MODE_P (mode));
4134 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
4136 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
4137 classes[0] = X86_64_INTEGERSI_CLASS;
4139 classes[0] = X86_64_INTEGER_CLASS;
4140 classes[1] = X86_64_INTEGER_CLASS;
4141 return 1 + (bytes > 8);
4145 /* Examine the argument and return set number of register required in each
4146 class. Return 0 iff parameter should be passed in memory. */
/* NOTE(review): this listing is elided — lines are missing between the
   numbered entries (opening braces, counter increments, break statements,
   and the final return are not visible here).  */
4148 examine_argument (enum machine_mode mode, const_tree type, int in_return,
4149 int *int_nregs, int *sse_nregs)
/* Classify the argument into up to MAX_CLASSES 8-byte chunks; N is the
   number of chunks (0 means "pass in memory").  */
4151 enum x86_64_reg_class regclass[MAX_CLASSES];
4152 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes backwards, tallying how many integer and SSE registers
   the argument consumes (the increments themselves are in elided lines).  */
4158 for (n--; n >= 0; n--)
4159 switch (regclass[n])
4161 case X86_64_INTEGER_CLASS:
4162 case X86_64_INTEGERSI_CLASS:
4165 case X86_64_SSE_CLASS:
4166 case X86_64_SSESF_CLASS:
4167 case X86_64_SSEDF_CLASS:
4170 case X86_64_NO_CLASS:
4171 case X86_64_SSEUP_CLASS:
4173 case X86_64_X87_CLASS:
4174 case X86_64_X87UP_CLASS:
/* x87 classes are only usable for return values; as an argument class the
   value goes to memory (hence 0 when !in_return).  */
4178 case X86_64_COMPLEX_X87_CLASS:
4179 return in_return ? 2 : 0;
4180 case X86_64_MEMORY_CLASS:
4186 /* Construct container for the argument used by GCC interface. See
4187 FUNCTION_ARG for the detailed description. */
/* NOTE(review): elided listing — brace lines, some case arms and returns
   are missing between the numbered entries.  Returns an rtx describing
   where the value lives (single REG, PARALLEL of EXPR_LISTs, or NULL when
   the value must go to memory).  */
4190 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
4191 const_tree type, int in_return, int nintregs, int nsseregs,
4192 const int *intreg, int sse_regno)
4194 /* The following variables hold the static issued_error state. */
/* Static so each ABI-violation diagnostic is emitted at most once per
   compilation.  */
4195 static bool issued_sse_arg_error;
4196 static bool issued_sse_ret_error;
4197 static bool issued_x87_ret_error;
4199 enum machine_mode tmpmode;
4201 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4202 enum x86_64_reg_class regclass[MAX_CLASSES];
4206 int needed_sseregs, needed_intregs;
4207 rtx exp[MAX_CLASSES];
4210 n = classify_argument (mode, type, regclass, 0);
4213 if (!examine_argument (mode, type, in_return, &needed_intregs,
/* Not enough free registers of the required classes: pass in memory.  */
4216 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
4219 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
4220 some less clueful developer tries to use floating-point anyway. */
4221 if (needed_sseregs && !TARGET_SSE)
4225 if (!issued_sse_ret_error)
4227 error ("SSE register return with SSE disabled");
4228 issued_sse_ret_error = true;
4231 else if (!issued_sse_arg_error)
4233 error ("SSE register argument with SSE disabled");
4234 issued_sse_arg_error = true;
4239 /* Likewise, error if the ABI requires us to return values in the
4240 x87 registers and the user specified -mno-80387. */
4241 if (!TARGET_80387 && in_return)
4242 for (i = 0; i < n; i++)
4243 if (regclass[i] == X86_64_X87_CLASS
4244 || regclass[i] == X86_64_X87UP_CLASS
4245 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
4247 if (!issued_x87_ret_error)
4249 error ("x87 register return with x87 disabled");
4250 issued_x87_ret_error = true;
4255 /* First construct simple cases. Avoid SCmode, since we want to use
4256 single register to pass this type. */
4257 if (n == 1 && mode != SCmode)
4258 switch (regclass[0])
4260 case X86_64_INTEGER_CLASS:
4261 case X86_64_INTEGERSI_CLASS:
4262 return gen_rtx_REG (mode, intreg[0]);
4263 case X86_64_SSE_CLASS:
4264 case X86_64_SSESF_CLASS:
4265 case X86_64_SSEDF_CLASS:
4266 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
4267 case X86_64_X87_CLASS:
4268 case X86_64_COMPLEX_X87_CLASS:
4269 return gen_rtx_REG (mode, FIRST_STACK_REG);
4270 case X86_64_NO_CLASS:
4271 /* Zero sized array, struct or class. */
/* Common two-register fast paths before building a full PARALLEL.  */
4276 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
4277 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
4278 return gen_rtx_REG (mode, SSE_REGNO (sse_regno))
4281 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
4282 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
4283 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
4284 && regclass[1] == X86_64_INTEGER_CLASS
4285 && (mode == CDImode || mode == TImode || mode == TFmode)
4286 && intreg[0] + 1 == intreg[1])
4287 return gen_rtx_REG (mode, intreg[0]);
4289 /* Otherwise figure out the entries of the PARALLEL. */
4290 for (i = 0; i < n; i++)
4292 switch (regclass[i])
4294 case X86_64_NO_CLASS:
4296 case X86_64_INTEGER_CLASS:
4297 case X86_64_INTEGERSI_CLASS:
4298 /* Merge TImodes on aligned occasions here too. */
/* Pick the integer mode for this 8-byte chunk; the tail chunk may be
   narrower than a word.  */
4299 if (i * 8 + 8 > bytes)
4300 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
4301 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
4305 /* We've requested 24 bytes we don't have mode for. Use DImode. */
4306 if (tmpmode == BLKmode)
4308 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4309 gen_rtx_REG (tmpmode, *intreg),
4313 case X86_64_SSESF_CLASS:
4314 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4315 gen_rtx_REG (SFmode,
4316 SSE_REGNO (sse_regno)),
4320 case X86_64_SSEDF_CLASS:
4321 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4322 gen_rtx_REG (DFmode,
4323 SSE_REGNO (sse_regno)),
4327 case X86_64_SSE_CLASS:
/* A following SSEUP chunk means this pair forms one 16-byte value.  */
4328 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
4332 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
4333 gen_rtx_REG (tmpmode,
4334 SSE_REGNO (sse_regno)),
4336 if (tmpmode == TImode)
4345 /* Empty aligned struct, union or class. */
/* Assemble the collected chunk descriptors into a single PARALLEL.  */
4349 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
4350 for (i = 0; i < nexps; i++)
4351 XVECEXP (ret, 0, i) = exp [i];
4355 /* Update the data in CUM to advance over an argument of mode MODE
4356 and data type TYPE. (TYPE is null for libcalls where that information
4357 may not be available.) */
/* NOTE(review): elided listing — the switch over MODE and several branch
   bodies are not visible between the numbered entries.  32-bit variant. */
4360 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4361 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer-register path: consume WORDS general registers.  */
4377 cum->words += words;
4378 cum->nregs -= words;
4379 cum->regno += words;
4381 if (cum->nregs <= 0)
/* float_in_sse thresholds decide whether SF/DF scalars use SSE regs
   (elided context) — TODO confirm against the missing case labels.  */
4389 if (cum->float_in_sse < 2)
4392 if (cum->float_in_sse < 1)
/* SSE vector path: aggregates never use the vector registers here.  */
4403 if (!type || !AGGREGATE_TYPE_P (type))
4405 cum->sse_words += words;
4406 cum->sse_nregs -= 1;
4407 cum->sse_regno += 1;
4408 if (cum->sse_nregs <= 0)
/* MMX vector path, parallel to the SSE path above.  */
4421 if (!type || !AGGREGATE_TYPE_P (type))
4423 cum->mmx_words += words;
4424 cum->mmx_nregs -= 1;
4425 cum->mmx_regno += 1;
4426 if (cum->mmx_nregs <= 0)
/* Advance CUM past one argument under the 64-bit SysV ABI.  If the
   argument fits in the remaining registers, consume them; otherwise it
   is passed on the stack and only the word counter moves.  (Elided
   listing: braces between numbered entries are missing.)  */
4437 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4438 tree type, HOST_WIDE_INT words)
4440 int int_nregs, sse_nregs;
/* examine_argument returns 0 when the value is passed in memory.  */
4442 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
4443 cum->words += words;
4444 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
4446 cum->nregs -= int_nregs;
4447 cum->sse_nregs -= sse_nregs;
4448 cum->regno += int_nregs;
4449 cum->sse_regno += sse_nregs;
/* Partial fit is not allowed: the whole argument goes to the stack.  */
4452 cum->words += words;
/* Advance CUM past one argument under the Microsoft x64 ABI, where every
   register-passed argument occupies exactly one slot.  (Elided listing:
   the register-counter updates are in missing lines.)  */
4456 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
4457 HOST_WIDE_INT words)
4459 /* Otherwise, this should be passed indirect. */
/* MS ABI passes only 1/2/4/8-byte values directly; anything else must
   already have been converted to pass-by-reference.  */
4460 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
4462 cum->words += words;
/* Top-level FUNCTION_ARG_ADVANCE dispatcher: compute the argument's size
   in bytes/words, then delegate to the ABI-specific helper (MS x64,
   SysV x86-64, or 32-bit).  */
4471 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4472 tree type, int named ATTRIBUTE_UNUSED)
4474 HOST_WIDE_INT bytes, words;
/* BLKmode has no machine size; use the tree size instead.  */
4476 if (mode == BLKmode)
4477 bytes = int_size_in_bytes (type);
4479 bytes = GET_MODE_SIZE (mode);
4480 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* (Elided guard) — presumably only for vector types; verify against the
   missing condition line.  */
4483 mode = type_natural_mode (type);
4485 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4486 function_arg_advance_ms_64 (cum, bytes, words);
4487 else if (TARGET_64BIT)
4488 function_arg_advance_64 (cum, mode, type, words);
4490 function_arg_advance_32 (cum, mode, type, bytes, words);
4493 /* Define where to put the arguments to a function.
4494 Value is zero to push the argument on the stack,
4495 or a hard register in which to store the argument.
4497 MODE is the argument's machine mode.
4498 TYPE is the data type of the argument (as a tree).
4499 This is null for libcalls where that information may
4501 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4502 the preceding args and about the function being called.
4503 NAMED is nonzero if this argument is a named parameter
4504 (otherwise it is an extra parameter matching an ellipsis). */
/* NOTE(review): elided listing — the switch over MODE and several case
   labels/returns are missing between the numbered entries.  */
4507 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4508 enum machine_mode orig_mode, tree type,
4509 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* One-shot warning flags, shared across all calls.  */
4511 static bool warnedsse, warnedmmx;
4513 /* Avoid the AL settings for the Unix64 ABI. */
4514 if (mode == VOIDmode)
/* Integer-register path: the argument must fit entirely in the
   remaining registers.  */
4530 if (words <= cum->nregs)
4532 int regno = cum->regno;
4534 /* Fastcall allocates the first two DWORD (SImode) or
4535 smaller arguments to ECX and EDX if it isn't an
4541 || (type && AGGREGATE_TYPE_P (type)))
4544 /* ECX not EAX is the first allocated register. */
4545 if (regno == AX_REG)
4548 return gen_rtx_REG (mode, regno);
/* SF/DF scalars may go in SSE registers depending on float_in_sse
   (elided context).  */
4553 if (cum->float_in_sse < 2)
4556 if (cum->float_in_sse < 1)
/* SSE vector argument (non-aggregate only).  */
4566 if (!type || !AGGREGATE_TYPE_P (type))
4568 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4571 warning (0, "SSE vector argument without SSE enabled "
4575 return gen_reg_or_parallel (mode, orig_mode,
4576 cum->sse_regno + FIRST_SSE_REG);
/* MMX vector argument, parallel to the SSE case above.  */
4585 if (!type || !AGGREGATE_TYPE_P (type))
4587 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4590 warning (0, "MMX vector argument without MMX enabled "
4594 return gen_reg_or_parallel (mode, orig_mode,
4595 cum->mmx_regno + FIRST_MMX_REG);
/* Return the register (or PARALLEL) in which an argument is passed under
   the 64-bit SysV ABI, or NULL for the stack.  (Elided listing: braces
   and part of the conditional expression are in missing lines.)  */
4604 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4605 enum machine_mode orig_mode, tree type)
4607 /* Handle a hidden AL argument containing number of registers
4608 for varargs x86-64 functions. */
4609 if (mode == VOIDmode)
/* The value is the count of SSE registers actually used, clamped by the
   ABI-specific register maximum.  */
4610 return GEN_INT (cum->maybe_vaarg
4611 ? (cum->sse_nregs < 0
4612 ? (cum->call_abi == DEFAULT_ABI
4614 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4615 : X64_SSE_REGPARM_MAX))
/* Ordinary argument: let construct_container do the classification.  */
4619 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4621 &x86_64_int_parameter_registers [cum->regno],
/* Return the register in which an argument is passed under the Microsoft
   x64 ABI, or NULL for the stack.  (Elided listing: braces and some
   returns are in missing lines.)  */
4626 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4627 enum machine_mode orig_mode, int named,
4628 HOST_WIDE_INT bytes)
4632 /* Avoid the AL settings for the Unix64 ABI. */
4633 if (mode == VOIDmode)
4636 /* If we've run out of registers, it goes on the stack. */
4637 if (cum->nregs == 0)
/* MS ABI assigns integer slot registers positionally.  */
4640 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4642 /* Only floating point modes are passed in anything but integer regs. */
4643 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
/* The SSE register shadowing the same positional slot.  */
4646 regno = cum->regno + FIRST_SSE_REG;
4651 /* Unnamed floating parameters are passed in both the
4652 SSE and integer registers. */
4653 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4654 t2 = gen_rtx_REG (mode, regno);
4655 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4656 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4657 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4660 /* Handle aggregated types passed in register. */
4661 if (orig_mode == BLKmode)
/* Small aggregates are passed as the equivalently-sized integer mode.  */
4663 if (bytes > 0 && bytes <= 8)
4664 mode = (bytes > 4 ? DImode : SImode);
4665 if (mode == BLKmode)
4669 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG dispatcher: compute the argument size and
   natural mode, then delegate to the ABI-specific helper.  */
4673 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4674 tree type, int named)
4676 enum machine_mode mode = omode;
4677 HOST_WIDE_INT bytes, words;
/* BLKmode values take their size from the type.  */
4679 if (mode == BLKmode)
4680 bytes = int_size_in_bytes (type);
4682 bytes = GET_MODE_SIZE (mode);
4683 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4685 /* To simplify the code below, represent vector types with a vector mode
4686 even if MMX/SSE are not active. */
4687 if (type && TREE_CODE (type) == VECTOR_TYPE)
4688 mode = type_natural_mode (type);
4690 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4691 return function_arg_ms_64 (cum, mode, omode, named, bytes);
4692 else if (TARGET_64BIT)
4693 return function_arg_64 (cum, mode, omode, type);
4695 return function_arg_32 (cum, mode, omode, type, bytes, words);
4698 /* A C expression that indicates when an argument must be passed by
4699 reference. If nonzero for an argument, a copy of that argument is
4700 made in memory and a pointer to the argument is passed instead of
4701 the argument itself. The pointer is passed in whatever way is
4702 appropriate for passing a pointer to that type. */
/* NOTE(review): elided listing — some returns/braces are in missing
   lines between the numbered entries.  */
4705 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4706 enum machine_mode mode ATTRIBUTE_UNUSED,
4707 const_tree type, bool named ATTRIBUTE_UNUSED)
4709 /* See Windows x64 Software Convention. */
4710 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
4712 int msize = (int) GET_MODE_SIZE (mode);
4715 /* Arrays are passed by reference. */
4716 if (TREE_CODE (type) == ARRAY_TYPE)
4719 if (AGGREGATE_TYPE_P (type))
4721 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4722 are passed by reference. */
4723 msize = int_size_in_bytes (type);
4727 /* __m128 is passed by reference. */
/* The four directly-passable sizes under the MS x64 ABI.  */
4729 case 1: case 2: case 4: case 8:
/* SysV x86-64: variable-sized types (size == -1) are by reference.  */
4735 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4741 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
/* NOTE(review): elided listing — return statements inside the branches
   are in missing lines.  Recursively checks whether TYPE (or any field
   of it) requires 128-bit alignment.  */
4744 contains_aligned_value_p (tree type)
4746 enum machine_mode mode = TYPE_MODE (type);
/* SSE modes (when SSE is enabled) trigger the alignment requirement,
   unless user alignment overrides — see the elided condition tail.  */
4747 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
4751 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
/* Types aligned below 128 bits cannot contain an aligned value.  */
4753 if (TYPE_ALIGN (type) < 128)
4756 if (AGGREGATE_TYPE_P (type))
4758 /* Walk the aggregates recursively. */
4759 switch (TREE_CODE (type))
4763 case QUAL_UNION_TYPE:
4767 /* Walk all the structure fields. */
4768 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4770 if (TREE_CODE (field) == FIELD_DECL
4771 && contains_aligned_value_p (TREE_TYPE (field)))
4778 /* Just for use if some languages passes arrays by value. */
/* Array case: recurse into the element type.  */
4779 if (contains_aligned_value_p (TREE_TYPE (type)))
4790 /* Gives the alignment boundary, in bits, of an argument with the
4791 specified mode and type. */
/* NOTE(review): elided listing — the TYPE null-check branch structure is
   partly in missing lines.  */
4794 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4799 /* Since canonical type is used for call, we convert it to
4800 canonical type if needed. */
4801 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
4802 type = TYPE_CANONICAL (type);
4803 align = TYPE_ALIGN (type);
/* No type available (libcall): fall back to the mode's alignment.  */
4806 align = GET_MODE_ALIGNMENT (mode);
4807 if (align < PARM_BOUNDARY)
4808 align = PARM_BOUNDARY;
4809 /* In 32bit, only _Decimal128 and __float128 are aligned to their
4810 natural boundaries. */
4811 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
4813 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4814 make an exception for SSE modes since these require 128bit
4817 The handling here differs from field_alignment. ICC aligns MMX
4818 arguments to 4 byte boundaries, while structure fields are aligned
4819 to 8 byte boundaries. */
4822 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
4823 align = PARM_BOUNDARY;
/* Typed argument: only keep over-alignment if the type actually
   contains a 128-bit-aligned value.  */
4827 if (!contains_aligned_value_p (type))
4828 align = PARM_BOUNDARY;
/* Never exceed the platform's maximum supported alignment.  */
4831 if (align > BIGGEST_ALIGNMENT)
4832 align = BIGGEST_ALIGNMENT;
4836 /* Return true if N is a possible register number of function value. */
/* NOTE(review): elided listing — the switch header and other case arms
   (AX, SSE, MMX registers) are in missing lines.  */
4839 ix86_function_value_regno_p (int regno)
4846 case FIRST_FLOAT_REG:
4847 /* TODO: The function should depend on current function ABI but
4848 builtins.c would need updating then. Therefore we use the
/* MS x64 ABI never returns in x87 registers.  */
4850 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
4852 return TARGET_FLOAT_RETURNS_IN_80387;
/* (Elided case) — presumably the MMX/DImode return register; Darwin and
   64-bit targets allow it.  TODO confirm against missing lines.  */
4858 if (TARGET_MACHO || TARGET_64BIT)
4866 /* Define how to find the value returned by a function.
4867 VALTYPE is the data type of the value (as a tree).
4868 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4869 otherwise, FUNC is 0. */
/* 32-bit return-register selection.  (Elided listing: the AX_REG default
   assignment line is missing.)  */
4872 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4873 const_tree fntype, const_tree fn)
4877 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4878 we normally prevent this case when mmx is not available. However
4879 some ABIs may require the result to be returned like DImode. */
4880 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4881 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4883 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4884 we prevent this case when sse is not available. However some ABIs
4885 may require the result to be returned like integer TImode. */
4886 else if (mode == TImode
4887 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4888 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4890 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4891 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4892 regno = FIRST_FLOAT_REG;
4894 /* Most things go in %eax. */
4897 /* Override FP return register with %xmm0 for local functions when
4898 SSE math is enabled or for functions with sseregparm attribute. */
4899 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4901 int sse_level = ix86_function_sseregparm (fntype, fn, false);
/* sseregparm level 1 covers SFmode only; level 2 covers DFmode too.  */
4902 if ((sse_level >= 1 && mode == SFmode)
4903 || (sse_level == 2 && mode == DFmode))
4904 regno = FIRST_SSE_REG;
4907 return gen_rtx_REG (orig_mode, regno);
/* Return-value location under the 64-bit SysV ABI.  (Elided listing: the
   libcall switch over MODE is largely in missing lines.)  */
4911 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4916 /* Handle libcalls, which don't provide a type node. */
4917 if (valtype == NULL)
/* (Elided case labels) — FP modes return in %xmm0, x87 modes in %st(0),
   everything else in %rax; verify against missing lines.  */
4929 return gen_rtx_REG (mode, FIRST_SSE_REG);
4932 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4936 return gen_rtx_REG (mode, AX_REG);
/* Typed value: run full classification with the return-register sets.  */
4940 ret = construct_container (mode, orig_mode, valtype, 1,
4941 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
4942 x86_64_int_return_registers, 0);
4944 /* For zero sized structures, construct_container returns NULL, but we
4945 need to keep rest of compiler happy by returning meaningful value. */
4947 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Return-value location under the Microsoft x64 ABI: %rax by default,
   %xmm0 for scalar FP and 16-byte non-complex int/vector values.  */
4953 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4955 unsigned int regno = AX_REG;
4959 switch (GET_MODE_SIZE (mode))
/* (Elided case label) — the 16-byte size case.  */
4962 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
4963 && !COMPLEX_MODE_P (mode))
4964 regno = FIRST_SSE_REG;
/* (Elided case labels) — 4- and 8-byte sizes: scalar float/double
   return in %xmm0.  */
4968 if (mode == SFmode || mode == DFmode)
4969 regno = FIRST_SSE_REG;
4975 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   normalize FNTYPE_OR_DECL into (fn, fntype) and dispatch to the
   ABI-specific return-value helper.  */
4979 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4980 enum machine_mode orig_mode, enum machine_mode mode)
4982 const_tree fn, fntype;
/* FNTYPE_OR_DECL may be a FUNCTION_DECL or already a type.  */
4985 if (fntype_or_decl && DECL_P (fntype_or_decl))
4986 fn = fntype_or_decl;
4987 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4989 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
4990 return function_value_ms_64 (orig_mode, mode);
4991 else if (TARGET_64BIT)
4992 return function_value_64 (orig_mode, mode, valtype);
4994 return function_value_32 (orig_mode, mode, fntype, fn);
/* Implement TARGET_FUNCTION_VALUE: where a function's return value of
   VALTYPE lives.  Uses the type's natural mode for classification but
   keeps the original mode for the resulting REG.  */
4998 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4999 bool outgoing ATTRIBUTE_UNUSED)
5001 enum machine_mode mode, orig_mode;
5003 orig_mode = TYPE_MODE (valtype);
5004 mode = type_natural_mode (valtype);
5005 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Return-value location for a libcall: no type node is available, so
   pass NULL and use MODE for both original and natural mode.  */
5009 ix86_libcall_value (enum machine_mode mode)
5011 return ix86_function_value_1 (NULL, NULL, mode, mode);
5014 /* Return true iff type is returned in memory. */
/* 32-bit variant.  (Elided listing: the size checks on the fall-through
   paths are in missing lines.)  */
5016 static int ATTRIBUTE_UNUSED
5017 return_in_memory_32 (const_tree type, enum machine_mode mode)
/* BLKmode aggregates are always returned in memory.  */
5021 if (mode == BLKmode)
5024 size = int_size_in_bytes (type);
/* Some subtargets (MS-compatible) return small aggregates in regs.  */
5026 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
5029 if (VECTOR_MODE_P (mode) || mode == TImode)
5031 /* User-created vectors small enough to fit in EAX. */
5035 /* MMX/3dNow values are returned in MM0,
5036 except when it doesn't exits. */
5038 return (TARGET_MMX ? 0 : 1);
5040 /* SSE values are returned in XMM0, except when it doesn't exist. */
5042 return (TARGET_SSE ? 0 : 1);
/* 64-bit SysV variant: a value is returned in memory exactly when
   classification says it cannot be passed in registers.  */
5053 static int ATTRIBUTE_UNUSED
5054 return_in_memory_64 (const_tree type, enum machine_mode mode)
5056 int needed_intregs, needed_sseregs;
5057 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64 variant: only 1/2/4/8-byte values and __m128-style
   16-byte non-complex values are returned in registers.  */
5060 static int ATTRIBUTE_UNUSED
5061 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
5063 HOST_WIDE_INT size = int_size_in_bytes (type);
5065 /* __m128 is returned in xmm0. */
5066 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
5067 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
5070 /* Otherwise, the size must be exactly in [1248]. */
5071 return (size != 1 && size != 2 && size != 4 && size != 8);
/* Implement TARGET_RETURN_IN_MEMORY: dispatch to the subtarget hook if
   defined, otherwise to the ABI-specific helper.  */
5075 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5077 #ifdef SUBTARGET_RETURN_IN_MEMORY
5078 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
/* (This branch is inside the #else of the subtarget #ifdef.)  */
5080 const enum machine_mode mode = type_natural_mode (type);
5082 if (TARGET_64BIT_MS_ABI)
5083 return return_in_memory_ms_64 (type, mode);
5084 else if (TARGET_64BIT)
5085 return return_in_memory_64 (type, mode);
5087 return return_in_memory_32 (type, mode);
5091 /* Return false iff TYPE is returned in memory. This version is used
5092 on Solaris 10. It is similar to the generic ix86_return_in_memory,
5093 but differs notably in that when MMX is available, 8-byte vectors
5094 are returned in memory, rather than in MMX registers. */
/* NOTE(review): elided listing — several size comparisons and returns
   are in missing lines between the numbered entries.  */
5097 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5100 enum machine_mode mode = type_natural_mode (type);
/* 64-bit targets use the regular SysV rule.  */
5103 return return_in_memory_64 (type, mode);
5105 if (mode == BLKmode)
5108 size = int_size_in_bytes (type);
5110 if (VECTOR_MODE_P (mode))
5112 /* Return in memory only if MMX registers *are* available. This
5113 seems backwards, but it is consistent with the existing
5120 else if (mode == TImode)
5122 else if (mode == XFmode)
5128 /* When returning SSE vector types, we have a choice of either
5129 (1) being abi incompatible with a -march switch, or
5130 (2) generating an error.
5131 Given no good solution, I think the safest thing is one warning.
5132 The user won't be able to use -Werror, but....
5134 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
5135 called in response to actually generating a caller or callee that
5136 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
5137 via aggregate_value_p for general type probing from tree-ssa. */
/* NOTE(review): elided listing — the final return and warning-text tails
   are in missing lines.  */
5140 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* One-shot warning flags, shared across calls.  */
5142 static bool warnedsse, warnedmmx;
5144 if (!TARGET_64BIT && type)
5146 /* Look at the return type of the function, not the function type. */
5147 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
5149 if (!TARGET_SSE && !warnedsse)
5152 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
5155 warning (0, "SSE vector return without SSE enabled "
5160 if (!TARGET_MMX && !warnedmmx)
5162 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
5165 warning (0, "MMX vector return without MMX enabled "
5175 /* Create the va_list data type. */
5177 /* Returns the calling convention specific va_list date type.
5178 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
/* Builds the four-field __va_list_tag record used by the SysV x86-64 ABI;
   32-bit and MS x64 use a plain char pointer instead.  */
5181 ix86_build_builtin_va_list_abi (enum calling_abi abi)
5183 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
5185 /* For i386 we use plain pointer to argument area. */
5186 if (!TARGET_64BIT || abi == MS_ABI)
5187 return build_pointer_type (char_type_node);
5189 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
5190 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
/* The four ABI-mandated fields: gp_offset, fp_offset, overflow_arg_area,
   reg_save_area.  */
5192 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
5193 unsigned_type_node);
5194 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
5195 unsigned_type_node);
5196 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
5198 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
/* Remember the counter fields for va_list scanning passes.  */
5201 va_list_gpr_counter_field = f_gpr;
5202 va_list_fpr_counter_field = f_fpr;
5204 DECL_FIELD_CONTEXT (f_gpr) = record;
5205 DECL_FIELD_CONTEXT (f_fpr) = record;
5206 DECL_FIELD_CONTEXT (f_ovf) = record;
5207 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields onto the record and lay it out.  */
5209 TREE_CHAIN (record) = type_decl;
5210 TYPE_NAME (record) = type_decl;
5211 TYPE_FIELDS (record) = f_gpr;
5212 TREE_CHAIN (f_gpr) = f_fpr;
5213 TREE_CHAIN (f_fpr) = f_ovf;
5214 TREE_CHAIN (f_ovf) = f_sav;
5216 layout_type (record);
5218 /* The correct type is an array type of one element. */
5219 return build_array_type (record, build_index_type (size_zero_node));
5222 /* Setup the builtin va_list data type and for 64-bit the additional
5223 calling convention specific va_list data types. */
/* NOTE(review): elided listing — the TARGET_64BIT guard, local `t`
   declaration and some assignments are in missing lines.  */
5226 ix86_build_builtin_va_list (void)
5228 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
5230 /* Initialize abi specific va_list builtin types. */
/* When the default ABI is MS, build a separate SysV va_list type ...  */
5234 if (DEFAULT_ABI == MS_ABI)
5236 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
5237 if (TREE_CODE (t) != RECORD_TYPE)
5238 t = build_variant_type_copy (t);
5239 sysv_va_list_type_node = t;
/* ... otherwise the default type already is the SysV one.  */
5244 if (TREE_CODE (t) != RECORD_TYPE)
5245 t = build_variant_type_copy (t);
5246 sysv_va_list_type_node = t;
/* Mirror logic for the MS va_list type.  */
5248 if (DEFAULT_ABI != MS_ABI)
5250 t = ix86_build_builtin_va_list_abi (MS_ABI);
5251 if (TREE_CODE (t) != RECORD_TYPE)
5252 t = build_variant_type_copy (t);
5253 ms_va_list_type_node = t;
5258 if (TREE_CODE (t) != RECORD_TYPE)
5259 t = build_variant_type_copy (t);
5260 ms_va_list_type_node = t;
5267 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
/* 64-bit SysV variant: spill the unconsumed parameter registers into the
   register save area so va_arg can find them.  (Elided listing: local
   declarations and some guards are in missing lines.)  */
5270 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
5279 int regparm = ix86_regparm;
/* A cross-ABI call uses that ABI's register-parameter maximum.  */
5281 if((cum ? cum->call_abi : ix86_cfun_abi ()) != DEFAULT_ABI)
5282 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
/* Nothing to save if the function never reads the va_list counters.  */
5284 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
5287 /* Indicate to allocate space on the stack for varargs save area. */
5288 ix86_save_varrargs_registers = 1;
5289 /* We need 16-byte stack alignment to save SSE registers. If user
5290 asked for lower preferred_stack_boundary, lets just hope that he knows
5291 what he is doing and won't varargs SSE values.
5293 We also may end up assuming that only 64bit values are stored in SSE
5294 register let some floating point program work. */
5295 if (ix86_preferred_stack_boundary >= BIGGEST_ALIGNMENT)
5296 crtl->stack_alignment_needed = BIGGEST_ALIGNMENT;
5298 save_area = frame_pointer_rtx;
5299 set = get_varargs_alias_set ();
/* Save each remaining general-purpose parameter register.  */
5301 for (i = cum->regno;
5303 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
5306 mem = gen_rtx_MEM (Pmode,
5307 plus_constant (save_area, i * UNITS_PER_WORD));
5308 MEM_NOTRAP_P (mem) = 1;
5309 set_mem_alias_set (mem, set);
5310 emit_move_insn (mem, gen_rtx_REG (Pmode,
5311 x86_64_int_parameter_registers[i]));
5314 if (cum->sse_nregs && cfun->va_list_fpr_size)
5316 /* Now emit code to save SSE registers. The AX parameter contains number
5317 of SSE parameter registers used to call this function. We use
5318 sse_prologue_save insn template that produces computed jump across
5319 SSE saves. We need some preparation work to get this working. */
5321 label = gen_label_rtx ();
5322 label_ref = gen_rtx_LABEL_REF (Pmode, label);
5324 /* Compute address to jump to :
5325 label - eax*4 + nnamed_sse_arguments*4 */
5326 tmp_reg = gen_reg_rtx (Pmode);
5327 nsse_reg = gen_reg_rtx (Pmode);
/* %al holds the caller-supplied count of SSE registers used.  */
5328 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
5329 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5330 gen_rtx_MULT (Pmode, nsse_reg,
5335 gen_rtx_CONST (DImode,
5336 gen_rtx_PLUS (DImode,
5338 GEN_INT (cum->sse_regno * 4))));
5340 emit_move_insn (nsse_reg, label_ref);
5341 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
5343 /* Compute address of memory block we save into. We always use pointer
5344 pointing 127 bytes after first byte to store - this is needed to keep
5345 instruction size limited by 4 bytes. */
5346 tmp_reg = gen_reg_rtx (Pmode);
5347 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
5348 plus_constant (save_area,
5349 8 * X86_64_REGPARM_MAX + 127)));
5350 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
5351 MEM_NOTRAP_P (mem) = 1;
5352 set_mem_alias_set (mem, set);
5353 set_mem_align (mem, BITS_PER_WORD);
5355 /* And finally do the dirty job! */
5356 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
5357 GEN_INT (cum->sse_regno), label));
/* Microsoft x64 variant of the varargs prologue: spill all remaining
   integer parameter registers into their caller-allocated home slots
   above the return address.  */
5362 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
5364 alias_set_type set = get_varargs_alias_set ();
5367 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
/* Each register's home slot is at a fixed word offset from the
   incoming-arguments pointer.  */
5371 mem = gen_rtx_MEM (Pmode,
5372 plus_constant (virtual_incoming_args_rtx,
5373 i * UNITS_PER_WORD));
5374 MEM_NOTRAP_P (mem) = 1;
5375 set_mem_alias_set (mem, set);
5377 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
5378 emit_move_insn (mem, reg);
/* Implement TARGET_SETUP_INCOMING_VARARGS: copy CUM, advance it past the
   last named argument for stdarg functions, and dispatch to the
   ABI-specific worker.  (Elided listing: the TARGET_64BIT early-return
   and the next_cum copy are in missing lines.)  */
5383 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5384 tree type, int *pretend_size ATTRIBUTE_UNUSED,
5387 CUMULATIVE_ARGS next_cum;
5390 /* This argument doesn't appear to be used anymore. Which is good,
5391 because the old code here didn't suppress rtl generation. */
5392 gcc_assert (!no_rtl);
5397 fntype = TREE_TYPE (current_function_decl);
5399 /* For varargs, we do not want to skip the dummy va_dcl argument.
5400 For stdargs, we do want to skip the last named argument. */
5402 if (stdarg_p (fntype))
5403 function_arg_advance (&next_cum, mode, type, 1);
5405 if ((cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5406 setup_incoming_varargs_ms_64 (&next_cum);
5408 setup_incoming_varargs_64 (&next_cum);
5411 /* Checks if TYPE is of kind va_list char *. */
/* True when TYPE is the char* flavor of va_list (always on 32-bit;
   on 64-bit only for the MS variant).  */
5414 is_va_list_char_pointer (tree type)
5418 /* For 32-bit it is always true. */
/* Compare the canonicalized type against the MS va_list node.  */
5421 canonic = ix86_canonical_va_list_type (type);
5422 return (canonic == ms_va_list_type_node
5423 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
5426 /* Implement va_start. */
/* NOTE(review): elided listing — local declarations (`type`, etc.) and
   some braces are in missing lines between the numbered entries.  */
5429 ix86_va_start (tree valist, rtx nextarg)
5431 HOST_WIDE_INT words, n_gpr, n_fpr;
5432 tree f_gpr, f_fpr, f_ovf, f_sav;
5433 tree gpr, fpr, ovf, sav, t;
5436 /* Only 64bit target needs something special. */
5437 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
5439 std_expand_builtin_va_start (valist, nextarg);
/* Pull the four __va_list_tag fields out of the SysV record.  */
5443 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
5444 f_fpr = TREE_CHAIN (f_gpr);
5445 f_ovf = TREE_CHAIN (f_fpr);
5446 f_sav = TREE_CHAIN (f_ovf);
5448 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
5449 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5450 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5451 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5452 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
5454 /* Count number of gp and fp argument registers used. */
5455 words = crtl->args.info.words;
5456 n_gpr = crtl->args.info.regno;
5457 n_fpr = crtl->args.info.sse_regno;
/* gp_offset = bytes of GP registers already consumed by named args.  */
5459 if (cfun->va_list_gpr_size)
5461 type = TREE_TYPE (gpr);
5462 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
5463 build_int_cst (type, n_gpr * 8));
5464 TREE_SIDE_EFFECTS (t) = 1;
5465 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts past the GP save area (8 bytes per GP register),
   with 16 bytes per SSE register slot.  */
5468 if (cfun->va_list_fpr_size)
5470 type = TREE_TYPE (fpr);
5471 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
5472 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
5473 TREE_SIDE_EFFECTS (t) = 1;
5474 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5477 /* Find the overflow area. */
5478 type = TREE_TYPE (ovf);
5479 t = make_tree (type, virtual_incoming_args_rtx);
/* Skip the named arguments already on the stack.  */
5481 t = build2 (POINTER_PLUS_EXPR, type, t,
5482 size_int (words * UNITS_PER_WORD));
5483 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
5484 TREE_SIDE_EFFECTS (t) = 1;
5485 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5487 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
5489 /* Find the register save area.
5490 Prologue of the function save it right above stack frame. */
5491 type = TREE_TYPE (sav);
5492 t = make_tree (type, frame_pointer_rtx);
5493 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
5494 TREE_SIDE_EFFECTS (t) = 1;
5495 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
5499 /* Implement va_arg. */
/* Gimplifies a VA_ARG_EXPR for the SysV x86-64 ABI: first try to fetch the
   value from the register save area (GP and/or SSE portions), falling back
   to the stack overflow area; emits the register/stack selection as explicit
   control flow into *PRE_P.  NOTE(review): this excerpt elides a number of
   original lines (declarations, braces, else-arms); the code tokens shown
   are byte-identical and only comments were added.  */
5502 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
5504 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
5505 tree f_gpr, f_fpr, f_ovf, f_sav;
5506 tree gpr, fpr, ovf, sav, t;
5508 tree lab_false, lab_over = NULL_TREE;
5513 enum machine_mode nat_mode;
5516 /* Only 64bit target needs something special. */
5517 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
5518 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
/* Locate the four va_list fields, as in ix86_va_start.  */
5520 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
5521 f_fpr = TREE_CHAIN (f_gpr);
5522 f_ovf = TREE_CHAIN (f_fpr);
5523 f_sav = TREE_CHAIN (f_ovf);
5525 valist = build_va_arg_indirect_ref (valist);
5526 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
5527 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
5528 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
5529 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments are fetched as a pointer, then dereferenced
   at the end.  */
5531 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
5533 type = build_pointer_type (type);
5534 size = int_size_in_bytes (type);
5535 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Classify TYPE as the ABI would for argument passing; CONTAINER describes
   which registers (if any) would hold it.  */
5537 nat_mode = type_natural_mode (type);
5538 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
5539 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
5542 /* Pull the value out of the saved registers. */
5544 addr = create_tmp_var (ptr_type_node, "addr");
5545 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
5549 int needed_intregs, needed_sseregs;
5551 tree int_addr, sse_addr;
5553 lab_false = create_artificial_label ();
5554 lab_over = create_artificial_label ();
5556 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
/* Alignments beyond what the save area guarantees force a copy through a
   temporary.  */
5558 need_temp = (!REG_P (container)
5559 && ((needed_intregs && TYPE_ALIGN (type) > 64)
5560 || TYPE_ALIGN (type) > 128));
5562 /* In case we are passing structure, verify that it is consecutive block
5563 on the register save area. If not we need to do moves. */
5564 if (!need_temp && !REG_P (container))
5566 /* Verify that all registers are strictly consecutive */
5567 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
/* SSE pieces: register i must be FIRST_SSE_REG+i at offset 16*i.  */
5571 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5573 rtx slot = XVECEXP (container, 0, i);
5574 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
5575 || INTVAL (XEXP (slot, 1)) != i * 16)
/* GP pieces: register i at offset 8*i.  */
5583 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
5585 rtx slot = XVECEXP (container, 0, i);
5586 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5587 || INTVAL (XEXP (slot, 1)) != i * 8)
5599 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5600 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5601 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5602 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5605 /* First ensure that we fit completely in registers. */
/* if (gpr >= limit) goto lab_false;  -- not enough GP regs left.  */
5608 t = build_int_cst (TREE_TYPE (gpr),
5609 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
5610 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5611 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5612 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5613 gimplify_and_add (t, pre_p);
/* Same check for the SSE portion of the save area.  */
5617 t = build_int_cst (TREE_TYPE (fpr),
5618 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5619 + X86_64_REGPARM_MAX * 8);
5620 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5621 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5622 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5623 gimplify_and_add (t, pre_p);
5626 /* Compute index to start of area used for integer regs. */
5629 /* int_addr = gpr + sav; */
5630 t = fold_convert (sizetype, gpr);
5631 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5632 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5633 gimplify_and_add (t, pre_p);
5637 /* sse_addr = fpr + sav; */
5638 t = fold_convert (sizetype, fpr);
5639 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5640 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5641 gimplify_and_add (t, pre_p);
/* Non-consecutive/over-aligned case: assemble the value piecewise into a
   stack temporary and point ADDR at it.  */
5646 tree temp = create_tmp_var (type, "va_arg_tmp");
5649 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5650 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5651 gimplify_and_add (t, pre_p);
5653 for (i = 0; i < XVECLEN (container, 0); i++)
5655 rtx slot = XVECEXP (container, 0, i);
5656 rtx reg = XEXP (slot, 0);
5657 enum machine_mode mode = GET_MODE (reg);
5658 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5659 tree addr_type = build_pointer_type (piece_type);
5662 tree dest_addr, dest;
/* Pick source area by register class: SSE slots are 16 bytes apart,
   GP slots 8.  */
5664 if (SSE_REGNO_P (REGNO (reg)))
5666 src_addr = sse_addr;
5667 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5671 src_addr = int_addr;
5672 src_offset = REGNO (reg) * 8;
5674 src_addr = fold_convert (addr_type, src_addr);
5675 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5676 size_int (src_offset));
5677 src = build_va_arg_indirect_ref (src_addr);
5679 dest_addr = fold_convert (addr_type, addr);
5680 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5681 size_int (INTVAL (XEXP (slot, 1))));
5682 dest = build_va_arg_indirect_ref (dest_addr);
5684 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5685 gimplify_and_add (t, pre_p);
/* Consume the registers: bump gp_offset/fp_offset past what we used.  */
5691 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5692 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5693 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5694 gimplify_and_add (t, pre_p);
5698 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5699 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5700 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5701 gimplify_and_add (t, pre_p);
5704 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5705 gimplify_and_add (t, pre_p);
5707 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5708 append_to_statement_list (t, pre_p);
5711 /* ... otherwise out of the overflow area. */
5713 /* When we align parameter on stack for caller, if the parameter
5714 alignment is beyond PREFERRED_STACK_BOUNDARY, it will be
5715 aligned at PREFERRED_STACK_BOUNDARY. We will match callee
5716 here with caller. */
5717 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
5718 if ((unsigned int) arg_boundary > PREFERRED_STACK_BOUNDARY)
5719 arg_boundary = PREFERRED_STACK_BOUNDARY;
5721 /* Care for on-stack alignment if needed. */
5722 if (arg_boundary <= 64
5723 || integer_zerop (TYPE_SIZE (type)))
/* Round OVF up to the argument's boundary: t = (ovf + align-1) & -align.  */
5727 HOST_WIDE_INT align = arg_boundary / 8;
5728 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5729 size_int (align - 1));
5730 t = fold_convert (sizetype, t);
5731 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5733 t = fold_convert (TREE_TYPE (ovf), t);
5735 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5737 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5738 gimplify_and_add (t2, pre_p);
/* Advance OVF past the argument (rounded up to whole words).  */
5740 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5741 size_int (rsize * UNITS_PER_WORD));
5742 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5743 gimplify_and_add (t, pre_p);
5747 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5748 append_to_statement_list (t, pre_p);
5751 ptrtype = build_pointer_type (type);
5752 addr = fold_convert (ptrtype, addr);
/* One extra dereference for pass-by-reference arguments.  */
5755 addr = build_va_arg_indirect_ref (addr);
5756 return build_va_arg_indirect_ref (addr);
5759 /* Return nonzero if OPNUM's MEM should be matched
5760 in movabs* patterns. */
/* Digs operand OPNUM out of INSN's (possibly PARALLEL-wrapped) SET,
   strips SUBREGs, and rejects volatile MEMs unless volatile_ok.  */
5763 ix86_check_movabs (rtx insn, int opnum)
5767 set = PATTERN (insn);
5768 if (GET_CODE (set) == PARALLEL)
5769 set = XVECEXP (set, 0, 0);
5770 gcc_assert (GET_CODE (set) == SET);
5771 mem = XEXP (set, opnum);
5772 while (GET_CODE (mem) == SUBREG)
5773 mem = SUBREG_REG (mem);
5774 gcc_assert (MEM_P (mem));
5775 return (volatile_ok || !MEM_VOLATILE_P (mem));
5778 /* Initialize the table of extra 80387 mathematical constants. */
/* Parses the five special x87 constants (log10(2), ln(2), log2(e),
   log2(10), pi) into ext_80387_constants_table, rounded to XFmode,
   and marks the table initialized.  */
5781 init_ext_80387_constants (void)
5783 static const char * cst[5] =
5785 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5786 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5787 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5788 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5789 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5793 for (i = 0; i < 5; i++)
5795 real_from_string (&ext_80387_constants_table[i], cst[i]);
5796 /* Ensure each constant is rounded to XFmode precision. */
5797 real_convert (&ext_80387_constants_table[i],
5798 XFmode, &ext_80387_constants_table[i]);
/* Guard flag checked by callers before using the table.  */
5801 ext_80387_constants_init = 1;
5804 /* Return true if the constant is something that can be loaded with
5805 a special instruction. */
/* Classifies X as an x87-loadable constant.  Returns a small index code
   consumed by standard_80387_constant_opcode/_rtx; 0 (falling through)
   when X is not special.  NOTE(review): the return statements for each
   case are elided in this excerpt.  */
5808 standard_80387_constant_p (rtx x)
5810 enum machine_mode mode = GET_MODE (x);
/* Only x87-eligible FP CONST_DOUBLEs qualify.  */
5814 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5817 if (x == CONST0_RTX (mode))
5819 if (x == CONST1_RTX (mode))
5822 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5824 /* For XFmode constants, try to find a special 80387 instruction when
5825 optimizing for size or on those CPUs that benefit from them. */
5827 && (optimize_size || TARGET_EXT_80387_CONSTANTS)
5831 if (! ext_80387_constants_init)
5832 init_ext_80387_constants ();
5834 for (i = 0; i < 5; i++)
5835 if (real_identical (&r, &ext_80387_constants_table[i]))
5839 /* Load of the constant -0.0 or -1.0 will be split as
5840 fldz;fchs or fld1;fchs sequence. */
5841 if (real_isnegzero (&r))
5843 if (real_identical (&r, &dconstm1))
5849 /* Return the opcode of the special instruction to be used to load
/* Maps the index from standard_80387_constant_p to the corresponding fld*
   mnemonic.  NOTE(review): the switch cases are elided in this excerpt.  */
5853 standard_80387_constant_opcode (rtx x)
5855 switch (standard_80387_constant_p (x))
5879 /* Return the CONST_DOUBLE representing the 80387 constant that is
5880 loaded by the specified special instruction. The argument IDX
5881 matches the return value from standard_80387_constant_p. */
5884 standard_80387_constant_rtx (int idx)
/* Lazily build the constants table on first use.  */
5888 if (! ext_80387_constants_init)
5889 init_ext_80387_constants ();
/* NOTE(review): the idx->i mapping lines are elided in this excerpt.  */
5905 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5909 /* Return 1 if mode is a valid mode for sse. */
/* NOTE(review): body (a mode switch, presumably) is elided in this excerpt.  */
5911 standard_sse_mode_p (enum machine_mode mode)
5928 /* Return 1 if X is FP constant we can load to SSE register w/o using memory.
/* Classifier: returns 1 for all-zeros (xor-able), 2 for all-ones with SSE2
   (pcmpeq-able), -1 for all-ones without SSE2; falls through otherwise.  */
5931 standard_sse_constant_p (rtx x)
5933 enum machine_mode mode = GET_MODE (x);
5935 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5937 if (vector_all_ones_operand (x, mode)
5938 && standard_sse_mode_p (mode))
5939 return TARGET_SSE2 ? 2 : -1;
5944 /* Return the opcode of the special instruction to be used to load
/* Maps standard_sse_constant_p's code to an assembler template:
   zero -> xorps/xorpd/pxor depending on the insn's mode attribute,
   all-ones -> pcmpeqd.  */
5948 standard_sse_constant_opcode (rtx insn, rtx x)
5950 switch (standard_sse_constant_p (x))
5953 if (get_attr_mode (insn) == MODE_V4SF)
5954 return "xorps\t%0, %0";
5955 else if (get_attr_mode (insn) == MODE_V2DF)
5956 return "xorpd\t%0, %0";
5958 return "pxor\t%0, %0";
5960 return "pcmpeqd\t%0, %0";
5965 /* Returns 1 if OP contains a symbol reference */
/* Recursive walk over OP's rtx format string: direct SYMBOL_REF/LABEL_REF
   hits return immediately; 'E' fields recurse into vectors, 'e' fields
   into sub-expressions.  */
5968 symbolic_reference_mentioned_p (rtx op)
5973 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5976 fmt = GET_RTX_FORMAT (GET_CODE (op));
5977 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5983 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5984 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5988 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5995 /* Return 1 if it is appropriate to emit `ret' instructions in the
5996 body of a function. Do this only if the epilogue is simple, needing a
5997 couple of insns. Prior to reloading, we can't tell how many registers
5998 must be saved, so return 0 then. Return 0 if there is no frame
5999 marker to de-allocate. */
6002 ix86_can_use_return_insn_p (void)
6004 struct ix86_frame frame;
/* Before reload the frame layout is unknown; frame pointer forces a real
   epilogue.  */
6006 if (! reload_completed || frame_pointer_needed)
6009 /* Don't allow more than 32 pop, since that's all we can do
6010 with one instruction. */
6011 if (crtl->args.pops_args
6012 && crtl->args.size >= 32768)
/* A bare `ret' works only if nothing was allocated and no regs saved.  */
6015 ix86_compute_frame_layout (&frame);
6016 return frame.to_allocate == 0 && frame.nregs == 0;
6019 /* Value should be nonzero if functions must have frame pointers.
6020 Zero means the frame pointer need not be set up (and parms may
6021 be accessed via the stack pointer) in functions that seem suitable. */
6024 ix86_frame_pointer_required (void)
6026 /* If we accessed previous frames, then the generated code expects
6027 to be able to access the saved ebp value in our frame. */
6028 if (cfun->machine->accesses_prev_frame)
6031 /* Several x86 os'es need a frame pointer for other reasons,
6032 usually pertaining to setjmp. */
6033 if (SUBTARGET_FRAME_POINTER_REQUIRED)
6036 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
6037 the frame pointer by default. Turn it back on now if we've not
6038 got a leaf function. */
6039 if (TARGET_OMIT_LEAF_FRAME_POINTER
6040 && (!current_function_is_leaf
6041 || ix86_current_function_calls_tls_descriptor))
6050 /* Record that the current function accesses previous call frames. */
/* Sets the flag consulted by ix86_frame_pointer_required above.  */
6053 ix86_setup_frame_addresses (void)
6055 cfun->machine->accesses_prev_frame = 1;
/* Use hidden linkonce sections for PIC thunks when the assembler/target
   supports them (or on Mach-O); otherwise fall back to local labels.  */
6058 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
6059 # define USE_HIDDEN_LINKONCE 1
6061 # define USE_HIDDEN_LINKONCE 0
/* Bitmask of registers (0..7) for which a pc-thunk must be emitted at
   end of file; set in output_set_got, consumed in ix86_file_end.  */
6064 static int pic_labels_used;
6066 /* Fills in the label name that should be used for a pc thunk for
6067 the given register. */
/* NAME must hold at least 32 bytes.  32-bit only.  */
6070 get_pc_thunk_name (char name[32], unsigned int regno)
6072 gcc_assert (!TARGET_64BIT);
/* Linkonce thunks get a stable public name so they merge across TUs;
   otherwise use a file-local label keyed by register number.  */
6074 if (USE_HIDDEN_LINKONCE)
6075 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
6077 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
6081 /* This function generates code for -fpic that loads %ebx with
6082 the return address of the caller and then returns. */
/* File-end hook: emit one get_pc thunk per register recorded in
   pic_labels_used, then the exec-stack marker if required.
   NOTE(review): some branch/brace lines are elided in this excerpt.  */
6085 ix86_file_end (void)
6090 for (regno = 0; regno < 8; ++regno)
/* Skip registers for which no thunk was requested.  */
6094 if (! ((pic_labels_used >> regno) & 1))
6097 get_pc_thunk_name (name, regno);
/* Mach-O path: coalesced text section + weak definition.  */
6102 switch_to_section (darwin_sections[text_coal_section]);
6103 fputs ("\t.weak_definition\t", asm_out_file);
6104 assemble_name (asm_out_file, name);
6105 fputs ("\n\t.private_extern\t", asm_out_file);
6106 assemble_name (asm_out_file, name);
6107 fputs ("\n", asm_out_file);
6108 ASM_OUTPUT_LABEL (asm_out_file, name);
/* ELF path: one-only section with a hidden, global function symbol.  */
6112 if (USE_HIDDEN_LINKONCE)
6116 decl = build_decl (FUNCTION_DECL, get_identifier (name),
6118 TREE_PUBLIC (decl) = 1;
6119 TREE_STATIC (decl) = 1;
6120 DECL_ONE_ONLY (decl) = 1;
6122 (*targetm.asm_out.unique_section) (decl, 0);
6123 switch_to_section (get_named_section (decl, NULL, 0));
6125 (*targetm.asm_out.globalize_label) (asm_out_file, name);
6126 fputs ("\t.hidden\t", asm_out_file);
6127 assemble_name (asm_out_file, name);
6128 fputc ('\n', asm_out_file);
6129 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
6133 switch_to_section (text_section);
6134 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address (top of stack) into the target
   register and return.  */
6137 xops[0] = gen_rtx_REG (Pmode, regno);
6138 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
6139 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
6140 output_asm_insn ("ret", xops);
6143 if (NEED_INDICATE_EXEC_STACK)
6144 file_end_indicate_exec_stack ();
6147 /* Emit code for the SET_GOT patterns. */
/* Emits assembly that loads the GOT base into DEST: VxWorks RTP reads it
   from GOTT_BASE/GOTT_INDEX; otherwise either an inline call/pop (when deep
   branch prediction is off or non-PIC) or a call to the shared pc-thunk,
   followed by adding the GOT symbol offset.
   NOTE(review): several lines (declarations, #if TARGET_MACHO guards,
   braces) are elided in this excerpt.  */
6150 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
6156 if (TARGET_VXWORKS_RTP && flag_pic)
6158 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
6159 xops[2] = gen_rtx_MEM (Pmode,
6160 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
6161 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
6163 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
6164 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
6165 an unadorned address. */
6166 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6167 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
6168 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
6172 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
/* Inline variant: call next insn, then pop the return address.  */
6174 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
6176 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
6179 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
6181 output_asm_insn ("call\t%a2", xops);
6184 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6185 is what will be referenced by the Mach-O PIC subsystem. */
6187 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6190 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6191 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
6194 output_asm_insn ("pop%z0\t%0", xops);
/* Thunk variant: record that a thunk for DEST's register is needed and
   call it.  */
6199 get_pc_thunk_name (name, REGNO (dest));
6200 pic_labels_used |= 1 << REGNO (dest);
6202 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
6203 xops[2] = gen_rtx_MEM (QImode, xops[2]);
6204 output_asm_insn ("call\t%X2", xops);
6205 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
6206 is what will be referenced by the Mach-O PIC subsystem. */
6209 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
6211 targetm.asm_out.internal_label (asm_out_file, "L",
6212 CODE_LABEL_NUMBER (label));
/* Finally materialize the GOT base: add _GLOBAL_OFFSET_TABLE_ (plus the
   pc-label delta in the PIC deep-branch case).  */
6219 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
6220 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
6222 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
6227 /* Generate an "push" pattern for input ARG. */
/* Builds (set (mem (pre_dec sp)) arg) — a stack push as RTL.
   NOTE(review): the function header line itself is elided in this
   excerpt; the comment above names it gen_push.  */
6232 return gen_rtx_SET (VOIDmode,
6234 gen_rtx_PRE_DEC (Pmode,
6235 stack_pointer_rtx)),
6239 /* Return >= 0 if there is an unused call-clobbered register available
6240 for the entire function. */
/* Only safe in leaf functions with no profiling/TLS-descriptor calls;
   scans eax/ecx/edx (regnos 2..0) for one never touched.  */
6243 ix86_select_alt_pic_regnum (void)
6245 if (current_function_is_leaf && !crtl->profile
6246 && !ix86_current_function_calls_tls_descriptor)
6249 for (i = 2; i >= 0; --i)
6250 if (!df_regs_ever_live_p (i))
6254 return INVALID_REGNUM;
6257 /* Return 1 if we need to save REGNO. */
/* Decides whether the prologue must preserve REGNO: PIC register (unless an
   alternate one is free), EH return data registers when applicable, the
   forced-alignment argument pointer, and ordinary live call-saved regs.
   NOTE(review): some return lines are elided in this excerpt.  */
6259 ix86_save_reg (unsigned int regno, int maybe_eh_return)
6261 if (pic_offset_table_rtx
6262 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
6263 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6265 || crtl->calls_eh_return
6266 || crtl->uses_const_pool))
/* If a scratch register can hold the PIC base, no need to save ebx.  */
6268 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
6273 if (crtl->calls_eh_return && maybe_eh_return)
6278 unsigned test = EH_RETURN_DATA_REGNO (i);
6279 if (test == INVALID_REGNUM)
6286 if (cfun->machine->force_align_arg_pointer
6287 && regno == REGNO (cfun->machine->force_align_arg_pointer))
/* Default rule: live, call-saved, not fixed, and not the frame pointer
   when one is being set up.  */
6290 return (df_regs_ever_live_p (regno)
6291 && !call_used_regs[regno]
6292 && !fixed_regs[regno]
6293 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
6296 /* Return number of registers to be saved on the stack. */
/* Counts hard registers for which ix86_save_reg is true (EH regs
   included).  */
6299 ix86_nsaved_regs (void)
6304 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
6305 if (ix86_save_reg (regno, true))
6310 /* Return the offset between two registers, one to be eliminated, and the other
6311 its replacement, at the start of a routine. */
6314 ix86_initial_elimination_offset (int from, int to)
6316 struct ix86_frame frame;
6317 ix86_compute_frame_layout (&frame);
/* The four legal eliminations map directly onto the precomputed frame
   offsets.  */
6319 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
6320 return frame.hard_frame_pointer_offset;
6321 else if (from == FRAME_POINTER_REGNUM
6322 && to == HARD_FRAME_POINTER_REGNUM)
6323 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
6326 gcc_assert (to == STACK_POINTER_REGNUM);
6328 if (from == ARG_POINTER_REGNUM)
6329 return frame.stack_pointer_offset;
6331 gcc_assert (from == FRAME_POINTER_REGNUM);
6332 return frame.stack_pointer_offset - frame.frame_pointer_offset;
6336 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes the complete stack-frame layout: saved-register count,
   alignment paddings, varargs save area, outgoing-args area, red zone, and
   the three elimination offsets.  Also (re)decides whether to save
   registers with moves vs. pushes.  NOTE(review): some lines (braces,
   else-arms, a debug #if guard near the end) are elided in this excerpt.  */
6339 ix86_compute_frame_layout (struct ix86_frame *frame)
6341 HOST_WIDE_INT total_size;
6342 unsigned int stack_alignment_needed;
6343 HOST_WIDE_INT offset;
6344 unsigned int preferred_alignment;
6345 HOST_WIDE_INT size = get_frame_size ();
6347 frame->nregs = ix86_nsaved_regs ();
/* Convert the bit-based alignment bookkeeping to bytes.  */
6350 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
6351 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
6353 /* During reload iteration the amount of registers saved can change.
6354 Recompute the value as needed. Do not recompute when amount of registers
6355 didn't change as reload does multiple calls to the function and does not
6356 expect the decision to change within single iteration. */
6358 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
6360 int count = frame->nregs;
6362 cfun->machine->use_fast_prologue_epilogue_nregs = count;
6363 /* The fast prologue uses move instead of push to save registers. This
6364 is significantly longer, but also executes faster as modern hardware
6365 can execute the moves in parallel, but can't do that for push/pop.
6367 Be careful about choosing what prologue to emit: When function takes
6368 many instructions to execute we may use slow version as well as in
6369 case function is known to be outside hot spot (this is known with
6370 feedback only). Weight the size of function by number of registers
6371 to save as it is cheap to use one or two push instructions but very
6372 slow to use many of them. */
6374 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
6375 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
6376 || (flag_branch_probabilities
6377 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
6378 cfun->machine->use_fast_prologue_epilogue = false;
6380 cfun->machine->use_fast_prologue_epilogue
6381 = !expensive_function_p (count);
6383 if (TARGET_PROLOGUE_USING_MOVE
6384 && cfun->machine->use_fast_prologue_epilogue)
6385 frame->save_regs_using_mov = true;
6387 frame->save_regs_using_mov = false;
6390 /* Skip return address and saved base pointer. */
6391 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
6393 frame->hard_frame_pointer_offset = offset;
6395 /* Do some sanity checking of stack_alignment_needed and
6396 preferred_alignment, since i386 port is the only using those features
6397 that may break easily. */
6399 gcc_assert (!size || stack_alignment_needed);
6400 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
6401 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6402 gcc_assert (stack_alignment_needed
6403 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
6405 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
6406 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
6408 /* Register save area */
6409 offset += frame->nregs * UNITS_PER_WORD;
/* Reserve the 64-bit varargs register-save area when needed.  */
6412 if (ix86_save_varrargs_registers)
6414 offset += X86_64_VARARGS_SIZE;
6415 frame->va_arg_size = X86_64_VARARGS_SIZE;
6418 frame->va_arg_size = 0;
6420 /* Align start of frame for local function. */
6421 frame->padding1 = ((offset + stack_alignment_needed - 1)
6422 & -stack_alignment_needed) - offset;
6424 offset += frame->padding1;
6426 /* Frame pointer points here. */
6427 frame->frame_pointer_offset = offset;
6431 /* Add outgoing arguments area. Can be skipped if we eliminated
6432 all the function calls as dead code.
6433 Skipping is however impossible when function calls alloca. Alloca
6434 expander assumes that last crtl->outgoing_args_size
6435 of stack frame are unused. */
6436 if (ACCUMULATE_OUTGOING_ARGS
6437 && (!current_function_is_leaf || cfun->calls_alloca
6438 || ix86_current_function_calls_tls_descriptor))
6440 offset += crtl->outgoing_args_size;
6441 frame->outgoing_arguments_size = crtl->outgoing_args_size;
6444 frame->outgoing_arguments_size = 0;
6446 /* Align stack boundary. Only needed if we're calling another function
6448 if (!current_function_is_leaf || cfun->calls_alloca
6449 || ix86_current_function_calls_tls_descriptor)
6450 frame->padding2 = ((offset + preferred_alignment - 1)
6451 & -preferred_alignment) - offset;
6453 frame->padding2 = 0;
6455 offset += frame->padding2;
6457 /* We've reached end of stack frame. */
6458 frame->stack_pointer_offset = offset;
6460 /* Size prologue needs to allocate. */
6461 frame->to_allocate =
6462 (size + frame->padding1 + frame->padding2
6463 + frame->outgoing_arguments_size + frame->va_arg_size);
/* Tiny frames: pushes beat moves; huge 64-bit frames can't use the
   mov-offset form.  */
6465 if ((!frame->to_allocate && frame->nregs <= 1)
6466 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
6467 frame->save_regs_using_mov = false;
/* Red zone: leaf functions on non-MS 64-bit targets may use the area
   below sp, shrinking what the prologue must allocate.  */
6469 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && current_function_sp_is_unchanging
6470 && current_function_is_leaf
6471 && !ix86_current_function_calls_tls_descriptor)
6473 frame->red_zone_size = frame->to_allocate;
6474 if (frame->save_regs_using_mov)
6475 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
6476 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
6477 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
6480 frame->red_zone_size = 0;
6481 frame->to_allocate -= frame->red_zone_size;
6482 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout (presumably under an elided #if 0 /
   debug guard in the original — TODO confirm).  */
6484 fprintf (stderr, "\n");
6485 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
6486 fprintf (stderr, "size: %ld\n", (long)size);
6487 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
6488 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
6489 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
6490 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
6491 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
6492 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
6493 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
6494 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
6495 (long)frame->hard_frame_pointer_offset);
6496 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
6497 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
6498 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
6499 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
6503 /* Emit code to save registers in the prologue. */
/* Pushes each register ix86_save_reg selects, highest regno first, and
   marks each push frame-related for unwind info.  */
6506 ix86_emit_save_regs (void)
6511 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
6512 if (ix86_save_reg (regno, true))
6514 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
6515 RTX_FRAME_RELATED_P (insn) = 1;
6519 /* Emit code to save registers using MOV insns. First register
6520 is restored from POINTER + OFFSET. */
/* Mov-based counterpart of ix86_emit_save_regs: stores each selected
   register to successive word slots at POINTER + OFFSET, ascending regno.  */
6522 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
6527 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6528 if (ix86_save_reg (regno, true))
6530 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
6532 gen_rtx_REG (Pmode, regno));
6533 RTX_FRAME_RELATED_P (insn) = 1;
6534 offset += UNITS_PER_WORD;
6538 /* Expand prologue or epilogue stack adjustment.
6539 The pattern exist to put a dependency on all ebp-based memory accesses.
6540 STYLE should be negative if instructions should be marked as frame related,
6541 zero if %r11 register is live and cannot be freely used and positive
/* Adjusts DEST = SRC + OFFSET using the matching pattern; 64-bit offsets
   that don't fit an immediate are staged through %r11 (legal only when
   STYLE != 0).  NOTE(review): some condition/brace lines are elided.  */
6545 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
6550 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
6551 else if (x86_64_immediate_operand (offset, DImode))
6552 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
6556 /* r11 is used by indirect sibcall return as well, set before the
6557 epilogue and used after the epilogue. ATM indirect sibcall
6558 shouldn't be used together with huge frame sizes in one
6559 function because of the frame_size check in sibcall.c. */
6561 r11 = gen_rtx_REG (DImode, R11_REG);
6562 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
6564 RTX_FRAME_RELATED_P (insn) = 1;
6565 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
6569 RTX_FRAME_RELATED_P (insn) = 1;
6572 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
/* Returns the rtx to use as the internal argument pointer.  When stack
   realignment is requested (force_align_arg_pointer attribute,
   -mstackrealign, or aligned main()), hands back a copy of %ecx instead of
   the virtual incoming-args pointer; nested functions cannot realign
   because the static chain needs the register.  */
6575 ix86_internal_arg_pointer (void)
6577 bool has_force_align_arg_pointer =
6578 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
6579 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
6580 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
6581 && DECL_NAME (current_function_decl)
6582 && MAIN_NAME_P (DECL_NAME (current_function_decl))
6583 && DECL_FILE_SCOPE_P (current_function_decl))
6584 || ix86_force_align_arg_pointer
6585 || has_force_align_arg_pointer)
6587 /* Nested functions can't realign the stack due to a register
6589 if (DECL_CONTEXT (current_function_decl)
6590 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
/* Attribute use on a nested function is a hard error; the option is
   merely ignored with a warning.  */
6592 if (ix86_force_align_arg_pointer)
6593 warning (0, "-mstackrealign ignored for nested functions");
6594 if (has_force_align_arg_pointer)
6595 error ("%s not supported for nested functions",
6596 ix86_force_align_arg_pointer_string);
6597 return virtual_incoming_args_rtx;
6599 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, CX_REG);
6600 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6603 return virtual_incoming_args_rtx;
6606 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6607 This is called from dwarf2out.c to emit call frame instructions
6608 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6610 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6612 rtx unspec = SET_SRC (pattern);
6613 gcc_assert (GET_CODE (unspec) == UNSPEC);
/* Dispatch on the unspec code carried by the frame-related insn.  */
6617 case UNSPEC_REG_SAVE:
6618 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6619 SET_DEST (pattern));
6621 case UNSPEC_DEF_CFA:
6622 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6623 INTVAL (XVECEXP (unspec, 0, 0)));
6630 /* Expand the prologue into a bunch of separate insns. */
/* NOTE(review): subsampled listing — local declarations (x, y, insn, t,
   pic_reg_used, eax_live), several braces and else-arms are elided.  */
6633 ix86_expand_prologue (void)
6637 struct ix86_frame frame;
6638 HOST_WIDE_INT allocate;
/* Compute sizes/offsets of the stack frame before emitting anything.  */
6640 ix86_compute_frame_layout (&frame);
/* Forced stack realignment (-mstackrealign / force_align_arg_pointer):
   save the incoming argument pointer in a register, align %esp, then
   re-push the return address so unwind info stays consistent.  */
6642 if (cfun->machine->force_align_arg_pointer)
6646 /* Grab the argument pointer. */
6647 x = plus_constant (stack_pointer_rtx, 4);
6648 y = cfun->machine->force_align_arg_pointer;
6649 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6650 RTX_FRAME_RELATED_P (insn) = 1;
6652 /* The unwind info consists of two parts: install the fafp as the cfa,
6653 and record the fafp as the "save register" of the stack pointer.
6654 The later is there in order that the unwinder can see where it
6655 should restore the stack pointer across the and insn. */
6656 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6657 x = gen_rtx_SET (VOIDmode, y, x);
6658 RTX_FRAME_RELATED_P (x) = 1;
6659 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6661 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6662 RTX_FRAME_RELATED_P (y) = 1;
6663 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6664 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6665 REG_NOTES (insn) = x;
6667 /* Align the stack. */
/* The alignment mask operand (line 6669-6670) is elided in this listing.  */
6668 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6671 /* And here we cheat like madmen with the unwind info. We force the
6672 cfa register back to sp+4, which is exactly what it was at the
6673 start of the function. Re-pushing the return address results in
6674 the return at the same spot relative to the cfa, and thus is
6675 correct wrt the unwind info. */
6676 x = cfun->machine->force_align_arg_pointer;
/* Return address lives at fafp-4; re-push it onto the aligned stack.  */
6677 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6678 insn = emit_insn (gen_push (x));
6679 RTX_FRAME_RELATED_P (insn) = 1;
6682 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6683 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6684 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6685 REG_NOTES (insn) = x;
6688 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6689 slower on all targets. Also sdb doesn't like it. */
/* Standard frame pointer setup: push %ebp; mov %esp, %ebp.  */
6691 if (frame_pointer_needed)
6693 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6694 RTX_FRAME_RELATED_P (insn) = 1;
6696 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6697 RTX_FRAME_RELATED_P (insn) = 1;
6700 allocate = frame.to_allocate;
/* Save call-saved registers with push insns unless the frame layout
   chose mov-based saves; in the mov case the saved-regs area is part
   of the allocation below.  */
6702 if (!frame.save_regs_using_mov)
6703 ix86_emit_save_regs ();
6705 allocate += frame.nregs * UNITS_PER_WORD;
6707 /* When using red zone we may start register saving before allocating
6708 the stack frame saving one cycle of the prologue. However I will
6709 avoid doing this if I am going to have to probe the stack since
6710 at least on x86_64 the stack probe can turn into a call that clobbers
6711 a red zone location */
6712 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
6713 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
6714 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6715 : stack_pointer_rtx,
6716 -frame.nregs * UNITS_PER_WORD);
/* Small allocations (or no stack probing): a single sub/lea on %esp.
   (The "allocate == 0" arm at lines 6718-6719 is elided here.)  */
6720 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6721 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6722 GEN_INT (-allocate), -1);
/* Large allocation with stack probing: go through the __chkstk-style
   allocate_stack_worker, passing the size in %eax/%rax.  */
6725 /* Only valid for Win32. */
6726 rtx eax = gen_rtx_REG (Pmode, AX_REG);
6730 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
6732 if (cfun->machine->call_abi == MS_ABI)
/* If %eax holds an incoming argument, preserve it around the worker
   call by pushing it first (restored from memory below).  */
6735 eax_live = ix86_eax_live_at_start_p ();
6739 emit_insn (gen_push (eax));
6740 allocate -= UNITS_PER_WORD;
6743 emit_move_insn (eax, GEN_INT (allocate));
6746 insn = gen_allocate_stack_worker_64 (eax);
6748 insn = gen_allocate_stack_worker_32 (eax);
6749 insn = emit_insn (insn);
6750 RTX_FRAME_RELATED_P (insn) = 1;
/* Attach an explicit frame-related expr, since the worker's RTL does
   not look like a plain stack adjustment.  */
6751 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6752 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6753 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6754 t, REG_NOTES (insn));
/* Reload the saved %eax argument value (eax_live case).  */
6758 if (frame_pointer_needed)
6759 t = plus_constant (hard_frame_pointer_rtx,
6762 - frame.nregs * UNITS_PER_WORD);
6764 t = plus_constant (stack_pointer_rtx, allocate);
6765 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Mov-based register saves that were NOT already emitted in the
   red-zone fast path above.  */
6769 if (frame.save_regs_using_mov
6770 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
6771 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
6773 if (!frame_pointer_needed || !frame.to_allocate)
6774 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6776 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6777 -frame.nregs * UNITS_PER_WORD);
/* Set up the PIC register if this function needs it.  */
6780 pic_reg_used = false;
6781 if (pic_offset_table_rtx
6782 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
/* Use an alternate call-clobbered register for the GOT pointer when
   one is available, freeing %ebx.  */
6785 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6787 if (alt_pic_reg_used != INVALID_REGNUM)
6788 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6790 pic_reg_used = true;
/* Large PIC model: materialize RIP and the GOT offset separately via
   %r11, then add.  */
6797 if (ix86_cmodel == CM_LARGE_PIC)
6799 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
6800 rtx label = gen_label_rtx ();
6802 LABEL_PRESERVE_P (label) = 1;
6803 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6804 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6805 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6806 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6807 pic_offset_table_rtx, tmp_reg));
6810 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6813 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6816 /* Prevent function calls from being scheduled before the call to mcount.
6817 In the pic_reg_used case, make sure that the got load isn't deleted. */
6821 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6822 emit_insn (gen_blockage ());
6825 /* Emit cld instruction if stringops are used in the function. */
6826 if (TARGET_CLD && ix86_current_function_needs_cld)
6827 emit_insn (gen_cld ());
6830 /* Emit code to restore saved registers using MOV insns. First register
6831 is restored from POINTER + OFFSET. */
/* MAYBE_EH_RETURN is forwarded to ix86_save_reg to also include the
   eh_return data registers in the restore set.  */
6833 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6834 int maybe_eh_return)
6837 rtx base_address = gen_rtx_MEM (Pmode, pointer);
/* Walk all hard registers; restore each one this function saved.  */
6839 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6840 if (ix86_save_reg (regno, maybe_eh_return))
6842 /* Ensure that adjust_address won't be forced to produce pointer
6843 out of range allowed by x86-64 instruction set. */
/* On x86-64 a displacement must fit in 32 bits; for larger offsets
   materialize pointer+offset in %r11 and restart the offset at 0
   (the offset reset on line 6852 is elided in this listing).  */
6844 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6848 r11 = gen_rtx_REG (DImode, R11_REG);
6849 emit_move_insn (r11, GEN_INT (offset));
6850 emit_insn (gen_adddi3 (r11, r11, pointer));
6851 base_address = gen_rtx_MEM (Pmode, r11);
6854 emit_move_insn (gen_rtx_REG (Pmode, regno),
6855 adjust_address (base_address, Pmode, offset));
/* Saved registers are laid out contiguously, one word apart.  */
6856 offset += UNITS_PER_WORD;
6860 /* Restore function stack, frame, and registers. */
/* STYLE: presumably 1 for a normal epilogue, 0 for a sibcall epilogue
   and 2 for an eh_return epilogue — TODO confirm against callers; the
   visible uses (style != 2 checks, "Sibcall epilogues don't want a
   return instruction") are consistent with that.  */
6863 ix86_expand_epilogue (int style)
6866 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6867 struct ix86_frame frame;
6868 HOST_WIDE_INT offset;
6870 ix86_compute_frame_layout (&frame);
6872 /* Calculate start of saved registers relative to ebp. Special care
6873 must be taken for the normal return case of a function using
6874 eh_return: the eax and edx registers are marked as saved, but not
6875 restored along this path. */
6876 offset = frame.nregs;
/* The adjustment for the eh_return case (line 6878) is elided here.  */
6877 if (crtl->calls_eh_return && style != 2)
6879 offset *= -UNITS_PER_WORD;
6881 /* If we're only restoring one register and sp is not valid then
6882 using a move instruction to restore the register since it's
6883 less work than reloading sp and popping the register.
6885 The default code result in stack adjustment using add/lea instruction,
6886 while this code results in LEAVE instruction (or discrete equivalent),
6887 so it is profitable in some other cases as well. Especially when there
6888 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6889 and there is exactly one register to pop. This heuristic may need some
6890 tuning in future. */
/* Mov-based restore path.  */
6891 if ((!sp_valid && frame.nregs <= 1)
6892 || (TARGET_EPILOGUE_USING_MOVE
6893 && cfun->machine->use_fast_prologue_epilogue
6894 && (frame.nregs > 1 || frame.to_allocate))
6895 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6896 || (frame_pointer_needed && TARGET_USE_LEAVE
6897 && cfun->machine->use_fast_prologue_epilogue
6898 && frame.nregs == 1)
6899 || crtl->calls_eh_return)
6901 /* Restore registers. We can use ebp or esp to address the memory
6902 locations. If both are available, default to ebp, since offsets
6903 are known to be small. Only exception is esp pointing directly to the
6904 end of block of saved registers, where we may simplify addressing
6907 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6908 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6909 frame.to_allocate, style == 2);
6911 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6912 offset, style == 2);
6914 /* eh_return epilogues need %ecx added to the stack pointer. */
/* The "if (style == 2)" guard (line 6916) is elided in this listing.  */
6917 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6919 if (frame_pointer_needed)
/* With a frame pointer: compute the restored sp as ebp+sa+word,
   reload ebp from its save slot, then do the final adjustment.  */
6921 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6922 tmp = plus_constant (tmp, UNITS_PER_WORD);
6923 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6925 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6926 emit_move_insn (hard_frame_pointer_rtx, tmp);
6928 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
/* Without a frame pointer: sp = sp + sa + frame size.  */
6933 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6934 tmp = plus_constant (tmp, (frame.to_allocate
6935 + frame.nregs * UNITS_PER_WORD));
6936 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6939 else if (!frame_pointer_needed)
6940 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6941 GEN_INT (frame.to_allocate
6942 + frame.nregs * UNITS_PER_WORD),
6944 /* If not an i386, mov & pop is faster than "leave". */
6945 else if (TARGET_USE_LEAVE || optimize_size
6946 || !cfun->machine->use_fast_prologue_epilogue)
6947 emit_insn ((*ix86_gen_leave) ());
/* Discrete equivalent of leave: mov %ebp, %esp; pop %ebp.  */
6950 pro_epilogue_adjust_stack (stack_pointer_rtx,
6951 hard_frame_pointer_rtx,
6954 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Pop-based restore path (the else of the big heuristic above).  */
6959 /* First step is to deallocate the stack frame so that we can
6960 pop the registers. */
6963 gcc_assert (frame_pointer_needed);
6964 pro_epilogue_adjust_stack (stack_pointer_rtx,
6965 hard_frame_pointer_rtx,
6966 GEN_INT (offset), style);
6968 else if (frame.to_allocate)
6969 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6970 GEN_INT (frame.to_allocate), style);
6972 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6973 if (ix86_save_reg (regno, false))
6974 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
6975 if (frame_pointer_needed)
6977 /* Leave results in shorter dependency chains on CPUs that are
6978 able to grok it fast. */
6979 if (TARGET_USE_LEAVE)
6980 emit_insn ((*ix86_gen_leave) ());
6982 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
/* Undo the prologue's forced stack realignment: restore %esp from the
   saved argument pointer (the constant operand is elided here).  */
6986 if (cfun->machine->force_align_arg_pointer)
6988 emit_insn (gen_addsi3 (stack_pointer_rtx,
6989 cfun->machine->force_align_arg_pointer,
6993 /* Sibcall epilogues don't want a return instruction. */
/* Emit the return, popping callee-popped arguments if required.  */
6997 if (crtl->args.pops_args && crtl->args.size)
6999 rtx popc = GEN_INT (crtl->args.pops_args);
7001 /* i386 can only pop 64K bytes. If asked to pop more, pop
7002 return address, do explicit add, and jump indirectly to the
7005 if (crtl->args.pops_args >= 65536)
7007 rtx ecx = gen_rtx_REG (SImode, CX_REG);
7009 /* There is no "pascal" calling convention in any 64bit ABI. */
7010 gcc_assert (!TARGET_64BIT);
7012 emit_insn (gen_popsi1 (ecx));
7013 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
7014 emit_jump_insn (gen_return_indirect_internal (ecx));
7017 emit_jump_insn (gen_return_pop_internal (popc));
7020 emit_jump_insn (gen_return_internal ());
7023 /* Reset from the function's potential modifications. */
/* TARGET_ASM_FUNCTION_EPILOGUE hook: undo per-function state changes
   made during code generation.  */
7026 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
7027 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* The prologue may have renumbered the PIC register to an alternate
   hard reg; put it back to the real GOT pointer register.  */
7029 if (pic_offset_table_rtx)
7030 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM)
7032 /* Mach-O doesn't support labels at the end of objects, so if
7033 it looks like we might want one, insert a NOP. */
/* NOTE(review): the TARGET_MACHO guard and parts of the condition are
   elided in this listing.  */
7035 rtx insn = get_last_insn ();
/* Skip trailing notes other than deleted labels to find the last
   "real" insn.  */
7038 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
7039 insn = PREV_INSN (insn);
7043 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
7044 fputs ("\tnop\n", file);
7050 /* Extract the parts of an RTL expression that is a valid memory address
7051 for an instruction. Return 0 if the structure of the address is
7052 grossly off. Return -1 if the address contains ASHIFT, so it is not
7053 strictly valid, but still used for computing length of lea instruction. */
/* On success fills *OUT with base, index, scale, displacement and
   segment.  NOTE(review): many interior lines (the PLUS flattening
   loop head, several case labels and error returns) are elided in
   this listing.  */
7056 ix86_decompose_address (rtx addr, struct ix86_address *out)
7058 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
7059 rtx base_reg, index_reg;
7060 HOST_WIDE_INT scale = 1;
7061 rtx scale_rtx = NULL_RTX;
7063 enum ix86_address_seg seg = SEG_DEFAULT;
/* A bare (possibly SUBREG'd) register is just a base.  */
7065 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
/* Flatten a tree of PLUSes into the addends[] array, then classify
   each addend as base, index*scale, displacement or segment unspec.  */
7067 else if (GET_CODE (addr) == PLUS)
7077 addends[n++] = XEXP (op, 1);
7080 while (GET_CODE (op) == PLUS);
7085 for (i = n; i >= 0; --i)
7088 switch (GET_CODE (op))
7093 index = XEXP (op, 0);
7094 scale_rtx = XEXP (op, 1);
/* %fs/%gs-relative TLS access via UNSPEC_TP becomes a segment
   override rather than an address term.  */
7098 if (XINT (op, 1) == UNSPEC_TP
7099 && TARGET_TLS_DIRECT_SEG_REFS
7100 && seg == SEG_DEFAULT)
7101 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
7130 else if (GET_CODE (addr) == MULT)
7132 index = XEXP (addr, 0); /* index*scale */
7133 scale_rtx = XEXP (addr, 1);
7135 else if (GET_CODE (addr) == ASHIFT)
7139 /* We're called for lea too, which implements ashift on occasion. */
7140 index = XEXP (addr, 0);
7141 tmp = XEXP (addr, 1);
7142 if (!CONST_INT_P (tmp))
/* The shift count becomes a scale of 1/2/4/8 (the scale = 1 << scale
   conversion and the retval = -1 marking are elided here).  */
7144 scale = INTVAL (tmp);
7145 if ((unsigned HOST_WIDE_INT) scale > 3)
7151 disp = addr; /* displacement */
7153 /* Extract the integral value of scale. */
7156 if (!CONST_INT_P (scale_rtx))
7158 scale = INTVAL (scale_rtx);
7161 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
7162 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
7164 /* Allow arg pointer and stack pointer as index if there is not scaling. */
/* These registers cannot be encoded as an index, so swap base/index
   when scale is 1.  */
7165 if (base_reg && index_reg && scale == 1
7166 && (index_reg == arg_pointer_rtx
7167 || index_reg == frame_pointer_rtx
7168 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
7171 tmp = base, base = index, index = tmp;
7172 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
7175 /* Special case: %ebp cannot be encoded as a base without a displacement. */
7176 if ((base_reg == hard_frame_pointer_rtx
7177 || base_reg == frame_pointer_rtx
7178 || base_reg == arg_pointer_rtx) && !disp)
7181 /* Special case: on K6, [%esi] makes the instruction vector decoded.
7182 Avoid this by transforming to [%esi+0]. */
7183 if (TARGET_K6 && !optimize_size
7184 && base_reg && !index_reg && !disp
7186 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
7189 /* Special case: encode reg+reg instead of reg*2. */
7190 if (!base && index && scale && scale == 2)
7191 base = index, base_reg = index_reg, scale = 1;
7193 /* Special case: scaling cannot be encoded without base or displacement. */
7194 if (!base && !disp && index && scale != 1)
7206 /* Return cost of the memory address x.
7207 For i386, it is better to use a complex address than let gcc copy
7208 the address into a reg and make a new pseudo. But not if the address
7209 requires to two regs - that would mean more pseudos with longer
7212 ix86_address_cost (rtx x)
7214 struct ix86_address parts;
/* The declaration of `cost' and the initial cost value are elided in
   this listing; the visible statements adjust and return it.  */
7216 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so register-class tests below see hard regs.  */
7220 if (parts.base && GET_CODE (parts.base) == SUBREG)
7221 parts.base = SUBREG_REG (parts.base);
7222 if (parts.index && GET_CODE (parts.index) == SUBREG)
7223 parts.index = SUBREG_REG (parts.index);
7225 /* Attempt to minimize number of registers in the address. */
/* Penalize addresses that need (or may need, for pseudos) two
   distinct registers.  */
7227 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
7229 && (!REG_P (parts.index)
7230 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
7234 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
7236 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
7237 && parts.base != parts.index)
7240 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
7241 since it's predecode logic can't detect the length of instructions
7242 and it degenerates to vector decoded. Increase cost of such
7243 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
7244 to split such addresses or even refuse such addresses at all.
7246 Following addressing modes are affected:
7251 The first and last case may be avoidable by explicitly coding the zero in
7252 memory address, but I don't have AMD-K6 machine handy to check this
7256 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
7257 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
7258 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
7264 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
7265 this is used for to form addresses to local data when -fPIC is in
7269 darwin_local_data_pic (rtx disp)
/* Recognize the Mach-O pattern (label_or_symbol - "<pic base>").  */
7271 if (GET_CODE (disp) == MINUS)
7273 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
7274 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
7275 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
7277 const char *sym_name = XSTR (XEXP (disp, 1), 0);
/* The Mach-O PIC base is a symbol with this literal magic name.  */
7278 if (! strcmp (sym_name, "<pic base>"))
7286 /* Determine if a given RTX is a valid constant. We already know this
7287 satisfies CONSTANT_P. */
/* NOTE(review): several case labels of the switch (CONST, SYMBOL_REF,
   CONST_DOUBLE/VECTOR, default) are elided in this listing.  */
7290 legitimate_constant_p (rtx x)
7292 switch (GET_CODE (x))
/* CONST: strip (const (plus sym int)) down to the symbolic part.  */
7297 if (GET_CODE (x) == PLUS)
7299 if (!CONST_INT_P (XEXP (x, 1)))
7304 if (TARGET_MACHO && darwin_local_data_pic (x))
7307 /* Only some unspecs are valid as "constants". */
7308 if (GET_CODE (x) == UNSPEC)
7309 switch (XINT (x, 1))
7314 return TARGET_64BIT;
/* TPOFF-style unspecs are constant only for local-exec TLS symbols.  */
7317 x = XVECEXP (x, 0, 0);
7318 return (GET_CODE (x) == SYMBOL_REF
7319 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
7321 x = XVECEXP (x, 0, 0);
7322 return (GET_CODE (x) == SYMBOL_REF
7323 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
7328 /* We must have drilled down to a symbol. */
7329 if (GET_CODE (x) == LABEL_REF)
7331 if (GET_CODE (x) != SYMBOL_REF)
7336 /* TLS symbols are never valid. */
7337 if (SYMBOL_REF_TLS_MODEL (x))
7340 /* DLLIMPORT symbols are never valid. */
/* They must be loaded through the import table, never used directly.  */
7341 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
7342 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode immediates are restricted; only zero (and, per the elided
   condition, presumably other special cases) pass.  */
7347 if (GET_MODE (x) == TImode
7348 && x != CONST0_RTX (TImode)
7354 if (x == CONST0_RTX (GET_MODE (x)))
7362 /* Otherwise we handle everything else in the move patterns. */
7366 /* Determine if it's legal to put X into the constant pool. This
7367 is not possible for the address of thread-local symbols, which
7368 is checked above. */
7371 ix86_cannot_force_const_mem (rtx x)
7373 /* We can always put integral constants and vectors in memory. */
/* The case labels returning false (CONST_INT, CONST_DOUBLE, vectors)
   are elided in this listing; everything else defers to
   legitimate_constant_p.  */
7374 switch (GET_CODE (x))
7384 return !legitimate_constant_p (x);
7387 /* Determine if a given RTX is a valid constant address. */
/* A constant address is a constant that is also a legitimate
   (strict-checked) address in Pmode.  */
7390 constant_address_p (rtx x)
7392 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
7395 /* Nonzero if the constant value X is a legitimate general operand
7396 when generating PIC code. It is given that flag_pic is on and
7397 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* NOTE(review): the switch's other case labels and default return are
   elided in this listing.  */
7400 legitimate_pic_operand_p (rtx x)
7404 switch (GET_CODE (x))
/* CONST: strip an outer (plus inner const_int) wrapper.  */
7407 inner = XEXP (x, 0);
7408 if (GET_CODE (inner) == PLUS
7409 && CONST_INT_P (XEXP (inner, 1)))
7410 inner = XEXP (inner, 0);
7412 /* Only some unspecs are valid as "constants". */
7413 if (GET_CODE (inner) == UNSPEC)
7414 switch (XINT (inner, 1))
7419 return TARGET_64BIT;
/* TPOFF unspec: valid only for local-exec TLS symbols.  */
7421 x = XVECEXP (inner, 0, 0);
7422 return (GET_CODE (x) == SYMBOL_REF
7423 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* SYMBOL_REF / LABEL_REF fall back to the displacement check.  */
7431 return legitimate_pic_address_disp_p (x);
7438 /* Determine if a given CONST RTX is a valid memory displacement
/* ...in PIC mode (the rest of this header comment, line 7439, is
   elided in this listing).  */
7442 legitimate_pic_address_disp_p (rtx disp)
7446 /* In 64bit mode we can allow direct addresses of symbols and labels
7447 when they are not dynamic symbols. */
/* NOTE(review): the TARGET_64BIT guard for this first section (around
   line 7448-7449) is elided; the +/-16MB offset limit below matches
   the small/medium code model displacement range.  */
7450 rtx op0 = disp, op1;
7452 switch (GET_CODE (disp))
/* CONST: must be sym+const_int with the constant within +/-16MB.  */
7458 if (GET_CODE (XEXP (disp, 0)) != PLUS)
7460 op0 = XEXP (XEXP (disp, 0), 0);
7461 op1 = XEXP (XEXP (disp, 0), 1);
7462 if (!CONST_INT_P (op1)
7463 || INTVAL (op1) >= 16*1024*1024
7464 || INTVAL (op1) < -16*1024*1024)
7466 if (GET_CODE (op0) == LABEL_REF)
7468 if (GET_CODE (op0) != SYMBOL_REF)
7473 /* TLS references should always be enclosed in UNSPEC. */
7474 if (SYMBOL_REF_TLS_MODEL (op0))
/* Local, near symbols may be addressed directly outside large PIC.  */
7476 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
7477 && ix86_cmodel != CM_LARGE_PIC)
/* 64-bit path continues: only CONST-wrapped GOT unspecs remain.  */
7485 if (GET_CODE (disp) != CONST)
7487 disp = XEXP (disp, 0);
7491 /* We are unsafe to allow PLUS expressions. This limit allowed distance
7492 of GOT tables. We should not need these anyway. */
7493 if (GET_CODE (disp) != UNSPEC
7494 || (XINT (disp, 1) != UNSPEC_GOTPCREL
7495 && XINT (disp, 1) != UNSPEC_GOTOFF
7496 && XINT (disp, 1) != UNSPEC_PLTOFF)
7499 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
7500 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip CONST and an inner constant offset, then demand
   a recognized PIC unspec.  */
7506 if (GET_CODE (disp) == PLUS)
7508 if (!CONST_INT_P (XEXP (disp, 1)))
7510 disp = XEXP (disp, 0);
7514 if (TARGET_MACHO && darwin_local_data_pic (disp))
7517 if (GET_CODE (disp) != UNSPEC)
7520 switch (XINT (disp, 1))
7525 /* We need to check for both symbols and labels because VxWorks loads
7526 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
7528 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7529 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
7531 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
7532 While ABI specify also 32bit relocation but we don't produce it in
7533 small PIC model at all. */
7534 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
7535 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
7537 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
/* TLS unspecs: each is valid only for the matching TLS model.  */
7539 case UNSPEC_GOTTPOFF:
7540 case UNSPEC_GOTNTPOFF:
7541 case UNSPEC_INDNTPOFF:
7544 disp = XVECEXP (disp, 0, 0);
7545 return (GET_CODE (disp) == SYMBOL_REF
7546 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
7548 disp = XVECEXP (disp, 0, 0);
7549 return (GET_CODE (disp) == SYMBOL_REF
7550 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
7552 disp = XVECEXP (disp, 0, 0);
7553 return (GET_CODE (disp) == SYMBOL_REF
7554 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
7560 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
7561 memory address for an instruction. The MODE argument is the machine mode
7562 for the MEM expression that wants to use this address.
7564 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
7565 convert common non-canonical forms to canonical form so that they will
/* NOTE(review): elided lines include the report/error-exit labels the
   `reason` strings feed into, and the TRUE/FALSE returns.  STRICT
   selects hard-reg checking (*_STRICT_P vs *_NONSTRICT_P below).  */
7569 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
7570 rtx addr, int strict)
7572 struct ix86_address parts;
7573 rtx base, index, disp;
7574 HOST_WIDE_INT scale;
7575 const char *reason = NULL;
7576 rtx reason_rtx = NULL_RTX;
/* Decompose first; anything undecomposable is immediately invalid.  */
7578 if (ix86_decompose_address (addr, &parts) <= 0)
7580 reason = "decomposition failed";
7585 index = parts.index;
7587 scale = parts.scale;
7589 /* Validate base register.
7591 Don't allow SUBREG's that span more than a word here. It can lead to spill
7592 failures when the base is one word out of a two word structure, which is
7593 represented internally as a DImode int. */
7602 else if (GET_CODE (base) == SUBREG
7603 && REG_P (SUBREG_REG (base))
7604 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7606 reg = SUBREG_REG (base);
7609 reason = "base is not a register";
7613 if (GET_MODE (base) != Pmode)
7615 reason = "base is not in Pmode";
7619 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7620 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7622 reason = "base is not valid";
7627 /* Validate index register.
7629 Don't allow SUBREG's that span more than a word here -- same as above. */
7638 else if (GET_CODE (index) == SUBREG
7639 && REG_P (SUBREG_REG (index))
7640 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7642 reg = SUBREG_REG (index);
7645 reason = "index is not a register";
7649 if (GET_MODE (index) != Pmode)
7651 reason = "index is not in Pmode";
7655 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7656 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7658 reason = "index is not valid";
7663 /* Validate scale factor. */
/* Only scale 1, 2, 4, 8 are encodable, and only with an index.  */
7666 reason_rtx = GEN_INT (scale);
7669 reason = "scale without index";
7673 if (scale != 2 && scale != 4 && scale != 8)
7675 reason = "scale is not a valid multiplier";
7680 /* Validate displacement. */
7685 if (GET_CODE (disp) == CONST
7686 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7687 switch (XINT (XEXP (disp, 0), 1))
7689 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
7690 used. While ABI specify also 32bit relocations, we don't produce
7691 them at all and use IP relative instead. */
7694 gcc_assert (flag_pic);
7696 goto is_legitimate_pic;
7697 reason = "64bit address unspec";
7700 case UNSPEC_GOTPCREL:
7701 gcc_assert (flag_pic);
7702 goto is_legitimate_pic;
/* TLS displacement unspecs: fall through to the generic checks
   (the validation lines between these labels are elided).  */
7704 case UNSPEC_GOTTPOFF:
7705 case UNSPEC_GOTNTPOFF:
7706 case UNSPEC_INDNTPOFF:
7712 reason = "invalid address unspec";
/* Symbolic displacement while generating PIC (the flag_pic test and
   the is_legitimate_pic: label are elided around here).  */
7716 else if (SYMBOLIC_CONST (disp)
7720 && MACHOPIC_INDIRECT
7721 && !machopic_operand_p (disp)
7727 if (TARGET_64BIT && (index || base))
7729 /* foo@dtpoff(%rX) is ok. */
7730 if (GET_CODE (disp) != CONST
7731 || GET_CODE (XEXP (disp, 0)) != PLUS
7732 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7733 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7734 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7735 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7737 reason = "non-constant pic memory reference";
7741 else if (! legitimate_pic_address_disp_p (disp))
7743 reason = "displacement is an invalid pic construct";
7747 /* This code used to verify that a symbolic pic displacement
7748 includes the pic_offset_table_rtx register.
7750 While this is good idea, unfortunately these constructs may
7751 be created by "adds using lea" optimization for incorrect
7760 This code is nonsensical, but results in addressing
7761 GOT table with pic_offset_table_rtx base. We can't
7762 just refuse it easily, since it gets matched by
7763 "addsi3" pattern, that later gets split to lea in the
7764 case output register differs from input. While this
7765 can be handled by separate addsi pattern for this case
7766 that never results in lea, this seems to be easier and
7767 correct fix for crash to disable this test. */
/* Non-PIC: displacement must be a plain constant...  */
7769 else if (GET_CODE (disp) != LABEL_REF
7770 && !CONST_INT_P (disp)
7771 && (GET_CODE (disp) != CONST
7772 || !legitimate_constant_p (disp))
7773 && (GET_CODE (disp) != SYMBOL_REF
7774 || !legitimate_constant_p (disp)))
7776 reason = "displacement is not constant";
/* ...and on x86-64 it must also fit a sign-extended 32-bit imm.  */
7779 else if (TARGET_64BIT
7780 && !x86_64_immediate_operand (disp, VOIDmode))
7782 reason = "displacement is out of range";
7787 /* Everything looks valid. */
7794 /* Return a unique alias set for the GOT. */
/* Lazily created on first use; -1 means "not yet allocated".  */
7796 static alias_set_type
7797 ix86_GOT_alias_set (void)
7799 static alias_set_type set = -1;
7801 set = new_alias_set ();
7805 /* Return a legitimate reference for ORIG (an address) using the
7806 register REG. If REG is 0, a new pseudo is generated.
7808 There are two types of references that must be handled:
7810 1. Global data references must load the address from the GOT, via
7811 the PIC reg. An insn is emitted to do this load, and the reg is
7814 2. Static data references, constant pool addresses, and code labels
7815 compute the address as an offset from the GOT, whose base is in
7816 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7817 differentiate them from global data objects. The returned
7818 address is the PIC reg + an unspec constant.
7820 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7821 reg also appears in the address. */
/* NOTE(review): subsampled — declarations of addr/new_rtx/base and a
   number of braces, else-arms and returns are elided.  */
7824 legitimize_pic_address (rtx orig, rtx reg)
7831 if (TARGET_MACHO && !TARGET_64BIT)
7834 reg = gen_reg_rtx (Pmode);
7835 /* Use the generic Mach-O PIC machinery. */
7836 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* x86-64: addresses that are already valid PIC displacements need no
   transformation.  */
7840 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
/* x86-64 medium/large models: build @GOTOFF-style references by
   adding the PIC register explicitly.  */
7842 else if (TARGET_64BIT
7843 && ix86_cmodel != CM_SMALL_PIC
7844 && gotoff_operand (addr, Pmode))
7847 /* This symbol may be referenced via a displacement from the PIC
7848 base address (@GOTOFF). */
7850 if (reload_in_progress)
7851 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7852 if (GET_CODE (addr) == CONST)
7853 addr = XEXP (addr, 0);
7854 if (GET_CODE (addr) == PLUS)
7856 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7858 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7861 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7862 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7864 tmpreg = gen_reg_rtx (Pmode);
7867 emit_move_insn (tmpreg, new_rtx);
7871 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7872 tmpreg, 1, OPTAB_DIRECT);
7875 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
/* 32-bit @GOTOFF: same unspec, but returned as picreg + const.  */
7877 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7879 /* This symbol may be referenced via a displacement from the PIC
7880 base address (@GOTOFF). */
7882 if (reload_in_progress)
7883 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7884 if (GET_CODE (addr) == CONST)
7885 addr = XEXP (addr, 0);
7886 if (GET_CODE (addr) == PLUS)
7888 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7890 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7893 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7894 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7895 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7899 emit_move_insn (reg, new_rtx);
/* Global symbols (non-TLS) and, on VxWorks, text labels: go through
   the GOT.  */
7903 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7904 /* We can't use @GOTOFF for text labels on VxWorks;
7905 see gotoff_operand. */
7906 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)
/* dllimport symbols get their own import-table indirection.  */
7908 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7910 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
7911 return legitimize_dllimport_symbol (addr, true);
7912 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
7913 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
7914 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
7916 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
7917 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* x86-64 small/medium PIC: RIP-relative @GOTPCREL load.  */
7921 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7923 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7924 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7925 new_rtx = gen_const_mem (Pmode, new_rtx);
7926 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7929 reg = gen_reg_rtx (Pmode);
7930 /* Use directly gen_movsi, otherwise the address is loaded
7931 into register for CSE. We don't want to CSE this addresses,
7932 instead we CSE addresses from the GOT table, so skip this. */
7933 emit_insn (gen_movsi (reg, new_rtx));
7938 /* This symbol must be referenced via a load from the
7939 Global Offset Table (@GOT). */
7941 if (reload_in_progress)
7942 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7943 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7944 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7946 new_rtx = force_reg (Pmode, new_rtx);
7947 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7948 new_rtx = gen_const_mem (Pmode, new_rtx);
7949 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7952 reg = gen_reg_rtx (Pmode);
7953 emit_move_insn (reg, new_rtx);
/* Remaining cases: plain constants and composite CONST expressions.  */
7959 if (CONST_INT_P (addr)
7960 && !x86_64_immediate_operand (addr, VOIDmode))
7964 emit_move_insn (reg, addr);
7968 new_rtx = force_reg (Pmode, addr);
7970 else if (GET_CODE (addr) == CONST)
7972 addr = XEXP (addr, 0);
7974 /* We must match stuff we generate before. Assume the only
7975 unspecs that can get here are ours. Not that we could do
7976 anything with them anyway.... */
7977 if (GET_CODE (addr) == UNSPEC
7978 || (GET_CODE (addr) == PLUS
7979 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7981 gcc_assert (GET_CODE (addr) == PLUS);
7983 if (GET_CODE (addr) == PLUS)
7985 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7987 /* Check first to see if this is a constant offset from a @GOTOFF
7988 symbol reference. */
7989 if (gotoff_operand (op0, Pmode)
7990 && CONST_INT_P (op1))
7994 if (reload_in_progress)
7995 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7996 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7998 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7999 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
8000 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
8004 emit_move_insn (reg, new_rtx);
/* x86-64: constant offsets beyond +/-16MB cannot stay in the
   displacement; force the large operand(s) into registers.  */
8010 if (INTVAL (op1) < -16*1024*1024
8011 || INTVAL (op1) >= 16*1024*1024)
8013 if (!x86_64_immediate_operand (op1, Pmode))
8014 op1 = force_reg (Pmode, op1);
8015 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively and recombine,
   folding a constant part back into the displacement if possible.  */
8021 base = legitimize_pic_address (XEXP (addr, 0), reg);
8022 new_rtx = legitimize_pic_address (XEXP (addr, 1),
8023 base == reg ? NULL_RTX : reg);
8025 if (CONST_INT_P (new_rtx))
8026 new_rtx = plus_constant (base, INTVAL (new_rtx));
8029 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
8031 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
8032 new_rtx = XEXP (new_rtx, 1);
8034 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
8042 /* Load the thread pointer.  If TO_REG is true, force it into a register. */
/* NOTE(review): this excerpt is non-contiguous (original line numbers skip);
   the return type, braces and return statement are not visible here.  */
8045 get_thread_pointer (int to_reg)
/* Build an UNSPEC_TP rtx representing the thread pointer itself.  */
8049   tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
/* Copy the thread pointer into a fresh pseudo (presumably only when
   TO_REG is set -- the guarding condition is elided from this view).  */
8053   reg = gen_reg_rtx (Pmode);
8054   insn = gen_rtx_SET (VOIDmode, reg, tp);
8055   insn = emit_insn (insn);
8060 /* A subroutine of legitimize_address and ix86_expand_move.  FOR_MOV is
8061    false if we expect this to be used for a memory address and true if
8062    we expect to load the address into a register.  */
/* Legitimize the TLS symbol X according to its access MODEL
   (global-dynamic, local-dynamic, initial-exec, or local-exec).
   NOTE(review): the excerpt is elided; switch braces, some case labels
   and the final return are not visible here.  */
8065 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
8067   rtx dest, base, off, pic, tp;
8072     case TLS_MODEL_GLOBAL_DYNAMIC:
8073       dest = gen_reg_rtx (Pmode);
/* Under GNU2 TLS the thread pointer is added explicitly below.  */
8074       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8076       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8078 	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
/* The 64-bit GD sequence is a call whose result lands in %rax;
   wrap it in a libcall block equivalent to X.  */
8081 	  emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
8082 	  insns = get_insns ();
8085 	  RTL_CONST_CALL_P (insns) = 1;
8086 	  emit_libcall_block (insns, dest, rax, x);
8088       else if (TARGET_64BIT && TARGET_GNU2_TLS)
8089 	emit_insn (gen_tls_global_dynamic_64 (dest, x));
8091 	emit_insn (gen_tls_global_dynamic_32 (dest, x));
8093       if (TARGET_GNU2_TLS)
/* GNU2 descriptors yield an offset; add the thread pointer and record
   a REG_EQUIV note so the combined value is known to equal X.  */
8095 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
8097 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8101     case TLS_MODEL_LOCAL_DYNAMIC:
8102       base = gen_reg_rtx (Pmode);
8103       tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
8105       if (TARGET_64BIT && ! TARGET_GNU2_TLS)
8107 	  rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
8110 	  emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
8111 	  insns = get_insns ();
/* The LD base has no single symbol to equate with; build an
   EXPR_LIST note naming __tls_get_addr instead.  */
8114 	  note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
8115 	  note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
8116 	  RTL_CONST_CALL_P (insns) = 1;
8117 	  emit_libcall_block (insns, base, rax, note);
8119       else if (TARGET_64BIT && TARGET_GNU2_TLS)
8120 	emit_insn (gen_tls_local_dynamic_base_64 (base));
8122 	emit_insn (gen_tls_local_dynamic_base_32 (base));
8124       if (TARGET_GNU2_TLS)
8126 	  rtx x = ix86_tls_module_base ();
8128 	  set_unique_reg_note (get_last_insn (), REG_EQUIV,
8129 			       gen_rtx_MINUS (Pmode, x, tp));
/* Add the DTP-relative offset of X to the module base.  */
8132       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
8133       off = gen_rtx_CONST (Pmode, off);
8135       dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
8137       if (TARGET_GNU2_TLS)
8139 	  dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
8141 	  set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
8146     case TLS_MODEL_INITIAL_EXEC:
/* Pick the unspec/relocation type; the PIC-register setup for the
   32-bit cases is partially elided from this view.  */
8150 	  type = UNSPEC_GOTNTPOFF;
8154 	  if (reload_in_progress)
8155 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
8156 	  pic = pic_offset_table_rtx;
8157 	  type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
8159       else if (!TARGET_ANY_GNU_TLS)
8161 	  pic = gen_reg_rtx (Pmode);
8162 	  emit_insn (gen_set_got (pic));
8163 	  type = UNSPEC_GOTTPOFF;
8168 	  type = UNSPEC_INDNTPOFF;
/* Load the TP offset from the GOT slot for X.  */
8171       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
8172       off = gen_rtx_CONST (Pmode, off);
8174 	off = gen_rtx_PLUS (Pmode, pic, off);
8175       off = gen_const_mem (Pmode, off);
8176       set_mem_alias_set (off, ix86_GOT_alias_set ());
8178       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8180 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8181 	  off = force_reg (Pmode, off);
8182 	  return gen_rtx_PLUS (Pmode, base, off);
/* Negative-offset variant: subtract from the thread pointer.  */
8186 	  base = get_thread_pointer (true);
8187 	  dest = gen_reg_rtx (Pmode);
8188 	  emit_insn (gen_subsi3 (dest, base, off));
8192     case TLS_MODEL_LOCAL_EXEC:
8193       off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
8194 			    (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8195 			    ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
8196       off = gen_rtx_CONST (Pmode, off);
8198       if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
8200 	  base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
8201 	  return gen_rtx_PLUS (Pmode, base, off);
8205 	  base = get_thread_pointer (true);
8206 	  dest = gen_reg_rtx (Pmode);
8207 	  emit_insn (gen_subsi3 (dest, base, off));
8218 /* Create or return the unique __imp_DECL dllimport symbol corresponding
/* Cache of decl -> __imp_ VAR_DECL mappings, garbage-collected via the
   tree_map marking hooks.  */
8221 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
8222   htab_t dllimport_map;
/* Look up (or lazily create) the artificial VAR_DECL whose RTL is a
   load through the "__imp_" (or "__imp__") indirection symbol for DECL.
   NOTE(review): declarations of name/prefix/imp_name/rtl/to/loc and the
   "if (h)" early-return are elided from this view.  */
8225 get_dllimport_decl (tree decl)
8227   struct tree_map *h, in;
8231   size_t namelen, prefixlen;
/* Create the hash table on first use.  */
8237     dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
8239   in.hash = htab_hash_pointer (decl);
8240   in.base.from = decl;
8241   loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
8242   h = (struct tree_map *) *loc;
/* Cache miss: build a new artificial, external, read-only pointer decl.  */
8246   *loc = h = GGC_NEW (struct tree_map);
8248   h->base.from = decl;
8249   h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
8250   DECL_ARTIFICIAL (to) = 1;
8251   DECL_IGNORED_P (to) = 1;
8252   DECL_EXTERNAL (to) = 1;
8253   TREE_READONLY (to) = 1;
8255   name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
8256   name = targetm.strip_name_encoding (name);
/* Fastcall symbols already carry a prefix character, so they get a
   single underscore after __imp; others get the extra leading '_'.  */
8257   prefix = name[0] == FASTCALL_PREFIX ? "*__imp_": "*__imp__";
8258   namelen = strlen (name);
8259   prefixlen = strlen (prefix);
8260   imp_name = (char *) alloca (namelen + prefixlen + 1);
8261   memcpy (imp_name, prefix, prefixlen);
8262   memcpy (imp_name + prefixlen, name, namelen + 1);
/* Intern the name in GC memory and wrap the symbol in a const MEM:
   the import slot holds the real address at run time.  */
8264   name = ggc_alloc_string (imp_name, namelen + prefixlen);
8265   rtl = gen_rtx_SYMBOL_REF (Pmode, name);
8266   SET_SYMBOL_REF_DECL (rtl, to);
8267   SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
8269   rtl = gen_const_mem (Pmode, rtl);
8270   set_mem_alias_set (rtl, ix86_GOT_alias_set ());
8272   SET_DECL_RTL (to, rtl);
8273   SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
8278 /* Expand SYMBOL into its corresponding dllimport symbol.  WANT_REG is
8279    true if we require the result be a register.  */
/* NOTE(review): local declarations, the "if (want_reg)" guard and the
   return statement are elided from this excerpt.  */
8282 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
/* A dllimport SYMBOL_REF must have a decl to map through.  */
8287   gcc_assert (SYMBOL_REF_DECL (symbol));
8288   imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
8290   x = DECL_RTL (imp_decl);
8292     x = force_reg (Pmode, x);
8296 /* Try machine-dependent ways of modifying an illegitimate address
8297    to be legitimate.  If we find one, return the new, valid address.
8298    This macro is used in only one place: `memory_address' in explow.c.
8300    OLDX is the address as it was before break_out_memory_refs was called.
8301    In some cases it is useful to look at this to decide what needs to be done.
8303    MODE and WIN are passed so that this macro can use
8304    GO_IF_LEGITIMATE_ADDRESS.
8306    It is always safe for this macro to do nothing.  It exists to recognize
8307    opportunities to optimize the output.
8309    For the 80386, we handle X+REG by loading X into a register R and
8310    using R+REG.  R will go in a general reg and indexing will be used.
8311    However, if REG is a broken-out memory address or multiplication,
8312    nothing needs to be done because REG can certainly go in a general reg.
8314    When -fpic is used, special handling is needed for symbolic references.
8315    See comments by legitimize_pic_address in i386.c for details.  */
/* NOTE(review): several lines (declarations of `log'/`changed',
   intermediate returns and closing braces) are elided from this view.  */
8318 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
/* TLS symbols are handled first, before any other rewriting.  */
8323   log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
8325     return legitimize_tls_address (x, (enum tls_model) log, false);
8326   if (GET_CODE (x) == CONST
8327       && GET_CODE (XEXP (x, 0)) == PLUS
8328       && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8329       && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
8331       rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
8332 				      (enum tls_model) log, false);
8333       return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
/* Likewise dllimport symbols (plain or symbol+offset CONST).  */
8336   if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
8338       if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
8339 	return legitimize_dllimport_symbol (x, true);
8340       if (GET_CODE (x) == CONST
8341 	  && GET_CODE (XEXP (x, 0)) == PLUS
8342 	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8343 	  && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
8345 	  rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
8346 	  return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
8350   if (flag_pic && SYMBOLIC_CONST (x))
8351     return legitimize_pic_address (x, 0);
8353   /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
8354   if (GET_CODE (x) == ASHIFT
8355       && CONST_INT_P (XEXP (x, 1))
8356       && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
8359       log = INTVAL (XEXP (x, 1));
8360       x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
8361 			GEN_INT (1 << log));
8364   if (GET_CODE (x) == PLUS)
8366       /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
8368       if (GET_CODE (XEXP (x, 0)) == ASHIFT
8369 	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
8370 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
8373 	  log = INTVAL (XEXP (XEXP (x, 0), 1));
8374 	  XEXP (x, 0) = gen_rtx_MULT (Pmode,
8375 				      force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
8376 				      GEN_INT (1 << log));
8379       if (GET_CODE (XEXP (x, 1)) == ASHIFT
8380 	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
8381 	  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
8384 	  log = INTVAL (XEXP (XEXP (x, 1), 1));
8385 	  XEXP (x, 1) = gen_rtx_MULT (Pmode,
8386 				      force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
8387 				      GEN_INT (1 << log));
8390       /* Put multiply first if it isn't already.  */
8391       if (GET_CODE (XEXP (x, 1)) == MULT)
8393 	  rtx tmp = XEXP (x, 0);
8394 	  XEXP (x, 0) = XEXP (x, 1);
8399       /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
8400 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
8401 	 created by virtual register instantiation, register elimination, and
8402 	 similar optimizations.  */
8403       if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
8406 	  x = gen_rtx_PLUS (Pmode,
8407 			    gen_rtx_PLUS (Pmode, XEXP (x, 0),
8408 					  XEXP (XEXP (x, 1), 0)),
8409 			    XEXP (XEXP (x, 1), 1));
8413 	   (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
8414 	 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
8415       else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
8416 	       && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8417 	       && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
8418 	       && CONSTANT_P (XEXP (x, 1)))
8421 	  rtx other = NULL_RTX;
/* Pick out which of the two constants is the CONST_INT; the other
   operand is folded via plus_constant below.  */
8423 	  if (CONST_INT_P (XEXP (x, 1)))
8425 	      constant = XEXP (x, 1);
8426 	      other = XEXP (XEXP (XEXP (x, 0), 1), 1);
8428 	  else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
8430 	      constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
8431 	      other = XEXP (x, 1);
8439 	    x = gen_rtx_PLUS (Pmode,
8440 			      gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
8441 					    XEXP (XEXP (XEXP (x, 0), 1), 0)),
8442 			      plus_constant (other, INTVAL (constant)));
8446       if (changed && legitimate_address_p (mode, x, FALSE))
/* Force MULT sub-expressions into registers when the address as a
   whole still isn't legitimate.  */
8449       if (GET_CODE (XEXP (x, 0)) == MULT)
8452 	  XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
8455       if (GET_CODE (XEXP (x, 1)) == MULT)
8458 	  XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
8462 	  && REG_P (XEXP (x, 1))
8463 	  && REG_P (XEXP (x, 0)))
8466       if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
8469 	  x = legitimize_pic_address (x, 0);
8472       if (changed && legitimate_address_p (mode, x, FALSE))
/* Last resort: move one side into a fresh register.  */
8475       if (REG_P (XEXP (x, 0)))
8477 	  rtx temp = gen_reg_rtx (Pmode);
8478 	  rtx val  = force_operand (XEXP (x, 1), temp);
8480 	    emit_move_insn (temp, val);
8486       else if (REG_P (XEXP (x, 1)))
8488 	  rtx temp = gen_reg_rtx (Pmode);
8489 	  rtx val  = force_operand (XEXP (x, 0), temp);
8491 	    emit_move_insn (temp, val);
8501 /* Print an integer constant expression in assembler syntax.  Addition
8502    and subtraction are the only arithmetic that may appear in these
8503    expressions.  FILE is the stdio stream to write to, X is the rtx, and
8504    CODE is the operand print code from the output string.  */
/* NOTE(review): many switch case labels and break statements are elided
   from this excerpt; the visible fragments below belong to the PC,
   SYMBOL_REF, LABEL_REF, CONST_INT, CONST, CONST_DOUBLE, PLUS, MINUS and
   UNSPEC arms of a switch on GET_CODE (x).  */
8507 output_pic_addr_const (FILE *file, rtx x, int code)
8511   switch (GET_CODE (x))
8514       gcc_assert (flag_pic);
8519       if (! TARGET_MACHO || TARGET_64BIT)
8520 	output_addr_const (file, x);
8523 	  const char *name = XSTR (x, 0);
8525 	  /* Mark the decl as referenced so that cgraph will
8526 	     output the function.  */
8527 	  if (SYMBOL_REF_DECL (x))
8528 	    mark_decl_referenced (SYMBOL_REF_DECL (x));
8531 	  if (MACHOPIC_INDIRECT
8532 	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
8533 	    name = machopic_indirection_name (x, /*stub_p=*/true);
8535 	  assemble_name (file, name);
/* Non-local symbols printed with the 'P' code get a @PLT suffix
   (except on Mach-O and the 64-bit MS ABI).  */
8537       if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
8538 	  && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
8539 	fputs ("@PLT", file);
8546       ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
8547       assemble_name (asm_out_file, buf);
8551       fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8555       /* This used to output parentheses around the expression,
8556 	 but that does not work on the 386 (either ATT or BSD assembler).  */
8557       output_pic_addr_const (file, XEXP (x, 0), code);
8561       if (GET_MODE (x) == VOIDmode)
8563 	  /* We can use %d if the number is <32 bits and positive.  */
8564 	  if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
8565 	    fprintf (file, "0x%lx%08lx",
8566 		     (unsigned long) CONST_DOUBLE_HIGH (x),
8567 		     (unsigned long) CONST_DOUBLE_LOW (x));
8569 	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
8572 	/* We can't handle floating point constants;
8573 	   PRINT_OPERAND must handle them.  */
8574 	output_operand_lossage ("floating constant misused");
8578       /* Some assemblers need integer constants to appear first.  */
8579       if (CONST_INT_P (XEXP (x, 0)))
8581 	  output_pic_addr_const (file, XEXP (x, 0), code);
8583 	  output_pic_addr_const (file, XEXP (x, 1), code);
8587 	  gcc_assert (CONST_INT_P (XEXP (x, 1)));
8588 	  output_pic_addr_const (file, XEXP (x, 1), code);
8590 	  output_pic_addr_const (file, XEXP (x, 0), code);
/* MINUS: bracket the whole expression; brackets vs. parens depend on
   the assembler dialect.  */
8596       putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8597       output_pic_addr_const (file, XEXP (x, 0), code);
8599       output_pic_addr_const (file, XEXP (x, 1), code);
8601       putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped operand, then the relocation suffix.  */
8605       gcc_assert (XVECLEN (x, 0) == 1);
8606       output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8607       switch (XINT (x, 1))
8610 	  fputs ("@GOT", file);
8613 	  fputs ("@GOTOFF", file);
8616 	  fputs ("@PLTOFF", file);
8618 	case UNSPEC_GOTPCREL:
8619 	  fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8620 		 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
8622 	case UNSPEC_GOTTPOFF:
8623 	  /* FIXME: This might be @TPOFF in Sun ld too.  */
8624 	  fputs ("@GOTTPOFF", file);
8627 	  fputs ("@TPOFF", file);
8631 	    fputs ("@TPOFF", file);
8633 	    fputs ("@NTPOFF", file);
8636 	  fputs ("@DTPOFF", file);
8638 	case UNSPEC_GOTNTPOFF:
8640 	    fputs (ASSEMBLER_DIALECT == ASM_ATT ?
8641 		   "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
8643 	    fputs ("@GOTNTPOFF", file);
8645 	case UNSPEC_INDNTPOFF:
8646 	  fputs ("@INDNTPOFF", file);
8649 	  output_operand_lossage ("invalid UNSPEC as operand");
8655       output_operand_lossage ("invalid expression as operand");
8659 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8660    We need to emit DTP-relative relocations.  */
/* Emit "<directive> X@DTPOFF" for SIZE-byte DTPREL values; the switch
   on SIZE and the 12-byte directive selection are elided here.  The
   trailing ", 0" pads the 12-byte (XFmode-sized) case -- presumably
   the upper word of a 12-byte slot; confirm against full source.  */
8662 static void ATTRIBUTE_UNUSED
8663 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8665       fputs (ASM_LONG, file);
8666   output_addr_const (file, x);
8667   fputs ("@DTPOFF", file);
8673       fputs (", 0", file);
8680 /* In the name of slightly smaller debug output, and to cater to
8681    general assembler lossage, recognize PIC+GOTOFF and turn it back
8682    into a direct symbol reference.
8684    On Darwin, this is necessary to avoid a crash, because Darwin
8685    has a different PIC label for each routine but the DWARF debugging
8686    information is not associated with any particular routine, so it's
8687    necessary to remove references to the PIC label from RTL stored by
8688    the DWARF output code.  */
/* NOTE(review): the declaration of `x', the initial unwrapping of
   ORIG_X, and several early returns are elided from this excerpt.  */
8691 ix86_delegitimize_address (rtx orig_x)
8694   /* reg_addend is NULL or a multiple of some register.  */
8695   rtx reg_addend = NULL_RTX;
8696   /* const_addend is NULL or a const_int.  */
8697   rtx const_addend = NULL_RTX;
8698   /* This is the result, or NULL.  */
8699   rtx result = NULL_RTX;
/* 64-bit case (presumably): strip a GOTPCREL unspec to recover the
   underlying symbol.  */
8706       if (GET_CODE (x) != CONST
8707 	  || GET_CODE (XEXP (x, 0)) != UNSPEC
8708 	  || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8711       return XVECEXP (XEXP (x, 0), 0, 0);
8714   if (GET_CODE (x) != PLUS
8715       || GET_CODE (XEXP (x, 1)) != CONST)
/* Accept %ebx + GOT/GOTOFF, or %ebx + reg*scale + GOT/GOTOFF, where
   %ebx is the PIC register; remember the non-PIC addend.  */
8718   if (REG_P (XEXP (x, 0))
8719       && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8720     /* %ebx + GOT/GOTOFF */
8722   else if (GET_CODE (XEXP (x, 0)) == PLUS)
8724       /* %ebx + %reg * scale + GOT/GOTOFF */
8725       reg_addend = XEXP (x, 0);
8726       if (REG_P (XEXP (reg_addend, 0))
8727 	  && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8728 	reg_addend = XEXP (reg_addend, 1);
8729       else if (REG_P (XEXP (reg_addend, 1))
8730 	       && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8731 	reg_addend = XEXP (reg_addend, 0);
8734       if (!REG_P (reg_addend)
8735 	  && GET_CODE (reg_addend) != MULT
8736 	  && GET_CODE (reg_addend) != ASHIFT)
/* Peel a constant offset off the CONST wrapper, if present.  */
8742   x = XEXP (XEXP (x, 1), 0);
8743   if (GET_CODE (x) == PLUS
8744       && CONST_INT_P (XEXP (x, 1)))
8746       const_addend = XEXP (x, 1);
8750   if (GET_CODE (x) == UNSPEC
8751       && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8752 	  || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8753     result = XVECEXP (x, 0, 0);
8755   if (TARGET_MACHO && darwin_local_data_pic (x)
8757     result = XEXP (x, 0);
/* Re-attach the stripped addends around the recovered symbol.  */
8763     result = gen_rtx_PLUS (Pmode, result, const_addend);
8765     result = gen_rtx_PLUS (Pmode, reg_addend, result);
8769 /* If X is a machine specific address (i.e. a symbol or label being
8770    referenced as a displacement from the GOT implemented using an
8771    UNSPEC), then return the base term.  Otherwise return X.  */
/* NOTE(review): the declaration of `term', the TARGET_64BIT guard and
   several returns are elided from this excerpt.  */
8774 ix86_find_base_term (rtx x)
/* 64-bit path (presumably): look through CONST (UNSPEC_GOTPCREL [sym])
   possibly with a constant offset.  */
8780       if (GET_CODE (x) != CONST)
8783       if (GET_CODE (term) == PLUS
8784 	  && (CONST_INT_P (XEXP (term, 1))
8785 	      || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8786 	term = XEXP (term, 0);
8787       if (GET_CODE (term) != UNSPEC
8788 	  || XINT (term, 1) != UNSPEC_GOTPCREL)
8791       term = XVECEXP (term, 0, 0);
8793       if (GET_CODE (term) != SYMBOL_REF
8794 	  && GET_CODE (term) != LABEL_REF)
/* 32-bit path: delegitimize and accept only a bare symbol/label.  */
8800   term = ix86_delegitimize_address (x);
8802   if (GET_CODE (term) != SYMBOL_REF
8803       && GET_CODE (term) != LABEL_REF)
/* Print the condition-code suffix (e.g. "e", "a", "nb") for CODE in
   MODE to FILE.  REVERSE inverts the condition; FP selects the fcmov
   spelling.  NOTE(review): the switch on CODE and most case labels are
   elided; the visible lines are fragments of individual cases.  */
8810 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
/* FP compares are first mapped onto integer condition codes.  */
8815   if (mode == CCFPmode || mode == CCFPUmode)
8817       enum rtx_code second_code, bypass_code;
8818       ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8819       gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8820       code = ix86_fp_compare_code_to_integer (code);
8824     code = reverse_condition (code);
8875       gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8879       /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8880 	 Those same assemblers have the same but opposite lossage on cmov.  */
8882 	suffix = fp ? "nbe" : "a";
8883       else if (mode == CCCmode)
8906       gcc_assert (mode == CCmode || mode == CCCmode);
8928       gcc_assert (mode == CCmode || mode == CCCmode);
8929       suffix = fp ? "nb" : "ae";
8932       gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8939       else if (mode == CCCmode)
8940 	suffix = fp ? "nb" : "ae";
8945       suffix = fp ? "u" : "p";
8948       suffix = fp ? "nu" : "np";
8953   fputs (suffix, file);
8956 /* Print the name of register X to FILE based on its machine mode and number.
8957    If CODE is 'w', pretend the mode is HImode.
8958    If CODE is 'b', pretend the mode is QImode.
8959    If CODE is 'k', pretend the mode is SImode.
8960    If CODE is 'q', pretend the mode is DImode.
8961    If CODE is 'h', pretend the reg is the 'high' byte register.
8962    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
/* NOTE(review): the size assignments for codes b/k/q/y/h, the switch on
   the computed size, and several braces are elided from this view.  */
8965 print_reg (rtx x, int code, FILE *file)
/* Internal registers (arg/frame pointer, flags, fp status/control)
   must never reach assembly output.  */
8967   gcc_assert (x == pc_rtx
8968 	      || (REGNO (x) != ARG_POINTER_REGNUM
8969 		  && REGNO (x) != FRAME_POINTER_REGNUM
8970 		  && REGNO (x) != FLAGS_REG
8971 		  && REGNO (x) != FPSR_REG
8972 		  && REGNO (x) != FPCR_REG));
/* AT&T syntax prefixes registers with '%' (putc elided here).  */
8974   if (ASSEMBLER_DIALECT == ASM_ATT)
8979       gcc_assert (TARGET_64BIT);
8980       fputs ("rip", file);
/* Translate the print code into an operand size in bytes.  */
8984   if (code == 'w' || MMX_REG_P (x))
8986   else if (code == 'b')
8988   else if (code == 'k')
8990   else if (code == 'q')
8992   else if (code == 'y')
8994   else if (code == 'h')
8997     code = GET_MODE_SIZE (GET_MODE (x));
8999   /* Irritatingly, AMD extended registers use different naming convention
9000      from the normal registers.  */
9001   if (REX_INT_REG_P (x))
9003       gcc_assert (TARGET_64BIT);
9007 	  error ("extended registers have no high halves");
9010 	  fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
9013 	  fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
9016 	  fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
9019 	  fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
9022 	  error ("unsupported operand size for extended register");
9030       if (STACK_TOP_P (x))
9032 	  fputs ("st(0)", file);
/* 4- and 8-byte integer regs get an 'e'/'r' prefix (eax/rax).  */
9039       if (! ANY_FP_REG_P (x))
9040 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
9045       fputs (hi_reg_name[REGNO (x)], file);
9048       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
9050       fputs (qi_reg_name[REGNO (x)], file);
9053       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
9055       fputs (qi_high_reg_name[REGNO (x)], file);
9062 /* Locate some local-dynamic symbol still in use by this function
9063    so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: when *PX is a local-dynamic TLS SYMBOL_REF,
   cache its name in cfun->machine->some_ld_name (the nonzero return
   that stops the walk is elided from this excerpt).  */
9067 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
9071   if (GET_CODE (x) == SYMBOL_REF
9072       && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
9074       cfun->machine->some_ld_name = XSTR (x, 0);
/* Return (and memoize) the name of some local-dynamic TLS symbol used
   in the current function, scanning insns on first call.  The
   not-found path after the loop is elided from this excerpt.  */
9082 get_some_local_dynamic_name (void)
9086   if (cfun->machine->some_ld_name)
9087     return cfun->machine->some_ld_name;
9089   for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
9091 	&& for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
9092       return cfun->machine->some_ld_name;
/* Operand print codes handled by print_operand (partial list; the
   comment's opening line is elided from this excerpt):  */
9098    L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
9099    C -- print opcode suffix for set/cmov insn.
9100    c -- like C, but print reversed condition
9101    E,e -- likewise, but for compare-and-branch fused insn.
9102    F,f -- likewise, but for floating-point.
9103    O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
9105    R -- print the prefix for register names.
9106    z -- print the opcode suffix for the size of the current operand.
9107    * -- print a star (in certain assembler syntax)
9108    A -- print an absolute memory reference.
9109    w -- print the operand as if it's a "word" (HImode) even if it isn't.
9110    s -- print a shift double count, followed by the assemblers argument
9112    b -- print the QImode name of the register for the indicated operand.
9113 	%b0 would print %al if operands[0] is reg 0.
9114    w --  likewise, print the HImode name of the register.
9115    k --  likewise, print the SImode name of the register.
9116    q --  likewise, print the DImode name of the register.
9117    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
9118    y -- print "st(0)" instead of "st" as a register.
9119    D -- print condition for SSE cmp instruction.
9120    P -- if PIC, print an @PLT suffix.
9121    X -- don't print any sort of PIC '@' suffix for a symbol.
9122    & -- print some in-use local-dynamic symbol name.
9123    H -- print a memory address offset by 8; used for sse high-parts
9124    Y -- print condition for SSE5 com* instruction.
9125    + -- print a branch hint as 'cs' or 'ds' prefix
9126    ; -- print a semicolon (after prefixes due to bug in older gas).
/* NOTE(review): this function is heavily elided -- the switch on CODE,
   most case labels, breaks and closing braces are missing from this
   view.  The fragments below correspond to the codes documented above.  */
9130 print_operand (FILE *file, rtx x, int code)
9137 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9142 	  assemble_name (file, get_some_local_dynamic_name ());
9146 	  switch (ASSEMBLER_DIALECT)
9153 	      /* Intel syntax.  For absolute addresses, registers should not
9154 		 be surrounded by braces.  */
9158 		  PRINT_OPERAND (file, x, 0);
9168 	  PRINT_OPERAND (file, x, 0);
9173 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9178 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9183 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9188 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9193 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9198 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9203 	  /* 387 opcodes don't get size suffixes if the operands are
9205 	  if (STACK_REG_P (x))
9208 	  /* Likewise if using Intel opcodes.  */
9209 	  if (ASSEMBLER_DIALECT == ASM_INTEL)
9212 	  /* This is the size of op from size of operand.  */
9213 	  switch (GET_MODE_SIZE (GET_MODE (x)))
9222 #ifdef HAVE_GAS_FILDS_FISTS
9232 	      if (GET_MODE (x) == SFmode)
9247 	      if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
9251 #ifdef GAS_MNEMONICS
9280 	  if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
9282 	      PRINT_OPERAND (file, x, 0);
9288 	  /* Little bit of braindamage here.  The SSE compare instructions
9289 	     does use completely different names for the comparisons that the
9290 	     fp conditional moves.  */
9291 	  switch (GET_CODE (x))
9306 	      fputs ("unord", file);
9310 	      fputs ("neq", file);
9314 	      fputs ("nlt", file);
9318 	      fputs ("nle", file);
9321 	      fputs ("ord", file);
9328 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9329 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9331 	      switch (GET_MODE (x))
9333 		case HImode: putc ('w', file); break;
9335 		case SFmode: putc ('l', file); break;
9337 		case DFmode: putc ('q', file); break;
9338 		default: gcc_unreachable ();
9345 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
9348 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9349 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9352 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
9355 	  /* Like above, but reverse condition */
9357 	  /* Check to see if argument to %c is really a constant
9358 	     and not a condition code which needs to be reversed.  */
9359 	  if (!COMPARISON_P (x))
9361 	    output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
9364 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
9367 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
9368 	  if (ASSEMBLER_DIALECT == ASM_ATT)
9371 	  put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
9375 	  put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
9379 	  put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
9383 	  /* It doesn't actually matter what mode we use here, as we're
9384 	     only going to use this for printing.  */
9385 	  x = adjust_address_nv (x, DImode, 8);
/* '+': emit a taken/not-taken branch-hint prefix from REG_BR_PROB.  */
9392 	    if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
9395 	    x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
9398 		int pred_val = INTVAL (XEXP (x, 0));
9400 		if (pred_val < REG_BR_PROB_BASE * 45 / 100
9401 		    || pred_val > REG_BR_PROB_BASE * 55 / 100)
9403 		    int taken = pred_val > REG_BR_PROB_BASE / 2;
9404 		    int cputaken = final_forward_branch_p (current_output_insn) == 0;
9406 		    /* Emit hints only in the case default branch prediction
9407 		       heuristics would fail.  */
9408 		    if (taken != cputaken)
9410 			/* We use 3e (DS) prefix for taken branches and
9411 			   2e (CS) prefix for not taken branches.  */
9413 			  fputs ("ds ; ", file);
9415 			  fputs ("cs ; ", file);
9423 	  switch (GET_CODE (x))
9426 	      fputs ("neq", file);
9433 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
9437 	      fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
9448 	      fputs ("unord", file);
9451 	      fputs ("ord", file);
9454 	      fputs ("ueq", file);
9457 	      fputs ("nlt", file);
9460 	      fputs ("nle", file);
9463 	      fputs ("ule", file);
9466 	      fputs ("ult", file);
9469 	      fputs ("une", file);
9478 	  fputs (" ; ", file);
9485 	    output_operand_lossage ("invalid operand code '%c'", code);
9490     print_reg (x, code, file);
9494       /* No `byte ptr' prefix for call instructions or BLKmode operands.  */
9495       if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
9496 	  && GET_MODE (x) != BLKmode)
9499 	  switch (GET_MODE_SIZE (GET_MODE (x)))
9501 	    case 1: size = "BYTE"; break;
9502 	    case 2: size = "WORD"; break;
9503 	    case 4: size = "DWORD"; break;
9504 	    case 8: size = "QWORD"; break;
9505 	    case 12: size = "XWORD"; break;
9507 	      if (GET_MODE (x) == XFmode)
9516 	  /* Check for explicit size override (codes 'b', 'w' and 'k')  */
9519 	  else if (code == 'w')
9521 	  else if (code == 'k')
9525 	  fputs (" PTR ", file);
9529       /* Avoid (%rip) for call operands.  */
9530       if (CONSTANT_ADDRESS_P (x) && code == 'P'
9531 	  && !CONST_INT_P (x))
9532 	output_addr_const (file, x);
9533       else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
9534 	output_operand_lossage ("invalid constraints for operand");
9539   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
/* SFmode constants are emitted as their 32-bit bit pattern in hex.  */
9544       REAL_VALUE_FROM_CONST_DOUBLE (r, x);
9545       REAL_VALUE_TO_TARGET_SINGLE (r, l);
9547       if (ASSEMBLER_DIALECT == ASM_ATT)
9549       fprintf (file, "0x%08lx", (long unsigned int) l);
9552   /* These float cases don't actually occur as immediate operands.  */
9553   else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
9557       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9558       fprintf (file, "%s", dstr);
9561   else if (GET_CODE (x) == CONST_DOUBLE
9562 	   && GET_MODE (x) == XFmode)
9566       real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
9567       fprintf (file, "%s", dstr);
9572       /* We have patterns that allow zero sets of memory, for instance.
9573 	 In 64-bit mode, we should probably support all 8-byte vectors,
9574 	 since we can in fact encode that into an immediate.  */
9575       if (GET_CODE (x) == CONST_VECTOR)
9577 	  gcc_assert (x == CONST0_RTX (GET_MODE (x)));
9583 	  if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
9585 	      if (ASSEMBLER_DIALECT == ASM_ATT)
9588 	  else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
9589 		   || GET_CODE (x) == LABEL_REF)
9591 	      if (ASSEMBLER_DIALECT == ASM_ATT)
9594 		fputs ("OFFSET FLAT:", file);
9597       if (CONST_INT_P (x))
9598 	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
9600 	output_pic_addr_const (file, x, code);
9602 	output_addr_const (file, x);
9606 /* Print a memory operand whose address is ADDR.  */
/* NOTE(review): this excerpt is elided -- the gcc_assert on `ok', the
   UNSPEC/segment handling preamble, dialect-specific punctuation lines
   and several closing braces are not visible here.  */
9609 print_operand_address (FILE *file, rtx addr)
9611   struct ix86_address parts;
9612   rtx base, index, disp;
/* Decompose ADDR into base + index*scale + disp (+ segment).  */
9614   int ok = ix86_decompose_address (addr, &parts);
9619   index = parts.index;
9621   scale = parts.scale;
/* Print an explicit fs:/gs: segment override when requested.  */
9629       if (ASSEMBLER_DIALECT == ASM_ATT)
9631       fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
9637   /* Use one byte shorter RIP relative addressing for 64bit mode.  */
9638   if (TARGET_64BIT && !base && !index)
9642       if (GET_CODE (disp) == CONST
9643 	  && GET_CODE (XEXP (disp, 0)) == PLUS
9644 	  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9645 	symbol = XEXP (XEXP (disp, 0), 0);
/* Only non-TLS symbols/labels may be addressed %rip-relative.  */
9647       if (GET_CODE (symbol) == LABEL_REF
9648 	  || (GET_CODE (symbol) == SYMBOL_REF
9649 	      && SYMBOL_REF_TLS_MODEL (symbol) == 0))
9652   if (!base && !index)
9654       /* Displacement only requires special attention.  */
9656       if (CONST_INT_P (disp))
9658 	  if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
9659 	    fputs ("ds:", file);
9660 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
9663 	output_pic_addr_const (file, disp, 0);
9665 	output_addr_const (file, disp);
/* AT&T syntax: disp(base,index,scale).  */
9669       if (ASSEMBLER_DIALECT == ASM_ATT)
9674 	      output_pic_addr_const (file, disp, 0);
9675 	    else if (GET_CODE (disp) == LABEL_REF)
9676 	      output_asm_label (disp);
9678 	      output_addr_const (file, disp);
9683 	    print_reg (base, 0, file);
9687 	      print_reg (index, 0, file);
9689 	      fprintf (file, ",%d", scale);
/* Intel syntax: [base + index*scale + disp], symbol first.  */
9695 	  rtx offset = NULL_RTX;
9699 	      /* Pull out the offset of a symbol; print any symbol itself.  */
9700 	      if (GET_CODE (disp) == CONST
9701 		  && GET_CODE (XEXP (disp, 0)) == PLUS
9702 		  && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9704 		  offset = XEXP (XEXP (disp, 0), 1);
9705 		  disp = gen_rtx_CONST (VOIDmode,
9706 					XEXP (XEXP (disp, 0), 0));
9710 		output_pic_addr_const (file, disp, 0);
9711 	      else if (GET_CODE (disp) == LABEL_REF)
9712 		output_asm_label (disp);
9713 	      else if (CONST_INT_P (disp))
9716 		output_addr_const (file, disp);
9722 	      print_reg (base, 0, file);
9725 		  if (INTVAL (offset) >= 0)
9727 		  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9731 	      fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9738 	      print_reg (index, 0, file);
9740 	      fprintf (file, "*%d", scale);
/* Target hook: print UNSPEC-wrapped constants (TLS relocation operators)
   that output_addr_const itself cannot handle.  Non-UNSPEC rtx fall
   through (presumably returning false -- missing lines, TODO confirm).
   NOTE(review): extracted fragment -- several case labels and breaks
   are missing from this switch.  */
9748 output_addr_const_extra (FILE *file, rtx x)
9752 if (GET_CODE (x) != UNSPEC)
9755 op = XVECEXP (x, 0, 0);
9756 switch (XINT (x, 1))
9758 case UNSPEC_GOTTPOFF:
9759 output_addr_const (file, op);
9760 /* FIXME: This might be @TPOFF in Sun ld. */
9761 fputs ("@GOTTPOFF", file);
9764 output_addr_const (file, op);
9765 fputs ("@TPOFF", file);
9768 output_addr_const (file, op);
9770 fputs ("@TPOFF", file);
9772 fputs ("@NTPOFF", file);
9775 output_addr_const (file, op);
9776 fputs ("@DTPOFF", file);
9778 case UNSPEC_GOTNTPOFF:
9779 output_addr_const (file, op);
/* 64-bit form is RIP-relative @GOTTPOFF; otherwise @GOTNTPOFF.  */
9781 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
9782 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
9784 fputs ("@GOTNTPOFF", file);
9786 case UNSPEC_INDNTPOFF:
9787 output_addr_const (file, op);
9788 fputs ("@INDNTPOFF", file);
9798 /* Split one or more DImode RTL references into pairs of SImode
9799 references. The RTL can be REG, offsettable MEM, integer constant, or
9800 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9801 split and "num" is its length. lo_half and hi_half are output arrays
9802 that parallel "operands". */
9805 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9809 rtx op = operands[num];
9811 /* simplify_subreg refuse to split volatile memory addresses,
9812 but we still have to handle it. */
9815 lo_half[num] = adjust_address (op, SImode, 0);
9816 hi_half[num] = adjust_address (op, SImode, 4);
9820 lo_half[num] = simplify_gen_subreg (SImode, op,
9821 GET_MODE (op) == VOIDmode
9822 ? DImode : GET_MODE (op), 0);
9823 hi_half[num] = simplify_gen_subreg (SImode, op,
9824 GET_MODE (op) == VOIDmode
9825 ? DImode : GET_MODE (op), 4);
9829 /* Split one or more TImode RTL references into pairs of DImode
9830 references. The RTL can be REG, offsettable MEM, integer constant, or
9831 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9832 split and "num" is its length. lo_half and hi_half are output arrays
9833 that parallel "operands". */
9836 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9840 rtx op = operands[num];
9842 /* simplify_subreg refuse to split volatile memory addresses, but we
9843 still have to handle it. */
9846 lo_half[num] = adjust_address (op, DImode, 0);
9847 hi_half[num] = adjust_address (op, DImode, 8);
9851 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9852 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9857 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9858 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9859 is the expression of the binary operation. The output may either be
9860 emitted here, or returned to the caller, like all output_* functions.
9862 There is no guarantee that the operands are the same mode, as they
9863 might be within FLOAT or FLOAT_EXTEND expressions. */
9865 #ifndef SYSV386_COMPAT
9866 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9867 wants to fix the assemblers because that causes incompatibility
9868 with gcc. No-one wants to fix gcc because that causes
9869 incompatibility with assemblers... You can use the option of
9870 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9871 #define SYSV386_COMPAT 1
/* NOTE(review): extracted fragment -- interior lines (buffer setup,
   mnemonic strcat calls, break/return statements) are missing; only
   the visible statements are annotated.  */
9875 output_387_binary_op (rtx insn, rtx *operands)
9877 static char buf[30];
9880 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9882 #ifdef ENABLE_CHECKING
9883 /* Even if we do not want to check the inputs, this documents input
9884 constraints. Which helps in understanding the following code. */
9885 if (STACK_REG_P (operands[0])
9886 && ((REG_P (operands[1])
9887 && REGNO (operands[0]) == REGNO (operands[1])
9888 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9889 || (REG_P (operands[2])
9890 && REGNO (operands[0]) == REGNO (operands[2])
9891 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9892 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9895 gcc_assert (is_sse);
/* First switch: pick the base mnemonic; an integer-mode source operand
   selects the fi* (integer) variants of the 387 instructions.  */
9898 switch (GET_CODE (operands[3]))
9901 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9902 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9910 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9911 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9919 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9920 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9928 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9929 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
/* SSE case: append the scalar-single (ss) or scalar-double (sd)
   suffix and the two-operand template.  */
9943 if (GET_MODE (operands[0]) == SFmode)
9944 strcat (buf, "ss\t{%2, %0|%0, %2}");
9946 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* Second switch: choose the operand ordering / popping form for the
   x87 register stack.  */
9951 switch (GET_CODE (operands[3]))
9955 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9957 rtx temp = operands[2];
9958 operands[2] = operands[1];
9962 /* know operands[0] == operands[1]. */
9964 if (MEM_P (operands[2]))
9970 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9972 if (STACK_TOP_P (operands[0]))
9973 /* How is it that we are storing to a dead operand[2]?
9974 Well, presumably operands[1] is dead too. We can't
9975 store the result to st(0) as st(0) gets popped on this
9976 instruction. Instead store to operands[2] (which I
9977 think has to be st(1)). st(1) will be popped later.
9978 gcc <= 2.8.1 didn't have this check and generated
9979 assembly code that the Unixware assembler rejected. */
9980 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9982 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9986 if (STACK_TOP_P (operands[0]))
9987 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9989 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9994 if (MEM_P (operands[1]))
10000 if (MEM_P (operands[2]))
10006 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
10009 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
10010 derived assemblers, confusingly reverse the direction of
10011 the operation for fsub{r} and fdiv{r} when the
10012 destination register is not st(0). The Intel assembler
10013 doesn't have this brain damage. Read !SYSV386_COMPAT to
10014 figure out what the hardware really does. */
10015 if (STACK_TOP_P (operands[0]))
10016 p = "{p\t%0, %2|rp\t%2, %0}";
10018 p = "{rp\t%2, %0|p\t%0, %2}";
10020 if (STACK_TOP_P (operands[0]))
10021 /* As above for fmul/fadd, we can't store to st(0). */
10022 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
10024 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
10029 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
10032 if (STACK_TOP_P (operands[0]))
10033 p = "{rp\t%0, %1|p\t%1, %0}";
10035 p = "{p\t%1, %0|rp\t%0, %1}";
10037 if (STACK_TOP_P (operands[0]))
10038 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
10040 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
10045 if (STACK_TOP_P (operands[0]))
10047 if (STACK_TOP_P (operands[1]))
10048 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
10050 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
10053 else if (STACK_TOP_P (operands[1]))
10056 p = "{\t%1, %0|r\t%0, %1}";
10058 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
10064 p = "{r\t%2, %0|\t%0, %2}";
10066 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
10072 gcc_unreachable ();
10079 /* Return needed mode for entity in optimize_mode_switching pass. */
/* Inspect INSN for i387 control-word requirements: calls and asm
   patterns leave the control word unknown (UNINITIALIZED); insns that
   recog does not recognize impose no requirement (ANY).
   NOTE(review): extracted fragment -- the CALL_P test and the
   per-entity switch bodies are partially missing.  */
10082 ix86_mode_needed (int entity, rtx insn)
10084 enum attr_i387_cw mode;
10086 /* The mode UNINITIALIZED is used to store control word after a
10087 function call or ASM pattern. The mode ANY specify that function
10088 has no requirements on the control word and make no changes in the
10089 bits we are interested in. */
10092 || (NONJUMP_INSN_P (insn)
10093 && (asm_noperands (PATTERN (insn)) >= 0
10094 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
10095 return I387_CW_UNINITIALIZED;
10097 if (recog_memoized (insn) < 0)
10098 return I387_CW_ANY;
/* The i387_cw insn attribute records which rounding mode the pattern
   requires; ENTITY presumably selects which mode it is matched against
   -- TODO confirm against the missing switch.  */
10100 mode = get_attr_i387_cw (insn);
10105 if (mode == I387_CW_TRUNC)
10110 if (mode == I387_CW_FLOOR)
10115 if (mode == I387_CW_CEIL)
10120 if (mode == I387_CW_MASK_PM)
10125 gcc_unreachable ();
10128 return I387_CW_ANY;
10131 /* Output code to initialize control word copies used by trunc?f?i and
10132 rounding patterns. CURRENT_MODE is set to current control word,
10133 while NEW_MODE is set to new control word. */
10136 emit_i387_cw_initialization (int mode)
10138 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
10141 enum ix86_stack_slot slot;
10143 rtx reg = gen_reg_rtx (HImode);
/* Save the live control word with fnstcw, then build the modified copy
   in REG.  */
10145 emit_insn (gen_x86_fnstcw_1 (stored_mode));
10146 emit_move_insn (reg, copy_rtx (stored_mode))
/* On 64-bit, partial-register-stall, or size-optimized targets, mask
   the rounding-control bits with plain 16-bit and/or; the second arm
   below instead uses movsi_insv_1 to overwrite the RC nibble directly.  */
10148 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
10152 case I387_CW_TRUNC:
10153 /* round toward zero (truncate) */
10154 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
10155 slot = SLOT_CW_TRUNC;
10158 case I387_CW_FLOOR:
10159 /* round down toward -oo */
10160 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10161 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
10162 slot = SLOT_CW_FLOOR;
10166 /* round up toward +oo */
10167 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
10168 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
10169 slot = SLOT_CW_CEIL;
10172 case I387_CW_MASK_PM:
10173 /* mask precision exception for nearbyint() */
10174 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10175 slot = SLOT_CW_MASK_PM;
10179 gcc_unreachable ();
10186 case I387_CW_TRUNC:
10187 /* round toward zero (truncate) */
10188 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
10189 slot = SLOT_CW_TRUNC;
10192 case I387_CW_FLOOR:
10193 /* round down toward -oo */
10194 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
10195 slot = SLOT_CW_FLOOR;
10199 /* round up toward +oo */
10200 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
10201 slot = SLOT_CW_CEIL;
10204 case I387_CW_MASK_PM:
10205 /* mask precision exception for nearbyint() */
10206 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
10207 slot = SLOT_CW_MASK_PM;
10211 gcc_unreachable ();
/* Spill the adjusted control word into its dedicated stack slot so the
   rounding patterns can fldcw it.  */
10215 gcc_assert (slot < MAX_386_STACK_LOCALS);
10217 new_mode = assign_386_stack_local (HImode, slot);
10218 emit_move_insn (new_mode, reg);
10221 /* Output code for INSN to convert a float to a signed int. OPERANDS
10222 are the insn operands. The output may be [HSD]Imode and the input
10223 operand may be [SDX]Fmode. */
10226 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
/* Whether st(0) dies in this insn; if not, the value must be duplicated
   so the popping store leaves the register stack balanced.  */
10228 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
10229 int dimode_p = GET_MODE (operands[0]) == DImode;
10230 int round_mode = get_attr_i387_cw (insn);
10232 /* Jump through a hoop or two for DImode, since the hardware has no
10233 non-popping instruction. We used to do this a different way, but
10234 that was somewhat fragile and broke with post-reload splitters. */
10235 if ((dimode_p || fisttp) && !stack_top_dies)
10236 output_asm_insn ("fld\t%y1", operands);
10238 gcc_assert (STACK_TOP_P (operands[1]));
10239 gcc_assert (MEM_P (operands[0]));
10240 gcc_assert (GET_MODE (operands[1]) != TFmode);
/* SSE3 fisttp truncates regardless of the control-word setting.  */
10243 output_asm_insn ("fisttp%z0\t%0", operands);
/* Classic path: load the truncating control word (%3), store the
   integer, then restore the saved control word (%2).  */
10246 if (round_mode != I387_CW_ANY)
10247 output_asm_insn ("fldcw\t%3", operands);
10248 if (stack_top_dies || dimode_p)
10249 output_asm_insn ("fistp%z0\t%0", operands);
10251 output_asm_insn ("fist%z0\t%0", operands);
10252 if (round_mode != I387_CW_ANY)
10253 output_asm_insn ("fldcw\t%2", operands);
10259 /* Output code for x87 ffreep insn. The OPNO argument, which may only
10260 have the values zero or one, indicates the ffreep insn's operand
10261 from the OPERANDS array. */
10263 static const char *
10264 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
10266 if (TARGET_USE_FFREEP)
10267 #if HAVE_AS_IX86_FFREEP
10268 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
10271 static char retval[] = ".word\t0xc_df";
10272 int regno = REGNO (operands[opno]);
10274 gcc_assert (FP_REGNO_P (regno));
10276 retval[9] = '0' + (regno - FIRST_STACK_REG);
10281 return opno ? "fstp\t%y1" : "fstp\t%y0";
10285 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
10286 should be used. UNORDERED_P is true when fucom should be used. */
/* NOTE(review): extracted fragment -- some braces/else arms and the
   final return are missing; only visible statements are annotated.  */
10289 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
10291 int stack_top_dies;
10292 rtx cmp_op0, cmp_op1;
10293 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
/* The fcomi forms compare operands 0/1; the fnstsw forms compare 1/2
   (operand 0 being the status-word output) -- layout per the two
   assignments below.  */
10297 cmp_op0 = operands[0];
10298 cmp_op1 = operands[1];
10302 cmp_op0 = operands[1];
10303 cmp_op1 = operands[2];
/* SSE comparisons: [u]comiss / [u]comisd set EFLAGS directly.  */
10308 if (GET_MODE (operands[0]) == SFmode)
10310 return "ucomiss\t{%1, %0|%0, %1}";
10312 return "comiss\t{%1, %0|%0, %1}";
10315 return "ucomisd\t{%1, %0|%0, %1}";
10317 return "comisd\t{%1, %0|%0, %1}";
10320 gcc_assert (STACK_TOP_P (cmp_op0));
10322 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Comparison against zero uses ftst, popping st(0) afterwards if it
   dies.  */
10324 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
10326 if (stack_top_dies)
10328 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
10329 return output_387_ffreep (operands, 1);
10332 return "ftst\n\tfnstsw\t%0";
10335 if (STACK_REG_P (cmp_op1)
10337 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
10338 && REGNO (cmp_op1) != FIRST_STACK_REG)
10340 /* If both the top of the 387 stack dies, and the other operand
10341 is also a stack register that dies, then this must be a
10342 `fcompp' float compare */
10346 /* There is no double popping fcomi variant. Fortunately,
10347 eflags is immune from the fstp's cc clobbering. */
10349 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
10351 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
10352 return output_387_ffreep (operands, 0);
10357 return "fucompp\n\tfnstsw\t%0";
10359 return "fcompp\n\tfnstsw\t%0";
10364 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
10366 static const char * const alt[16] =
10368 "fcom%z2\t%y2\n\tfnstsw\t%0",
10369 "fcomp%z2\t%y2\n\tfnstsw\t%0",
10370 "fucom%z2\t%y2\n\tfnstsw\t%0",
10371 "fucomp%z2\t%y2\n\tfnstsw\t%0",
10373 "ficom%z2\t%y2\n\tfnstsw\t%0",
10374 "ficomp%z2\t%y2\n\tfnstsw\t%0",
10378 "fcomi\t{%y1, %0|%0, %y1}",
10379 "fcomip\t{%y1, %0|%0, %y1}",
10380 "fucomi\t{%y1, %0|%0, %y1}",
10381 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index described in the comment above and select the
   matching template from ALT.  */
10392 mask = eflags_p << 3;
10393 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
10394 mask |= unordered_p << 1;
10395 mask |= stack_top_dies;
10397 gcc_assert (mask < 16);
10406 ix86_output_addr_vec_elt (FILE *file, int value)
10408 const char *directive = ASM_LONG;
10412 directive = ASM_QUAD;
10414 gcc_assert (!TARGET_64BIT);
10417 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
/* Emit one element of a PIC-relative jump table: the difference between
   local label VALUE and either label REL (64-bit / VxWorks RTP) or the
   GOT base (32-bit PIC).
   NOTE(review): extracted fragment -- the #ifdef/#else structure
   around the directive selection is partially missing.  */
10421 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
10423 const char *directive = ASM_LONG;
10426 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
10427 directive = ASM_QUAD;
10429 gcc_assert (!TARGET_64BIT);
10431 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
10432 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
10433 fprintf (file, "%s%s%d-%s%d\n",
10434 directive, LPREFIX, value, LPREFIX, rel);
10435 else if (HAVE_AS_GOTOFF_IN_DATA)
10436 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
10438 else if (TARGET_MACHO)
/* Darwin: label minus the picbase emitted by the machopic machinery.  */
10440 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
10441 machopic_output_function_base_name (file);
10442 fprintf(file, "\n");
10446 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
10447 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
10450 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
10454 ix86_expand_clear (rtx dest)
10458 /* We play register width games, which are only valid after reload. */
10459 gcc_assert (reload_completed);
10461 /* Avoid HImode and its attendant prefix byte. */
10462 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
10463 dest = gen_rtx_REG (SImode, REGNO (dest));
10464 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
10466 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
10467 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
10469 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10470 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
10476 /* X is an unchanging MEM. If it is a constant pool reference, return
10477 the constant pool rtx, else NULL. */
10480 maybe_get_pool_constant (rtx x)
10482 x = ix86_delegitimize_address (XEXP (x, 0));
10484 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
10485 return get_pool_constant (x);
/* Expand a scalar move of MODE: operands[0] = operands[1], first
   legitimizing TLS, dllimport and PIC symbol references in operands[1].
   NOTE(review): extracted fragment -- the op0/op1 assignments, several
   braces and else arms are missing; only visible lines annotated.  */
10491 ix86_expand_move (enum machine_mode mode, rtx operands[])
10494 enum tls_model model;
/* Bare SYMBOL_REF source: TLS and dllimport symbols need rewriting.  */
10499 if (GET_CODE (op1) == SYMBOL_REF)
10501 model = SYMBOL_REF_TLS_MODEL (op1);
10504 op1 = legitimize_tls_address (op1, model, true);
10505 op1 = force_operand (op1, op0);
10509 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10510 && SYMBOL_REF_DLLIMPORT_P (op1))
10511 op1 = legitimize_dllimport_symbol (op1, false);
/* CONST (PLUS symbol addend): legitimize the symbol part, then re-add
   the addend.  */
10513 else if (GET_CODE (op1) == CONST
10514 && GET_CODE (XEXP (op1, 0)) == PLUS
10515 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
10517 rtx addend = XEXP (XEXP (op1, 0), 1);
10518 rtx symbol = XEXP (XEXP (op1, 0), 0);
10521 model = SYMBOL_REF_TLS_MODEL (symbol);
10523 tmp = legitimize_tls_address (symbol, model, true);
10524 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10525 && SYMBOL_REF_DLLIMPORT_P (symbol))
10526 tmp = legitimize_dllimport_symbol (symbol, true);
10530 tmp = force_operand (tmp, NULL);
10531 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
10532 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic addresses must go through the PIC machinery.  */
10538 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
10540 if (TARGET_MACHO && !TARGET_64BIT)
10545 rtx temp = ((reload_in_progress
10546 || ((op0 && REG_P (op0))
10548 ? op0 : gen_reg_rtx (Pmode));
10549 op1 = machopic_indirect_data_reference (op1, temp);
10550 op1 = machopic_legitimize_pic_address (op1, mode,
10551 temp == op1 ? 0 : temp);
10553 else if (MACHOPIC_INDIRECT)
10554 op1 = machopic_indirect_data_reference (op1, 0);
10562 op1 = force_reg (Pmode, op1);
10563 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
10565 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
10566 op1 = legitimize_pic_address (op1, reg);
/* Non-PIC: a symbolic source that cannot be pushed or stored directly
   is forced into a register.  */
10575 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
10576 || !push_operand (op0, mode))
10578 op1 = force_reg (mode, op1);
10580 if (push_operand (op0, mode)
10581 && ! general_no_elim_operand (op1, mode))
10582 op1 = copy_to_mode_reg (mode, op1);
10584 /* Force large constants in 64bit compilation into register
10585 to get them CSEed. */
10586 if (can_create_pseudo_p ()
10587 && (mode == DImode) && TARGET_64BIT
10588 && immediate_operand (op1, mode)
10589 && !x86_64_zext_immediate_operand (op1, VOIDmode)
10590 && !register_operand (op0, mode)
10592 op1 = copy_to_mode_reg (mode, op1);
10594 if (can_create_pseudo_p ()
10595 && FLOAT_MODE_P (mode)
10596 && GET_CODE (op1) == CONST_DOUBLE)
10598 /* If we are loading a floating point constant to a register,
10599 force the value to memory now, since we'll get better code
10600 out the back end. */
10602 op1 = validize_mem (force_const_mem (mode, op1));
10603 if (!register_operand (op0, mode))
10605 rtx temp = gen_reg_rtx (mode);
10606 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
10607 emit_move_insn (op0, temp);
/* Finally emit the (possibly rewritten) move.  */
10613 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move operands[0] = operands[1], forcing
   non-trivial constants into the constant pool and routing misaligned
   SSE operands through ix86_expand_vector_move_misalign.  */
10617 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
10619 rtx op0 = operands[0], op1 = operands[1];
10620 unsigned int align = GET_MODE_ALIGNMENT (mode);
10622 /* Force constants other than zero into memory. We do not know how
10623 the instructions used to build constants modify the upper 64 bits
10624 of the register, once we have that information we may be able
10625 to handle some of them more efficiently. */
10626 if (can_create_pseudo_p ()
10627 && register_operand (op0, mode)
10628 && (CONSTANT_P (op1)
10629 || (GET_CODE (op1) == SUBREG
10630 && CONSTANT_P (SUBREG_REG (op1))))
10631 && standard_sse_constant_p (op1) <= 0)
10632 op1 = validize_mem (force_const_mem (mode, op1));
10634 /* We need to check memory alignment for SSE mode since attribute
10635 can make operands unaligned. */
10636 if (can_create_pseudo_p ()
10637 && SSE_REG_MODE_P (mode)
10638 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
10639 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
10643 /* ix86_expand_vector_move_misalign() does not like constants ... */
10644 if (CONSTANT_P (op1)
10645 || (GET_CODE (op1) == SUBREG
10646 && CONSTANT_P (SUBREG_REG (op1))))
10647 op1 = validize_mem (force_const_mem (mode, op1));
10649 /* ... nor both arguments in memory. */
10650 if (!register_operand (op0, mode)
10651 && !register_operand (op1, mode))
10652 op1 = force_reg (mode, op1);
/* Hand the cleaned-up pair to the misalign expander.  */
10654 tmp[0] = op0; tmp[1] = op1;
10655 ix86_expand_vector_move_misalign (mode, tmp);
10659 /* Make operand1 a register if it isn't already. */
10660 if (can_create_pseudo_p ()
10661 && !register_operand (op0, mode)
10662 && !register_operand (op1, mode))
10664 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
10668 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
10671 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
10672 straight to ix86_expand_vector_move. */
10673 /* Code generation for scalar reg-reg moves of single and double precision data:
10674 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10678 if (x86_sse_partial_reg_dependency == true)
10683 Code generation for scalar loads of double precision data:
10684 if (x86_sse_split_regs == true)
10685 movlpd mem, reg (gas syntax)
10689 Code generation for unaligned packed loads of single precision data
10690 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10691 if (x86_sse_unaligned_move_optimal)
10694 if (x86_sse_partial_reg_dependency == true)
10706 Code generation for unaligned packed loads of double precision data
10707 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10708 if (x86_sse_unaligned_move_optimal)
10711 if (x86_sse_split_regs == true)
/* NOTE(review): extracted fragment -- the MEM_P(op1) / MEM_P(op0)
   branch structure is partially missing; only visible lines annotated.  */
10724 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10733 /* If we're optimizing for size, movups is the smallest. */
10736 op0 = gen_lowpart (V4SFmode, op0);
10737 op1 = gen_lowpart (V4SFmode, op1);
10738 emit_insn (gen_sse_movups (op0, op1));
/* --- unaligned load (op1 in memory) --- */
10742 /* ??? If we have typed data, then it would appear that using
10743 movdqu is the only way to get unaligned data loaded with
10745 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10747 op0 = gen_lowpart (V16QImode, op0);
10748 op1 = gen_lowpart (V16QImode, op1);
10749 emit_insn (gen_sse2_movdqu (op0, op1));
10753 if (TARGET_SSE2 && mode == V2DFmode)
10757 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10759 op0 = gen_lowpart (V2DFmode, op0);
10760 op1 = gen_lowpart (V2DFmode, op1);
10761 emit_insn (gen_sse2_movupd (op0, op1));
10765 /* When SSE registers are split into halves, we can avoid
10766 writing to the top half twice. */
10767 if (TARGET_SSE_SPLIT_REGS)
10769 emit_clobber (op0);
10774 /* ??? Not sure about the best option for the Intel chips.
10775 The following would seem to satisfy; the register is
10776 entirely cleared, breaking the dependency chain. We
10777 then store to the upper half, with a dependency depth
10778 of one. A rumor has it that Intel recommends two movsd
10779 followed by an unpacklpd, but this is unconfirmed. And
10780 given that the dependency depth of the unpacklpd would
10781 still be one, I'm not sure why this would be better. */
10782 zero = CONST0_RTX (V2DFmode);
/* Load the two 64-bit halves separately (movlpd/movhpd).  */
10785 m = adjust_address (op1, DFmode, 0);
10786 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10787 m = adjust_address (op1, DFmode, 8);
10788 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10792 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10794 op0 = gen_lowpart (V4SFmode, op0);
10795 op1 = gen_lowpart (V4SFmode, op1);
10796 emit_insn (gen_sse_movups (op0, op1));
/* Break the false dependency on op0's previous value before the
   two half-register loads below.  */
10800 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10801 emit_move_insn (op0, CONST0_RTX (mode));
10803 emit_clobber (op0);
10805 if (mode != V4SFmode)
10806 op0 = gen_lowpart (V4SFmode, op0);
10807 m = adjust_address (op1, V2SFmode, 0);
10808 emit_insn (gen_sse_loadlps (op0, op0, m));
10809 m = adjust_address (op1, V2SFmode, 8);
10810 emit_insn (gen_sse_loadhps (op0, op0, m));
/* --- unaligned store (op0 in memory) --- */
10813 else if (MEM_P (op0))
10815 /* If we're optimizing for size, movups is the smallest. */
10818 op0 = gen_lowpart (V4SFmode, op0);
10819 op1 = gen_lowpart (V4SFmode, op1);
10820 emit_insn (gen_sse_movups (op0, op1));
10824 /* ??? Similar to above, only less clear because of quote
10825 typeless stores unquote. */
10826 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10827 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10829 op0 = gen_lowpart (V16QImode, op0);
10830 op1 = gen_lowpart (V16QImode, op1);
10831 emit_insn (gen_sse2_movdqu (op0, op1));
10835 if (TARGET_SSE2 && mode == V2DFmode)
10837 m = adjust_address (op0, DFmode, 0);
10838 emit_insn (gen_sse2_storelpd (m, op1));
10839 m = adjust_address (op0, DFmode, 8);
10840 emit_insn (gen_sse2_storehpd (m, op1));
10844 if (mode != V4SFmode)
10845 op1 = gen_lowpart (V4SFmode, op1);
10846 m = adjust_address (op0, V2SFmode, 0);
10847 emit_insn (gen_sse_storelps (m, op1));
10848 m = adjust_address (op0, V2SFmode, 8);
10849 emit_insn (gen_sse_storehps (m, op1));
10853 gcc_unreachable ();
10856 /* Expand a push in MODE. This is some mode for which we do not support
10857 proper push instructions, at least from the registers that we expect
10858 the value to live in. */
10861 ix86_expand_push (enum machine_mode mode, rtx x)
10865 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10866 GEN_INT (-GET_MODE_SIZE (mode)),
10867 stack_pointer_rtx, 1, OPTAB_DIRECT);
10868 if (tmp != stack_pointer_rtx)
10869 emit_move_insn (stack_pointer_rtx, tmp);
10871 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10872 emit_move_insn (tmp, x);
10875 /* Helper function of ix86_fixup_binary_operands to canonicalize
10876 operand order. Returns true if the operands should be swapped. */
10879 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10882 rtx dst = operands[0];
10883 rtx src1 = operands[1];
10884 rtx src2 = operands[2];
10886 /* If the operation is not commutative, we can't do anything. */
10887 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10890 /* Highest priority is that src1 should match dst. */
10891 if (rtx_equal_p (dst, src1))
10893 if (rtx_equal_p (dst, src2))
10896 /* Next highest priority is that immediate constants come second. */
10897 if (immediate_operand (src2, mode))
10899 if (immediate_operand (src1, mode))
10902 /* Lowest priority is that memory references should come second. */
10912 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10913 destination to use for the operation. If different from the true
10914 destination in operands[0], a copy operation will be required. */
10917 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10920 rtx dst = operands[0];
10921 rtx src1 = operands[1];
10922 rtx src2 = operands[2];
10924 /* Canonicalize operand order. */
10925 if (ix86_swap_binary_operands_p (code, mode, operands))
10929 /* It is invalid to swap operands of different modes. */
10930 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
10937 /* Both source operands cannot be in memory. */
10938 if (MEM_P (src1) && MEM_P (src2))
10940 /* Optimization: Only read from memory once. */
10941 if (rtx_equal_p (src1, src2))
10943 src2 = force_reg (mode, src2);
10947 src2 = force_reg (mode, src2);
10950 /* If the destination is memory, and we do not have matching source
10951 operands, do things in registers. */
10952 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10953 dst = gen_reg_rtx (mode);
10955 /* Source 1 cannot be a constant. */
10956 if (CONSTANT_P (src1))
10957 src1 = force_reg (mode, src1);
10959 /* Source 1 cannot be a non-matching memory. */
10960 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10961 src1 = force_reg (mode, src1);
10963 operands[1] = src1;
10964 operands[2] = src2;
10968 /* Similarly, but assume that the destination has already been
10969 set up properly. */
10972 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10973 enum machine_mode mode, rtx operands[])
10975 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10976 gcc_assert (dst == operands[0]);
10979 /* Attempt to expand a binary operator. Make the expansion closer to the
10980 actual machine, then just general_operand, which will allow 3 separate
10981 memory references (one output, two input) in a single insn. */
10984 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10987 rtx src1, src2, dst, op, clob;
10989 dst = ix86_fixup_binary_operands (code, mode, operands);
10990 src1 = operands[1];
10991 src2 = operands[2];
10993 /* Emit the instruction. */
10995 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10996 if (reload_in_progress)
10998 /* Reload doesn't know about the flags register, and doesn't know that
10999 it doesn't want to clobber it. We can only do this with PLUS. */
11000 gcc_assert (code == PLUS);
11005 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11006 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11009 /* Fix up the destination if needed. */
11010 if (dst != operands[0])
11011 emit_move_insn (operands[0], dst);
11014 /* Return TRUE or FALSE depending on whether the binary operator meets the
11015 appropriate constraints. */
11018 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
11021 rtx dst = operands[0];
11022 rtx src1 = operands[1];
11023 rtx src2 = operands[2];
11025 /* Both source operands cannot be in memory. */
11026 if (MEM_P (src1) && MEM_P (src2))
11029 /* Canonicalize operand order for commutative operators. */
11030 if (ix86_swap_binary_operands_p (code, mode, operands))
11037 /* If the destination is memory, we must have a matching source operand. */
11038 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
11041 /* Source 1 cannot be a constant. */
11042 if (CONSTANT_P (src1))
11045 /* Source 1 cannot be a non-matching memory. */
11046 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
11052 /* Attempt to expand a unary operator. Make the expansion closer to the
11053 actual machine, then just general_operand, which will allow 2 separate
11054 memory references (one output, one input) in a single insn. */
11057 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
11060 int matching_memory;
11061 rtx src, dst, op, clob;
11066 /* If the destination is memory, and we do not have matching source
11067 operands, do things in registers. */
11068 matching_memory = 0;
11071 if (rtx_equal_p (dst, src))
11072 matching_memory = 1;
11074 dst = gen_reg_rtx (mode);
11077 /* When source operand is memory, destination must match. */
11078 if (MEM_P (src) && !matching_memory)
11079 src = force_reg (mode, src);
11081 /* Emit the instruction. */
11083 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
11084 if (reload_in_progress || code == NOT)
11086 /* Reload doesn't know about the flags register, and doesn't know that
11087 it doesn't want to clobber it. */
11088 gcc_assert (code == NOT);
11093 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11094 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
11097 /* Fix up the destination if needed. */
11098 if (dst != operands[0])
11099 emit_move_insn (operands[0], dst);
11102 /* Return TRUE or FALSE depending on whether the unary operator meets the
11103 appropriate constraints. */
11106 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
11107 enum machine_mode mode ATTRIBUTE_UNUSED,
11108 rtx operands[2] ATTRIBUTE_UNUSED)
11110 /* If one of operands is memory, source and destination must match. */
/* x86 unary ALU insns are read-modify-write on a single operand, so a
   memory destination must be the same rtx as the memory source. */
11111 if ((MEM_P (operands[0])
11112 || MEM_P (operands[1]))
11113 && ! rtx_equal_p (operands[0], operands[1]))
11118 /* Post-reload splitter for converting an SF or DFmode value in an
11119 SSE register into an unsigned SImode. */
11122 ix86_split_convert_uns_si_sse (rtx operands[])
11124 enum machine_mode vecmode;
11125 rtx value, large, zero_or_two31, input, two31, x;
/* operands[1..4]: scratch "large" mask reg, scratch "zero_or_two31" reg,
   the scalar input, and the 2**31 constant (reg or mem). */
11127 large = operands[1];
11128 zero_or_two31 = operands[2];
11129 input = operands[3];
11130 two31 = operands[4];
11131 vecmode = GET_MODE (large);
11132 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
11134 /* Load up the value into the low element. We must ensure that the other
11135 elements are valid floats -- zero is the easiest such value. */
11138 if (vecmode == V4SFmode)
11139 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
11141 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
/* Alternate path (elided condition above): input already lives in an SSE
   register -- zero the destination, then merge in the low element. */
11145 input = gen_rtx_REG (vecmode, REGNO (input));
11146 emit_move_insn (value, CONST0_RTX (vecmode));
11147 if (vecmode == V4SFmode)
11148 emit_insn (gen_sse_movss (value, value, input));
11150 emit_insn (gen_sse2_movsd (value, value, input));
11153 emit_move_insn (large, two31);
11154 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) as an all-ones/all-zeros FP compare mask.  */
11156 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
11157 emit_insn (gen_rtx_SET (VOIDmode, large, x));
/* Subtract 2**31 only where the value is >= 2**31 ...  */
11159 x = gen_rtx_AND (vecmode, zero_or_two31, large);
11160 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
11162 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
11163 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* ... then build a sign-bit correction mask (mask << 31) in V4SImode.  */
11165 large = gen_rtx_REG (V4SImode, REGNO (large));
11166 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
/* Truncating convert to signed int, then XOR the 2**31 bit back in for
   values that were reduced above.  */
11168 x = gen_rtx_REG (V4SImode, REGNO (value));
11169 if (vecmode == V4SFmode)
11170 emit_insn (gen_sse2_cvttps2dq (x, value));
11172 emit_insn (gen_sse2_cvttpd2dq (x, value));
11175 emit_insn (gen_xorv4si3 (value, value, large));
11178 /* Convert an unsigned DImode value into a DFmode, using only SSE.
11179 Expects the 64-bit DImode to be supplied in a pair of integral
11180 registers. Requires SSE2; will use SSE3 if available. For x86_32,
11181 -mfpmath=sse, !optimize_size only. */
11184 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
11186 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
11187 rtx int_xmm, fp_xmm;
11188 rtx biases, exponents;
/* Get the 64-bit input into the low half of an XMM register, choosing
   the cheapest path the target supports. */
11191 int_xmm = gen_reg_rtx (V4SImode);
11192 if (TARGET_INTER_UNIT_MOVES)
11193 emit_insn (gen_movdi_to_sse (int_xmm, input));
11194 else if (TARGET_SSE_SPLIT_REGS)
11196 emit_clobber (int_xmm);
11197 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
11201 x = gen_reg_rtx (V2DImode);
11202 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
11203 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
/* Exponent words for 0x1.0p52 and 0x1.0p84, to be interleaved above the
   two 32-bit halves of the input. */
11206 x = gen_rtx_CONST_VECTOR (V4SImode,
11207 gen_rtvec (4, GEN_INT (0x43300000UL),
11208 GEN_INT (0x45300000UL),
11209 const0_rtx, const0_rtx));
11210 exponents = validize_mem (force_const_mem (V4SImode, x));
11212 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
11213 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
11215 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
11216 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
11217 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
11218 (0x1.0p84 + double(fp_value_hi_xmm)).
11219 Note these exponents differ by 32. */
11221 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
11223 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
11224 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
11225 real_ldexp (&bias_lo_rvt, &dconst1, 52);
11226 real_ldexp (&bias_hi_rvt, &dconst1, 84);
11227 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
11228 x = const_double_from_real_value (bias_hi_rvt, DFmode);
11229 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
11230 biases = validize_mem (force_const_mem (V2DFmode, biases));
11231 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
11233 /* Add the upper and lower DFmode values together. */
/* SSE3 path: horizontal add does it in one insn; otherwise (elided
   condition) unpack-high and add. */
11235 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
11238 x = copy_to_mode_reg (V2DFmode, fp_xmm);
11239 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
11240 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
/* Extract the low DF element into the scalar target.  */
11243 ix86_expand_vector_extract (false, target, fp_xmm, 0);
11246 /* Not used, but eases macroization of patterns. */
/* Placeholder so the machine-description macros can name this function
   for XFmode; it must never actually be reached. */
11248 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
11249 rtx input ATTRIBUTE_UNUSED)
11251 gcc_unreachable ();
11254 /* Convert an unsigned SImode value into a DFmode. Only currently used
11255 for SSE, but applicable anywhere. */
11258 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
11260 REAL_VALUE_TYPE TWO31r;
/* x = input - 2**31 (as a signed value); this maps [0,2**32) onto the
   signed range so the signed SI->DF convert can be used. */
11263 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
11264 NULL, 1, OPTAB_DIRECT);
11266 fp = gen_reg_rtx (DFmode);
11267 emit_insn (gen_floatsidf2 (fp, x));
/* Add back 2**31 in the FP domain; exact in DFmode.  */
11269 real_ldexp (&TWO31r, &dconst1, 31);
11270 x = const_double_from_real_value (TWO31r, DFmode);
11272 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
/* expand_simple_binop may not have used TARGET; copy if needed
   (guarding condition elided in this view). */
11274 emit_move_insn (target, x);
11277 /* Convert a signed DImode value into a DFmode. Only used for SSE in
11278 32-bit mode; otherwise we have a direct convert instruction. */
11281 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
11283 REAL_VALUE_TYPE TWO32r;
11284 rtx fp_lo, fp_hi, x;
11286 fp_lo = gen_reg_rtx (DFmode);
11287 fp_hi = gen_reg_rtx (DFmode);
/* result = (double) hi * 2**32 + (double) (unsigned) lo.  The high word
   carries the sign; the low word is converted as unsigned. */
11289 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
11291 real_ldexp (&TWO32r, &dconst1, 32);
11292 x = const_double_from_real_value (TWO32r, DFmode);
11293 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
11295 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
11297 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
/* Copy to TARGET if the binop did not land there (condition elided).  */
11300 emit_move_insn (target, x);
11303 /* Convert an unsigned SImode value into a SFmode, using only SSE.
11304 For x86_32, -mfpmath=sse, !optimize_size only. */
11306 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
11308 REAL_VALUE_TYPE ONE16r;
11309 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split into 16-bit halves: result = float(hi16) * 2**16 + float(lo16).
   Each half fits exactly in SFmode, avoiding unsigned-range issues. */
11311 real_ldexp (&ONE16r, &dconst1, 16);
11312 x = const_double_from_real_value (ONE16r, SFmode);
11313 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
11314 NULL, 0, OPTAB_DIRECT);
11315 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
11316 NULL, 0, OPTAB_DIRECT);
11317 fp_hi = gen_reg_rtx (SFmode);
11318 fp_lo = gen_reg_rtx (SFmode);
11319 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
11320 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
11321 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
11323 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
11325 if (!rtx_equal_p (target, fp_hi))
11326 emit_move_insn (target, fp_hi);
11329 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
11330 then replicate the value for all elements of the vector
/* ... otherwise put VALUE in the low element and zero the rest.
   Mode switch bodies below; the case labels are elided in this view. */
11334 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
/* V4SImode: integer elements are always replicated.  */
11341 v = gen_rtvec (4, value, value, value, value);
11342 return gen_rtx_CONST_VECTOR (V4SImode, v);
/* V2DImode: likewise.  */
11346 v = gen_rtvec (2, value, value);
11347 return gen_rtx_CONST_VECTOR (V2DImode, v);
/* V4SFmode: replicate or low-element-only depending on VECT.  */
11351 v = gen_rtvec (4, value, value, value, value);
11353 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
11354 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11355 return gen_rtx_CONST_VECTOR (V4SFmode, v);
/* V2DFmode: replicate or low-element-only depending on VECT.  */
11359 v = gen_rtvec (2, value, value);
11361 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
11362 return gen_rtx_CONST_VECTOR (V2DFmode, v);
11365 gcc_unreachable ();
11369 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
11370 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
11371 for an SSE register. If VECT is true, then replicate the mask for
11372 all elements of the vector register. If INVERT is true, then create
11373 a mask excluding the sign bit. */
11376 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
11378 enum machine_mode vec_mode, imode;
11379 HOST_WIDE_INT hi, lo;
11384 /* Find the sign bit, sign extended to 2*HWI. */
/* SFmode/SImode case: sign bit is bit 31.  */
11390 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
11391 lo = 0x80000000, hi = lo < 0;
/* DFmode/DImode case: sign bit is bit 63; the lo/hi split depends on
   whether a HOST_WIDE_INT can hold 64 bits. */
11397 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
11398 if (HOST_BITS_PER_WIDE_INT >= 64)
11399 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
11401 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* TFmode (128-bit) case: no vector mode; build the mask as an integer
   and force it into a register below. */
11406 vec_mode = VOIDmode;
11407 if (HOST_BITS_PER_WIDE_INT >= 64)
11410 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
11417 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
11421 lo = ~lo, hi = ~hi;
11427 mask = immed_double_const (lo, hi, imode);
11429 vec = gen_rtvec (2, v, mask);
11430 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
11431 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
11438 gcc_unreachable ();
/* INVERT: complement to get "all bits except the sign bit".  */
11442 lo = ~lo, hi = ~hi;
11444 /* Force this value into the low part of a fp vector constant. */
11445 mask = immed_double_const (lo, hi, imode);
11446 mask = gen_lowpart (mode, mask);
11448 if (vec_mode == VOIDmode)
11449 return force_reg (mode, mask);
11451 v = ix86_build_const_vector (mode, vect, mask);
11452 return force_reg (vec_mode, v);
11455 /* Generate code for floating point ABS or NEG. */
11458 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
11461 rtx mask, set, use, clob, dst, src;
11462 bool use_sse = false;
11463 bool vector_mode = VECTOR_MODE_P (mode);
11464 enum machine_mode elt_mode = mode;
/* Decide whether SSE bit-mask sequences apply: always for vector modes
   and TFmode, otherwise only under -mfpmath=sse for SSE scalar modes. */
11468 elt_mode = GET_MODE_INNER (mode);
11471 else if (mode == TFmode)
11473 else if (TARGET_SSE_MATH)
11474 use_sse = SSE_FLOAT_MODE_P (mode);
11476 /* NEG and ABS performed with SSE use bitwise mask operations.
11477 Create the appropriate mask now. */
11479 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
/* SSE path: NEG = XOR with the sign-bit mask, ABS = AND with the
   inverted mask. */
11488 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
11489 set = gen_rtx_SET (VOIDmode, dst, set);
/* x87 path: emit the plain unary rtx, plus a USE of the mask and a
   flags clobber so later splitters have both alternatives available. */
11494 set = gen_rtx_fmt_e (code, mode, src);
11495 set = gen_rtx_SET (VOIDmode, dst, set);
11498 use = gen_rtx_USE (VOIDmode, mask);
11499 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
11500 emit_insn (gen_rtx_PARALLEL (VOIDmode,
11501 gen_rtvec (3, set, use, clob)));
11508 /* Expand a copysign operation. Special case operand 0 being a constant. */
11511 ix86_expand_copysign (rtx operands[])
11513 enum machine_mode mode;
11514 rtx dest, op0, op1, mask, nmask;
11516 dest = operands[0];
11520 mode = GET_MODE (dest);
/* Constant magnitude: strip its sign up front and use the _const
   pattern, which needs only one mask. */
11522 if (GET_CODE (op0) == CONST_DOUBLE)
11524 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
11526 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
11527 op0 = simplify_unary_operation (ABS, mode, op0, mode);
11529 if (mode == SFmode || mode == DFmode)
11531 enum machine_mode vmode;
11533 vmode = mode == SFmode ? V4SFmode : V2DFmode;
11535 if (op0 == CONST0_RTX (mode))
11536 op0 = CONST0_RTX (vmode);
/* Non-zero constant: widen it into a vector with zeros in the upper
   elements, then force to a register. */
11541 if (mode == SFmode)
11542 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
11543 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
11545 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
11547 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
11550 else if (op0 != CONST0_RTX (mode))
11551 op0 = force_reg (mode, op0);
11553 mask = ix86_build_signbit_mask (mode, 0, 0);
11555 if (mode == SFmode)
11556 copysign_insn = gen_copysignsf3_const;
11557 else if (mode == DFmode)
11558 copysign_insn = gen_copysigndf3_const;
11560 copysign_insn = gen_copysigntf3_const;
11562 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: the _var pattern needs both the sign mask and
   its complement. */
11566 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
11568 nmask = ix86_build_signbit_mask (mode, 0, 1);
11569 mask = ix86_build_signbit_mask (mode, 0, 0);
11571 if (mode == SFmode)
11572 copysign_insn = gen_copysignsf3_var;
11573 else if (mode == DFmode)
11574 copysign_insn = gen_copysigndf3_var;
11576 copysign_insn = gen_copysigntf3_var;
11578 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
11582 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
11583 be a constant, and so has already been expanded into a vector constant. */
11586 ix86_split_copysign_const (rtx operands[])
11588 enum machine_mode mode, vmode;
11589 rtx dest, op0, op1, mask, x;
11591 dest = operands[0];
11594 mask = operands[3];
11596 mode = GET_MODE (dest);
11597 vmode = GET_MODE (mask);
/* dest = (op1 & signbit-mask) | constant-magnitude, done in the vector
   mode so plain AND/IOR patterns apply. */
11599 dest = simplify_gen_subreg (vmode, dest, mode, 0);
11600 x = gen_rtx_AND (vmode, dest, mask);
11601 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Skip the IOR when the magnitude is zero -- the AND already produced
   the final value. */
11603 if (op0 != CONST0_RTX (vmode))
11605 x = gen_rtx_IOR (vmode, dest, op0);
11606 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11610 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
11611 so we have to do two masks. */
11614 ix86_split_copysign_var (rtx operands[])
11616 enum machine_mode mode, vmode;
11617 rtx dest, scratch, op0, op1, mask, nmask, x;
11619 dest = operands[0];
11620 scratch = operands[1];
11623 nmask = operands[4];
11624 mask = operands[5];
11626 mode = GET_MODE (dest);
11627 vmode = GET_MODE (mask);
11629 if (rtx_equal_p (op0, op1))
11631 /* Shouldn't happen often (it's useless, obviously), but when it does
11632 we'd generate incorrect code if we continue below. */
11633 emit_move_insn (dest, op0);
/* The constraint alternatives below tie different operands to the same
   hard registers; each branch handles one register-assignment case. */
11637 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
11639 gcc_assert (REGNO (op1) == REGNO (scratch));
/* scratch = op1 & mask (extract the sign); dest = ~dest & op0
   (extract the magnitude). */
11641 x = gen_rtx_AND (vmode, scratch, mask);
11642 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11645 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11646 x = gen_rtx_NOT (vmode, dest);
11647 x = gen_rtx_AND (vmode, x, op0);
11648 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11652 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
11654 x = gen_rtx_AND (vmode, scratch, mask);
11656 else /* alternative 2,4 */
11658 gcc_assert (REGNO (mask) == REGNO (scratch));
11659 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
11660 x = gen_rtx_AND (vmode, scratch, op1);
11662 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
11664 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
11666 dest = simplify_gen_subreg (vmode, op0, mode, 0);
11667 x = gen_rtx_AND (vmode, dest, nmask);
11669 else /* alternative 3,4 */
11671 gcc_assert (REGNO (nmask) == REGNO (dest));
11673 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
11674 x = gen_rtx_AND (vmode, dest, op0);
11676 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine sign and magnitude: dest |= scratch.  */
11679 x = gen_rtx_IOR (vmode, dest, scratch);
11680 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11683 /* Return TRUE or FALSE depending on whether the first SET in INSN
11684 has source and destination with matching CC modes, and that the
11685 CC mode is at least as constrained as REQ_MODE. */
11688 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
11691 enum machine_mode set_mode;
11693 set = PATTERN (insn);
11694 if (GET_CODE (set) == PARALLEL)
11695 set = XVECEXP (set, 0, 0)
11696 gcc_assert (GET_CODE (set) == SET);
11697 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
11699 set_mode = GET_MODE (SET_DEST (set));
/* A switch on SET_MODE (case labels elided): each case accepts
   REQ_MODEs no more constrained than the insn's CC mode. */
11703 if (req_mode != CCNOmode
11704 && (req_mode != CCmode
11705 || XEXP (SET_SRC (set), 1) != const0_rtx))
11709 if (req_mode == CCGCmode)
11713 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11717 if (req_mode == CCZmode)
11724 gcc_unreachable ();
11727 return (GET_MODE (SET_SRC (set)) == set_mode);
11730 /* Generate insn patterns to do an integer compare of OPERANDS. */
11733 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11735 enum machine_mode cmpmode;
11738 cmpmode = SELECT_CC_MODE (code, op0, op1);
11739 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11741 /* This is very simple, but making the interface the same as in the
11742 FP case makes the rest of the code easier. */
11743 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11744 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11746 /* Return the test that should be put into the flags user, i.e.
11747 the bcc, scc, or cmov instruction. */
11748 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11751 /* Figure out whether to use ordered or unordered fp comparisons.
11752 Return the appropriate mode to use. */
11755 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11757 /* ??? In order to make all comparisons reversible, we do all comparisons
11758 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11759 all forms trapping and nontrapping comparisons, we can make inequality
11760 comparisons trapping again, since it results in better code when using
11761 FCOM based compares. */
/* CODE is currently ignored: the choice depends only on -mieee-fp.  */
11762 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode that captures exactly the flag bits comparison
   CODE of OP0 and OP1 needs, so unrelated flag clobbers can be
   ignored by later passes. */
11766 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11768 enum machine_mode mode = GET_MODE (op0);
11770 if (SCALAR_FLOAT_MODE_P (mode))
11772 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11773 return ix86_fp_compare_mode (code);
11778 /* Only zero flag is needed. */
11779 case EQ: /* ZF=0 */
11780 case NE: /* ZF!=0 */
11782 /* Codes needing carry flag. */
11783 case GEU: /* CF=0 */
11784 case LTU: /* CF=1 */
11785 /* Detect overflow checks. They need just the carry flag. */
11786 if (GET_CODE (op0) == PLUS
11787 && rtx_equal_p (op1, XEXP (op0, 0)))
11791 case GTU: /* CF=0 & ZF=0 */
11792 case LEU: /* CF=1 | ZF=1 */
11793 /* Detect overflow checks. They need just the carry flag. */
11794 if (GET_CODE (op0) == MINUS
11795 && rtx_equal_p (op1, XEXP (op0, 0)))
11799 /* Codes possibly doable only with sign flag when
11800 comparing against zero. */
11801 case GE: /* SF=OF or SF=0 */
11802 case LT: /* SF<>OF or SF=1 */
11803 if (op1 == const0_rtx)
11806 /* For other cases Carry flag is not required. */
11808 /* Codes doable only with sign flag when comparing
11809 against zero, but we miss jump instruction for it
11810 so we need to use relational tests against overflow
11811 that thus needs to be zero. */
11812 case GT: /* ZF=0 & SF=OF */
11813 case LE: /* ZF=1 | SF<>OF */
11814 if (op1 == const0_rtx)
11818 /* strcmp pattern do (use flags) and combine may ask us for proper
/* Return values for each case are elided in this view.  */
11823 gcc_unreachable ();
11827 /* Return the fixed registers used for condition codes. */
/* Body elided in this view; presumably sets *p1/*p2 to FLAGS_REG and
   FPSR_REG -- TODO confirm against the full file. */
11830 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11837 /* If two condition code modes are compatible, return a condition code
11838 mode which is compatible with both. Otherwise, return
/* ... VOIDmode (tail of comment elided).  */
11841 static enum machine_mode
11842 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11847 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
/* CCGC and CCGOC are mutually compatible; CCGC is the more
   constrained common mode. */
11850 if ((m1 == CCGCmode && m2 == CCGOCmode)
11851 || (m1 == CCGOCmode && m2 == CCGCmode))
11857 gcc_unreachable ();
11887 /* These are only compatible with themselves, which we already
11893 /* Split comparison code CODE into comparisons we can do using branch
11894 instructions. BYPASS_CODE is comparison code for branch that will
11895 branch around FIRST_CODE and SECOND_CODE. If some of branches
11896 is not required, set value to UNKNOWN.
11897 We never require more than two branches. */
11900 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11901 enum rtx_code *first_code,
11902 enum rtx_code *second_code)
11904 *first_code = code;
11905 *bypass_code = UNKNOWN;
11906 *second_code = UNKNOWN;
11908 /* The fcomi comparison sets flags as follows:
/* Codes directly expressible after fcomi: single branch, no fixups.  */
11918 case GT: /* GTU - CF=0 & ZF=0 */
11919 case GE: /* GEU - CF=0 */
11920 case ORDERED: /* PF=0 */
11921 case UNORDERED: /* PF=1 */
11922 case UNEQ: /* EQ - ZF=1 */
11923 case UNLT: /* LTU - CF=1 */
11924 case UNLE: /* LEU - CF=1 | ZF=1 */
11925 case LTGT: /* EQ - ZF=0 */
/* Ordered codes that misbehave on NaN: branch around via UNORDERED
   (bypass), or add a second UNORDERED branch, under IEEE rules. */
11927 case LT: /* LTU - CF=1 - fails on unordered */
11928 *first_code = UNLT;
11929 *bypass_code = UNORDERED;
11931 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11932 *first_code = UNLE;
11933 *bypass_code = UNORDERED;
11935 case EQ: /* EQ - ZF=1 - fails on unordered */
11936 *first_code = UNEQ;
11937 *bypass_code = UNORDERED;
11939 case NE: /* NE - ZF=0 - fails on unordered */
11940 *first_code = LTGT;
11941 *second_code = UNORDERED;
11943 case UNGE: /* GEU - CF=0 - fails on unordered */
11945 *second_code = UNORDERED;
11947 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11949 *second_code = UNORDERED;
11952 gcc_unreachable ();
/* Without -mieee-fp, NaNs need not be honored: drop the extra
   branches and use the single rewritten code. */
11954 if (!TARGET_IEEE_FP)
11956 *second_code = UNKNOWN;
11957 *bypass_code = UNKNOWN;
11961 /* Return cost of comparison done fcom + arithmetics operations on AX.
11962 All following functions do use number of instructions as a cost metrics.
11963 In future this should be tweaked to compute bytes for optimize_size and
11964 take into account performance of various instructions on various CPUs. */
11966 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11968 if (!TARGET_IEEE_FP)
11970 /* The cost of code output by ix86_expand_fp_compare. */
/* Per-code costs elided in this view; falls through to an assert for
   unhandled codes. */
11994 gcc_unreachable ();
11998 /* Return cost of comparison done using fcomi operation.
11999 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12001 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
12003 enum rtx_code bypass_code, first_code, second_code;
12004 /* Return arbitrarily high cost when instruction is not supported - this
12005 prevents gcc from using it. */
/* (Guard for !TARGET_CMOVE elided.)  Cost = fcomi + jump(s): one extra
   when a bypass or second branch is required. */
12008 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12009 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
12012 /* Return cost of comparison done using sahf operation.
12013 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12015 ix86_fp_comparison_sahf_cost (enum rtx_code code)
12017 enum rtx_code bypass_code, first_code, second_code;
12018 /* Return arbitrarily high cost when instruction is not preferred - this
12019 avoids gcc from using it. */
12020 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
/* Cost = fnstsw + sahf + jump(s): one extra for bypass/second branch.  */
12022 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12023 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
12026 /* Compute cost of the comparison done using any method.
12027 See ix86_fp_comparison_arithmetics_cost for the metrics. */
12029 ix86_fp_comparison_cost (enum rtx_code code)
12031 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
/* Take the minimum of the three strategies' costs.  */
12034 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
12035 sahf_cost = ix86_fp_comparison_sahf_cost (code);
12037 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
12038 if (min > sahf_cost)
12040 if (min > fcomi_cost)
12045 /* Return true if we should use an FCOMI instruction for this
/* ... fp comparison (tail of comment elided).  FCOMI is chosen when it
   is the cheapest strategy for either operand order. */
12049 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
12051 enum rtx_code swapped_code = swap_condition (code);
12053 return ((ix86_fp_comparison_cost (code)
12054 == ix86_fp_comparison_fcomi_cost (code))
12055 || (ix86_fp_comparison_cost (swapped_code)
12056 == ix86_fp_comparison_fcomi_cost (swapped_code)));
12059 /* Swap, force into registers, or otherwise massage the two operands
12060 to a fp comparison. The operands are updated in place; the new
12061 comparison code is returned. */
12063 static enum rtx_code
12064 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
12066 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
12067 rtx op0 = *pop0, op1 = *pop1;
12068 enum machine_mode op_mode = GET_MODE (op0);
12069 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
12071 /* All of the unordered compare instructions only work on registers.
12072 The same is true of the fcomi compare instructions. The XFmode
12073 compare instructions require registers except when comparing
12074 against zero or when converting operand 1 from fixed point to
/* ... floating point (tail elided).  */
12078 && (fpcmp_mode == CCFPUmode
12079 || (op_mode == XFmode
12080 && ! (standard_80387_constant_p (op0) == 1
12081 || standard_80387_constant_p (op1) == 1)
12082 && GET_CODE (op1) != FLOAT)
12083 || ix86_use_fcomi_compare (code)))
12085 op0 = force_reg (op_mode, op0);
12086 op1 = force_reg (op_mode, op1);
12090 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
12091 things around if they appear profitable, otherwise force op0
12092 into a register. */
12094 if (standard_80387_constant_p (op0) == 0
12096 && ! (standard_80387_constant_p (op1) == 0
12100 tmp = op0, op0 = op1, op1 = tmp;
12101 code = swap_condition (code);
12105 op0 = force_reg (op_mode, op0);
12107 if (CONSTANT_P (op1))
12109 int tmp = standard_80387_constant_p (op1);
/* Non-loadable x87 constant: spill to the constant pool; loadable ones
   (fldz/fld1 etc., elided branch) go to a register. */
12111 op1 = validize_mem (force_const_mem (op_mode, op1));
12115 op1 = force_reg (op_mode, op1);
12118 op1 = force_reg (op_mode, op1);
12122 /* Try to rearrange the comparison to make it cheaper. */
12123 if (ix86_fp_comparison_cost (code)
12124 > ix86_fp_comparison_cost (swap_condition (code))
12125 && (REG_P (op1) || can_create_pseudo_p ()))
12128 tmp = op0, op0 = op1, op1 = tmp;
12129 code = swap_condition (code);
12131 op0 = force_reg (op_mode, op0);
/* Results are written back through pop0/pop1 (elided tail).  */
12139 /* Convert comparison codes we use to represent FP comparison to integer
12140 code that will result in proper branch. Return UNKNOWN if no such code
/* ... is available (tail of comment elided); mapping switch elided.  */
12144 ix86_fp_compare_code_to_integer (enum rtx_code code)
12173 /* Generate insn patterns to do a floating point compare of OPERANDS. */
12176 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
12177 rtx *second_test, rtx *bypass_test)
12179 enum machine_mode fpcmp_mode, intcmp_mode;
12181 int cost = ix86_fp_comparison_cost (code);
12182 enum rtx_code bypass_code, first_code, second_code;
12184 fpcmp_mode = ix86_fp_compare_mode (code);
12185 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
12188 *second_test = NULL_RTX;
12190 *bypass_test = NULL_RTX;
12192 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12194 /* Do fcomi/sahf based test when profitable. */
12195 if (ix86_fp_comparison_arithmetics_cost (code) > cost
12196 && (bypass_code == UNKNOWN || bypass_test)
12197 && (second_code == UNKNOWN || second_test))
12199 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12200 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
/* sahf path (fcomi branch elided): route the status word through a
   HImode scratch before the flags transfer. */
12206 gcc_assert (TARGET_SAHF);
12209 scratch = gen_reg_rtx (HImode);
12210 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
12212 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
12215 /* The FP codes work out to act like unsigned. */
12216 intcmp_mode = fpcmp_mode;
12218 if (bypass_code != UNKNOWN)
12219 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
12220 gen_rtx_REG (intcmp_mode, FLAGS_REG),
12222 if (second_code != UNKNOWN)
12223 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
12224 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Arithmetic path: fcom + fnstsw, then test bits of AH.  Status-word
   bits used below: C0=0x01, C2=0x04, C3=0x40; 0x45 = C0|C2|C3. */
12229 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
12230 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
12231 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
12233 scratch = gen_reg_rtx (HImode);
12234 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
12236 /* In the unordered case, we have to check C2 for NaN's, which
12237 doesn't happen to work out to anything nice combination-wise.
12238 So do some bit twiddling on the value we've got in AH to come
12239 up with an appropriate set of condition codes. */
12241 intcmp_mode = CCNOmode;
/* Case labels of the switch on CODE are elided below; each cluster
   handles one comparison code. */
12246 if (code == GT || !TARGET_IEEE_FP)
12248 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12253 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12254 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12255 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
12256 intcmp_mode = CCmode;
12262 if (code == LT && TARGET_IEEE_FP)
12264 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12265 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
12266 intcmp_mode = CCmode;
12271 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
12277 if (code == GE || !TARGET_IEEE_FP)
12279 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
12284 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12285 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12292 if (code == LE && TARGET_IEEE_FP)
12294 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12295 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
12296 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12297 intcmp_mode = CCmode;
12302 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
12308 if (code == EQ && TARGET_IEEE_FP)
12310 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12311 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
12312 intcmp_mode = CCmode;
12317 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
12324 if (code == NE && TARGET_IEEE_FP)
12326 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
12327 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
12333 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
/* ORDERED / UNORDERED: just test C2 (0x04).  */
12339 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12343 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
12348 gcc_unreachable ();
12352 /* Return the test that should be put into the flags user, i.e.
12353 the bcc, scc, or cmov instruction. */
12354 return gen_rtx_fmt_ee (code, VOIDmode,
12355 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Expand the comparison recorded in the ix86_compare_op0/op1 globals
   (or reuse an already-emitted flags result) and return the rtx test
   for the flags user.  SECOND_TEST/BYPASS_TEST, when non-null, receive
   any extra FP tests. */
12360 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
12363 op0 = ix86_compare_op0;
12364 op1 = ix86_compare_op1;
12367 *second_test = NULL_RTX;
12369 *bypass_test = NULL_RTX;
/* A compare insn was already emitted: consume the stashed flags rtx.  */
12371 if (ix86_compare_emitted)
12373 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
12374 ix86_compare_emitted = NULL_RTX;
12376 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
12378 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
12379 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12380 second_test, bypass_test);
12383 ret = ix86_expand_int_compare (code, op0, op1);
12388 /* Return true if the CODE will result in nontrivial jump sequence. */
12390 ix86_fp_jump_nontrivial_p (enum rtx_code code)
12392 enum rtx_code bypass_code, first_code, second_code;
/* Nontrivial = the split needs a bypass branch or a second branch.  */
12395 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12396 return bypass_code != UNKNOWN || second_code != UNKNOWN;
12400 ix86_expand_branch (enum rtx_code code, rtx label)
12404 /* If we have emitted a compare insn, go straight to simple.
12405 ix86_expand_compare won't emit anything if ix86_compare_emitted
12407 if (ix86_compare_emitted)
12410 switch (GET_MODE (ix86_compare_op0))
12416 tmp = ix86_expand_compare (code, NULL, NULL);
12417 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12418 gen_rtx_LABEL_REF (VOIDmode, label),
12420 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
12429 enum rtx_code bypass_code, first_code, second_code;
12431 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
12432 &ix86_compare_op1);
12434 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
12436 /* Check whether we will use the natural sequence with one jump. If
12437 so, we can expand jump early. Otherwise delay expansion by
12438 creating compound insn to not confuse optimizers. */
12439 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
12441 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
12442 gen_rtx_LABEL_REF (VOIDmode, label),
12443 pc_rtx, NULL_RTX, NULL_RTX);
12447 tmp = gen_rtx_fmt_ee (code, VOIDmode,
12448 ix86_compare_op0, ix86_compare_op1);
12449 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
12450 gen_rtx_LABEL_REF (VOIDmode, label),
12452 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
12454 use_fcomi = ix86_use_fcomi_compare (code);
12455 vec = rtvec_alloc (3 + !use_fcomi);
12456 RTVEC_ELT (vec, 0) = tmp;
12458 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
12460 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
12463 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
12465 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
12474 /* Expand DImode branch into multiple compare+branch. */
12476 rtx lo[2], hi[2], label2;
12477 enum rtx_code code1, code2, code3;
12478 enum machine_mode submode;
12480 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
12482 tmp = ix86_compare_op0;
12483 ix86_compare_op0 = ix86_compare_op1;
12484 ix86_compare_op1 = tmp;
12485 code = swap_condition (code);
12487 if (GET_MODE (ix86_compare_op0) == DImode)
12489 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
12490 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
12495 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
12496 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
12500 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
12501 avoid two branches. This costs one extra insn, so disable when
12502 optimizing for size. */
12504 if ((code == EQ || code == NE)
12506 || hi[1] == const0_rtx || lo[1] == const0_rtx))
12511 if (hi[1] != const0_rtx)
12512 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
12513 NULL_RTX, 0, OPTAB_WIDEN);
12516 if (lo[1] != const0_rtx)
12517 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
12518 NULL_RTX, 0, OPTAB_WIDEN);
12520 tmp = expand_binop (submode, ior_optab, xor1, xor0,
12521 NULL_RTX, 0, OPTAB_WIDEN);
12523 ix86_compare_op0 = tmp;
12524 ix86_compare_op1 = const0_rtx;
12525 ix86_expand_branch (code, label);
12529 /* Otherwise, if we are doing less-than or greater-or-equal-than,
12530 op1 is a constant and the low word is zero, then we can just
12531 examine the high word. Similarly for low word -1 and
12532 less-or-equal-than or greater-than. */
12534 if (CONST_INT_P (hi[1]))
12537 case LT: case LTU: case GE: case GEU:
12538 if (lo[1] == const0_rtx)
12540 ix86_compare_op0 = hi[0];
12541 ix86_compare_op1 = hi[1];
12542 ix86_expand_branch (code, label);
12546 case LE: case LEU: case GT: case GTU:
12547 if (lo[1] == constm1_rtx)
12549 ix86_compare_op0 = hi[0];
12550 ix86_compare_op1 = hi[1];
12551 ix86_expand_branch (code, label);
12559 /* Otherwise, we need two or three jumps. */
12561 label2 = gen_label_rtx ();
12564 code2 = swap_condition (code);
12565 code3 = unsigned_condition (code);
12569 case LT: case GT: case LTU: case GTU:
12572 case LE: code1 = LT; code2 = GT; break;
12573 case GE: code1 = GT; code2 = LT; break;
12574 case LEU: code1 = LTU; code2 = GTU; break;
12575 case GEU: code1 = GTU; code2 = LTU; break;
12577 case EQ: code1 = UNKNOWN; code2 = NE; break;
12578 case NE: code2 = UNKNOWN; break;
12581 gcc_unreachable ();
12586 * if (hi(a) < hi(b)) goto true;
12587 * if (hi(a) > hi(b)) goto false;
12588 * if (lo(a) < lo(b)) goto true;
12592 ix86_compare_op0 = hi[0];
12593 ix86_compare_op1 = hi[1];
12595 if (code1 != UNKNOWN)
12596 ix86_expand_branch (code1, label);
12597 if (code2 != UNKNOWN)
12598 ix86_expand_branch (code2, label2);
12600 ix86_compare_op0 = lo[0];
12601 ix86_compare_op1 = lo[1];
12602 ix86_expand_branch (code3, label);
12604 if (code2 != UNKNOWN)
12605 emit_label (label2);
12610 gcc_unreachable ();
12614 /* Split branch based on floating point condition. */
12616 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
12617 rtx target1, rtx target2, rtx tmp, rtx pushed)
12619 rtx second, bypass;
12620 rtx label = NULL_RTX;
12622 int bypass_probability = -1, second_probability = -1, probability = -1;
12625 if (target2 != pc_rtx)
12628 code = reverse_condition_maybe_unordered (code);
12633 condition = ix86_expand_fp_compare (code, op1, op2,
12634 tmp, &second, &bypass);
12636 /* Remove pushed operand from stack. */
12638 ix86_free_from_memory (GET_MODE (pushed));
12640 if (split_branch_probability >= 0)
12642 /* Distribute the probabilities across the jumps.
12643 Assume the BYPASS and SECOND to be always test
12645 probability = split_branch_probability;
12647 /* Value of 1 is low enough to make no need for probability
12648 to be updated. Later we may run some experiments and see
12649 if unordered values are more frequent in practice. */
12651 bypass_probability = 1;
12653 second_probability = 1;
12655 if (bypass != NULL_RTX)
12657 label = gen_label_rtx ();
12658 i = emit_jump_insn (gen_rtx_SET
12660 gen_rtx_IF_THEN_ELSE (VOIDmode,
12662 gen_rtx_LABEL_REF (VOIDmode,
12665 if (bypass_probability >= 0)
12667 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12668 GEN_INT (bypass_probability),
12671 i = emit_jump_insn (gen_rtx_SET
12673 gen_rtx_IF_THEN_ELSE (VOIDmode,
12674 condition, target1, target2)));
12675 if (probability >= 0)
12677 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12678 GEN_INT (probability),
12680 if (second != NULL_RTX)
12682 i = emit_jump_insn (gen_rtx_SET
12684 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
12686 if (second_probability >= 0)
12688 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12689 GEN_INT (second_probability),
12692 if (label != NULL_RTX)
12693 emit_label (label);
12697 ix86_expand_setcc (enum rtx_code code, rtx dest)
12699 rtx ret, tmp, tmpreg, equiv;
12700 rtx second_test, bypass_test;
12702 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
12703 return 0; /* FAIL */
12705 gcc_assert (GET_MODE (dest) == QImode);
12707 ret = ix86_expand_compare (code, &second_test, &bypass_test);
12708 PUT_MODE (ret, QImode);
12713 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
12714 if (bypass_test || second_test)
12716 rtx test = second_test;
12718 rtx tmp2 = gen_reg_rtx (QImode);
12721 gcc_assert (!second_test);
12722 test = bypass_test;
12724 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12726 PUT_MODE (test, QImode);
12727 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12730 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12732 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12735 /* Attach a REG_EQUAL note describing the comparison result. */
12736 if (ix86_compare_op0 && ix86_compare_op1)
12738 equiv = simplify_gen_relational (code, QImode,
12739 GET_MODE (ix86_compare_op0),
12740 ix86_compare_op0, ix86_compare_op1);
12741 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12744 return 1; /* DONE */
12747 /* Expand comparison setting or clearing carry flag. Return true when
12748 successful and set pop for the operation. */
12750 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12752 enum machine_mode mode =
12753 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12755 /* Do not handle DImode compares that go through special path. */
12756 if (mode == (TARGET_64BIT ? TImode : DImode))
12759 if (SCALAR_FLOAT_MODE_P (mode))
12761 rtx second_test = NULL, bypass_test = NULL;
12762 rtx compare_op, compare_seq;
12764 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12766 /* Shortcut: following common codes never translate
12767 into carry flag compares. */
12768 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12769 || code == ORDERED || code == UNORDERED)
12772 /* These comparisons require zero flag; swap operands so they won't. */
12773 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12774 && !TARGET_IEEE_FP)
12779 code = swap_condition (code);
12782 /* Try to expand the comparison and verify that we end up with
12783 carry flag based comparison. This fails to be true only when
12784 we decide to expand comparison using arithmetic that is not
12785 too common scenario. */
12787 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12788 &second_test, &bypass_test);
12789 compare_seq = get_insns ();
12792 if (second_test || bypass_test)
12795 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12796 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12797 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12799 code = GET_CODE (compare_op);
12801 if (code != LTU && code != GEU)
12804 emit_insn (compare_seq);
12809 if (!INTEGRAL_MODE_P (mode))
12818 /* Convert a==0 into (unsigned)a<1. */
12821 if (op1 != const0_rtx)
12824 code = (code == EQ ? LTU : GEU);
12827 /* Convert a>b into b<a or a>=b-1. */
12830 if (CONST_INT_P (op1))
12832 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12833 /* Bail out on overflow. We still can swap operands but that
12834 would force loading of the constant into register. */
12835 if (op1 == const0_rtx
12836 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12838 code = (code == GTU ? GEU : LTU);
12845 code = (code == GTU ? LTU : GEU);
12849 /* Convert a>=0 into (unsigned)a<0x80000000. */
12852 if (mode == DImode || op1 != const0_rtx)
12854 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12855 code = (code == LT ? GEU : LTU);
12859 if (mode == DImode || op1 != constm1_rtx)
12861 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12862 code = (code == LE ? GEU : LTU);
12868 /* Swapping operands may cause constant to appear as first operand. */
12869 if (!nonimmediate_operand (op0, VOIDmode))
12871 if (!can_create_pseudo_p ())
12873 op0 = force_reg (mode, op0);
12875 ix86_compare_op0 = op0;
12876 ix86_compare_op1 = op1;
12877 *pop = ix86_expand_compare (code, NULL, NULL);
12878 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12883 ix86_expand_int_movcc (rtx operands[])
12885 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12886 rtx compare_seq, compare_op;
12887 rtx second_test, bypass_test;
12888 enum machine_mode mode = GET_MODE (operands[0]);
12889 bool sign_bit_compare_p = false;;
12892 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12893 compare_seq = get_insns ();
12896 compare_code = GET_CODE (compare_op);
12898 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12899 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12900 sign_bit_compare_p = true;
12902 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12903 HImode insns, we'd be swallowed in word prefix ops. */
12905 if ((mode != HImode || TARGET_FAST_PREFIX)
12906 && (mode != (TARGET_64BIT ? TImode : DImode))
12907 && CONST_INT_P (operands[2])
12908 && CONST_INT_P (operands[3]))
12910 rtx out = operands[0];
12911 HOST_WIDE_INT ct = INTVAL (operands[2]);
12912 HOST_WIDE_INT cf = INTVAL (operands[3]);
12913 HOST_WIDE_INT diff;
12916 /* Sign bit compares are better done using shifts than we do by using
12918 if (sign_bit_compare_p
12919 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12920 ix86_compare_op1, &compare_op))
12922 /* Detect overlap between destination and compare sources. */
12925 if (!sign_bit_compare_p)
12927 bool fpcmp = false;
12929 compare_code = GET_CODE (compare_op);
12931 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12932 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12935 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12938 /* To simplify rest of code, restrict to the GEU case. */
12939 if (compare_code == LTU)
12941 HOST_WIDE_INT tmp = ct;
12944 compare_code = reverse_condition (compare_code);
12945 code = reverse_condition (code);
12950 PUT_CODE (compare_op,
12951 reverse_condition_maybe_unordered
12952 (GET_CODE (compare_op)));
12954 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12958 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12959 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12960 tmp = gen_reg_rtx (mode);
12962 if (mode == DImode)
12963 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12965 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12969 if (code == GT || code == GE)
12970 code = reverse_condition (code);
12973 HOST_WIDE_INT tmp = ct;
12978 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12979 ix86_compare_op1, VOIDmode, 0, -1);
12992 tmp = expand_simple_binop (mode, PLUS,
12994 copy_rtx (tmp), 1, OPTAB_DIRECT);
13005 tmp = expand_simple_binop (mode, IOR,
13007 copy_rtx (tmp), 1, OPTAB_DIRECT);
13009 else if (diff == -1 && ct)
13019 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
13021 tmp = expand_simple_binop (mode, PLUS,
13022 copy_rtx (tmp), GEN_INT (cf),
13023 copy_rtx (tmp), 1, OPTAB_DIRECT);
13031 * andl cf - ct, dest
13041 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
13044 tmp = expand_simple_binop (mode, AND,
13046 gen_int_mode (cf - ct, mode),
13047 copy_rtx (tmp), 1, OPTAB_DIRECT);
13049 tmp = expand_simple_binop (mode, PLUS,
13050 copy_rtx (tmp), GEN_INT (ct),
13051 copy_rtx (tmp), 1, OPTAB_DIRECT);
13054 if (!rtx_equal_p (tmp, out))
13055 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
13057 return 1; /* DONE */
13062 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13065 tmp = ct, ct = cf, cf = tmp;
13068 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13070 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13072 /* We may be reversing unordered compare to normal compare, that
13073 is not valid in general (we may convert non-trapping condition
13074 to trapping one), however on i386 we currently emit all
13075 comparisons unordered. */
13076 compare_code = reverse_condition_maybe_unordered (compare_code);
13077 code = reverse_condition_maybe_unordered (code);
13081 compare_code = reverse_condition (compare_code);
13082 code = reverse_condition (code);
13086 compare_code = UNKNOWN;
13087 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
13088 && CONST_INT_P (ix86_compare_op1))
13090 if (ix86_compare_op1 == const0_rtx
13091 && (code == LT || code == GE))
13092 compare_code = code;
13093 else if (ix86_compare_op1 == constm1_rtx)
13097 else if (code == GT)
13102 /* Optimize dest = (op0 < 0) ? -1 : cf. */
13103 if (compare_code != UNKNOWN
13104 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
13105 && (cf == -1 || ct == -1))
13107 /* If lea code below could be used, only optimize
13108 if it results in a 2 insn sequence. */
13110 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
13111 || diff == 3 || diff == 5 || diff == 9)
13112 || (compare_code == LT && ct == -1)
13113 || (compare_code == GE && cf == -1))
13116 * notl op1 (if necessary)
13124 code = reverse_condition (code);
13127 out = emit_store_flag (out, code, ix86_compare_op0,
13128 ix86_compare_op1, VOIDmode, 0, -1);
13130 out = expand_simple_binop (mode, IOR,
13132 out, 1, OPTAB_DIRECT);
13133 if (out != operands[0])
13134 emit_move_insn (operands[0], out);
13136 return 1; /* DONE */
13141 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
13142 || diff == 3 || diff == 5 || diff == 9)
13143 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
13145 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
13151 * lea cf(dest*(ct-cf)),dest
13155 * This also catches the degenerate setcc-only case.
13161 out = emit_store_flag (out, code, ix86_compare_op0,
13162 ix86_compare_op1, VOIDmode, 0, 1);
13165 /* On x86_64 the lea instruction operates on Pmode, so we need
13166 to get arithmetics done in proper mode to match. */
13168 tmp = copy_rtx (out);
13172 out1 = copy_rtx (out);
13173 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
13177 tmp = gen_rtx_PLUS (mode, tmp, out1);
13183 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
13186 if (!rtx_equal_p (tmp, out))
13189 out = force_operand (tmp, copy_rtx (out));
13191 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
13193 if (!rtx_equal_p (out, operands[0]))
13194 emit_move_insn (operands[0], copy_rtx (out));
13196 return 1; /* DONE */
13200 * General case: Jumpful:
13201 * xorl dest,dest cmpl op1, op2
13202 * cmpl op1, op2 movl ct, dest
13203 * setcc dest jcc 1f
13204 * decl dest movl cf, dest
13205 * andl (cf-ct),dest 1:
13208 * Size 20. Size 14.
13210 * This is reasonably steep, but branch mispredict costs are
13211 * high on modern cpus, so consider failing only if optimizing
13215 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13216 && BRANCH_COST >= 2)
13220 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
13225 if (SCALAR_FLOAT_MODE_P (cmp_mode))
13227 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
13229 /* We may be reversing unordered compare to normal compare,
13230 that is not valid in general (we may convert non-trapping
13231 condition to trapping one), however on i386 we currently
13232 emit all comparisons unordered. */
13233 code = reverse_condition_maybe_unordered (code);
13237 code = reverse_condition (code);
13238 if (compare_code != UNKNOWN)
13239 compare_code = reverse_condition (compare_code);
13243 if (compare_code != UNKNOWN)
13245 /* notl op1 (if needed)
13250 For x < 0 (resp. x <= -1) there will be no notl,
13251 so if possible swap the constants to get rid of the
13253 True/false will be -1/0 while code below (store flag
13254 followed by decrement) is 0/-1, so the constants need
13255 to be exchanged once more. */
13257 if (compare_code == GE || !cf)
13259 code = reverse_condition (code);
13264 HOST_WIDE_INT tmp = cf;
13269 out = emit_store_flag (out, code, ix86_compare_op0,
13270 ix86_compare_op1, VOIDmode, 0, -1);
13274 out = emit_store_flag (out, code, ix86_compare_op0,
13275 ix86_compare_op1, VOIDmode, 0, 1);
13277 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
13278 copy_rtx (out), 1, OPTAB_DIRECT);
13281 out = expand_simple_binop (mode, AND, copy_rtx (out),
13282 gen_int_mode (cf - ct, mode),
13283 copy_rtx (out), 1, OPTAB_DIRECT);
13285 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
13286 copy_rtx (out), 1, OPTAB_DIRECT);
13287 if (!rtx_equal_p (out, operands[0]))
13288 emit_move_insn (operands[0], copy_rtx (out));
13290 return 1; /* DONE */
13294 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
13296 /* Try a few things more with specific constants and a variable. */
13299 rtx var, orig_out, out, tmp;
13301 if (BRANCH_COST <= 2)
13302 return 0; /* FAIL */
13304 /* If one of the two operands is an interesting constant, load a
13305 constant with the above and mask it in with a logical operation. */
13307 if (CONST_INT_P (operands[2]))
13310 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
13311 operands[3] = constm1_rtx, op = and_optab;
13312 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
13313 operands[3] = const0_rtx, op = ior_optab;
13315 return 0; /* FAIL */
13317 else if (CONST_INT_P (operands[3]))
13320 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
13321 operands[2] = constm1_rtx, op = and_optab;
13322 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
13323 operands[2] = const0_rtx, op = ior_optab;
13325 return 0; /* FAIL */
13328 return 0; /* FAIL */
13330 orig_out = operands[0];
13331 tmp = gen_reg_rtx (mode);
13334 /* Recurse to get the constant loaded. */
13335 if (ix86_expand_int_movcc (operands) == 0)
13336 return 0; /* FAIL */
13338 /* Mask in the interesting variable. */
13339 out = expand_binop (mode, op, var, tmp, orig_out, 0,
13341 if (!rtx_equal_p (out, orig_out))
13342 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
13344 return 1; /* DONE */
13348 * For comparison with above,
13358 if (! nonimmediate_operand (operands[2], mode))
13359 operands[2] = force_reg (mode, operands[2]);
13360 if (! nonimmediate_operand (operands[3], mode))
13361 operands[3] = force_reg (mode, operands[3]);
13363 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13365 rtx tmp = gen_reg_rtx (mode);
13366 emit_move_insn (tmp, operands[3]);
13369 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13371 rtx tmp = gen_reg_rtx (mode);
13372 emit_move_insn (tmp, operands[2]);
13376 if (! register_operand (operands[2], VOIDmode)
13378 || ! register_operand (operands[3], VOIDmode)))
13379 operands[2] = force_reg (mode, operands[2]);
13382 && ! register_operand (operands[3], VOIDmode))
13383 operands[3] = force_reg (mode, operands[3]);
13385 emit_insn (compare_seq);
13386 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13387 gen_rtx_IF_THEN_ELSE (mode,
13388 compare_op, operands[2],
13391 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13392 gen_rtx_IF_THEN_ELSE (mode,
13394 copy_rtx (operands[3]),
13395 copy_rtx (operands[0]))));
13397 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
13398 gen_rtx_IF_THEN_ELSE (mode,
13400 copy_rtx (operands[2]),
13401 copy_rtx (operands[0]))));
13403 return 1; /* DONE */
13406 /* Swap, force into registers, or otherwise massage the two operands
13407 to an sse comparison with a mask result. Thus we differ a bit from
13408 ix86_prepare_fp_compare_args which expects to produce a flags result.
13410 The DEST operand exists to help determine whether to commute commutative
13411 operators. The POP0/POP1 operands are updated in place. The new
13412 comparison code is returned, or UNKNOWN if not implementable. */
13414 static enum rtx_code
13415 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
13416 rtx *pop0, rtx *pop1)
13424 /* We have no LTGT as an operator. We could implement it with
13425 NE & ORDERED, but this requires an extra temporary. It's
13426 not clear that it's worth it. */
13433 /* These are supported directly. */
13440 /* For commutative operators, try to canonicalize the destination
13441 operand to be first in the comparison - this helps reload to
13442 avoid extra moves. */
13443 if (!dest || !rtx_equal_p (dest, *pop1))
13451 /* These are not supported directly. Swap the comparison operands
13452 to transform into something that is supported. */
13456 code = swap_condition (code);
13460 gcc_unreachable ();
13466 /* Detect conditional moves that exactly match min/max operational
13467 semantics. Note that this is IEEE safe, as long as we don't
13468 interchange the operands.
13470 Returns FALSE if this conditional move doesn't match a MIN/MAX,
13471 and TRUE if the operation is successful and instructions are emitted. */
13474 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
13475 rtx cmp_op1, rtx if_true, rtx if_false)
13477 enum machine_mode mode;
13483 else if (code == UNGE)
13486 if_true = if_false;
13492 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
13494 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
13499 mode = GET_MODE (dest);
13501 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
13502 but MODE may be a vector mode and thus not appropriate. */
13503 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
13505 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
13508 if_true = force_reg (mode, if_true);
13509 v = gen_rtvec (2, if_true, if_false);
13510 tmp = gen_rtx_UNSPEC (mode, v, u);
13514 code = is_min ? SMIN : SMAX;
13515 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
13518 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
13522 /* Expand an sse vector comparison. Return the register with the result. */
13525 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
13526 rtx op_true, rtx op_false)
13528 enum machine_mode mode = GET_MODE (dest);
13531 cmp_op0 = force_reg (mode, cmp_op0);
13532 if (!nonimmediate_operand (cmp_op1, mode))
13533 cmp_op1 = force_reg (mode, cmp_op1);
13536 || reg_overlap_mentioned_p (dest, op_true)
13537 || reg_overlap_mentioned_p (dest, op_false))
13538 dest = gen_reg_rtx (mode);
13540 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
13541 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13546 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
13547 operations. This is used for both scalar and vector conditional moves. */
13550 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
13552 enum machine_mode mode = GET_MODE (dest);
13555 if (op_false == CONST0_RTX (mode))
13557 op_true = force_reg (mode, op_true);
13558 x = gen_rtx_AND (mode, cmp, op_true);
13559 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13561 else if (op_true == CONST0_RTX (mode))
13563 op_false = force_reg (mode, op_false);
13564 x = gen_rtx_NOT (mode, cmp);
13565 x = gen_rtx_AND (mode, x, op_false);
13566 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13568 else if (TARGET_SSE5)
13570 rtx pcmov = gen_rtx_SET (mode, dest,
13571 gen_rtx_IF_THEN_ELSE (mode, cmp,
13578 op_true = force_reg (mode, op_true);
13579 op_false = force_reg (mode, op_false);
13581 t2 = gen_reg_rtx (mode);
13583 t3 = gen_reg_rtx (mode);
13587 x = gen_rtx_AND (mode, op_true, cmp);
13588 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
13590 x = gen_rtx_NOT (mode, cmp);
13591 x = gen_rtx_AND (mode, x, op_false);
13592 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
13594 x = gen_rtx_IOR (mode, t3, t2);
13595 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13599 /* Expand a floating-point conditional move. Return true if successful. */
13602 ix86_expand_fp_movcc (rtx operands[])
13604 enum machine_mode mode = GET_MODE (operands[0]);
13605 enum rtx_code code = GET_CODE (operands[1]);
13606 rtx tmp, compare_op, second_test, bypass_test;
13608 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
13610 enum machine_mode cmode;
13612 /* Since we've no cmove for sse registers, don't force bad register
13613 allocation just to gain access to it. Deny movcc when the
13614 comparison mode doesn't match the move mode. */
13615 cmode = GET_MODE (ix86_compare_op0);
13616 if (cmode == VOIDmode)
13617 cmode = GET_MODE (ix86_compare_op1);
13621 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13623 &ix86_compare_op1);
13624 if (code == UNKNOWN)
13627 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
13628 ix86_compare_op1, operands[2],
13632 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
13633 ix86_compare_op1, operands[2], operands[3]);
13634 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
13638 /* The floating point conditional move instructions don't directly
13639 support conditions resulting from a signed integer comparison. */
13641 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13643 /* The floating point conditional move instructions don't directly
13644 support signed integer comparisons. */
13646 if (!fcmov_comparison_operator (compare_op, VOIDmode))
13648 gcc_assert (!second_test && !bypass_test);
13649 tmp = gen_reg_rtx (QImode);
13650 ix86_expand_setcc (code, tmp);
13652 ix86_compare_op0 = tmp;
13653 ix86_compare_op1 = const0_rtx;
13654 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
13656 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
13658 tmp = gen_reg_rtx (mode);
13659 emit_move_insn (tmp, operands[3]);
13662 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
13664 tmp = gen_reg_rtx (mode);
13665 emit_move_insn (tmp, operands[2]);
13669 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13670 gen_rtx_IF_THEN_ELSE (mode, compare_op,
13671 operands[2], operands[3])));
13673 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13674 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
13675 operands[3], operands[0])));
13677 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
13678 gen_rtx_IF_THEN_ELSE (mode, second_test,
13679 operands[2], operands[0])));
13684 /* Expand a floating-point vector conditional move; a vcond operation
13685 rather than a movcc operation. */
13688 ix86_expand_fp_vcond (rtx operands[])
13690 enum rtx_code code = GET_CODE (operands[3]);
13693 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
13694 &operands[4], &operands[5]);
13695 if (code == UNKNOWN)
13698 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
13699 operands[5], operands[1], operands[2]))
13702 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
13703 operands[1], operands[2]);
13704 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
13708 /* Expand a signed/unsigned integral vector conditional move. */
13711 ix86_expand_int_vcond (rtx operands[])
13713 enum machine_mode mode = GET_MODE (operands[0]);
13714 enum rtx_code code = GET_CODE (operands[3]);
13715 bool negate = false;
13718 cop0 = operands[4];
13719 cop1 = operands[5];
13721 /* SSE5 supports all of the comparisons on all vector int types. */
13724 /* Canonicalize the comparison to EQ, GT, GTU. */
13735 code = reverse_condition (code);
13741 code = reverse_condition (code);
13747 code = swap_condition (code);
13748 x = cop0, cop0 = cop1, cop1 = x;
13752 gcc_unreachable ();
13755 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13756 if (mode == V2DImode)
13761 /* SSE4.1 supports EQ. */
13762 if (!TARGET_SSE4_1)
13768 /* SSE4.2 supports GT/GTU. */
13769 if (!TARGET_SSE4_2)
13774 gcc_unreachable ();
13778 /* Unsigned parallel compare is not supported by the hardware. Play some
13779 tricks to turn this into a signed comparison against 0. */
13782 cop0 = force_reg (mode, cop0);
13791 /* Perform a parallel modulo subtraction. */
13792 t1 = gen_reg_rtx (mode);
13793 emit_insn ((mode == V4SImode
13795 : gen_subv2di3) (t1, cop0, cop1));
13797 /* Extract the original sign bit of op0. */
13798 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13800 t2 = gen_reg_rtx (mode);
13801 emit_insn ((mode == V4SImode
13803 : gen_andv2di3) (t2, cop0, mask));
13805 /* XOR it back into the result of the subtraction. This results
13806 in the sign bit set iff we saw unsigned underflow. */
13807 x = gen_reg_rtx (mode);
13808 emit_insn ((mode == V4SImode
13810 : gen_xorv2di3) (x, t1, t2));
13818 /* Perform a parallel unsigned saturating subtraction. */
13819 x = gen_reg_rtx (mode);
13820 emit_insn (gen_rtx_SET (VOIDmode, x,
13821 gen_rtx_US_MINUS (mode, cop0, cop1)));
13828 gcc_unreachable ();
13832 cop1 = CONST0_RTX (mode);
13836 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13837 operands[1+negate], operands[2-negate]);
13839 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13840 operands[2-negate]);
13844 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13845 true if we should do zero extension, else sign extension. HIGH_P is
13846 true if we want the N/2 high elements, else the low elements. */
13849 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13851 enum machine_mode imode = GET_MODE (operands[1]);
13852 rtx (*unpack)(rtx, rtx, rtx);
13859 unpack = gen_vec_interleave_highv16qi;
13861 unpack = gen_vec_interleave_lowv16qi;
13865 unpack = gen_vec_interleave_highv8hi;
13867 unpack = gen_vec_interleave_lowv8hi;
13871 unpack = gen_vec_interleave_highv4si;
13873 unpack = gen_vec_interleave_lowv4si;
13876 gcc_unreachable ();
13879 dest = gen_lowpart (imode, operands[0]);
13882 se = force_reg (imode, CONST0_RTX (imode));
13884 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13885 operands[1], pc_rtx, pc_rtx);
13887 emit_insn (unpack (dest, operands[1], se));
13890 /* This function performs the same task as ix86_expand_sse_unpack,
13891 but with SSE4.1 instructions. */
13894 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13896 enum machine_mode imode = GET_MODE (operands[1]);
13897 rtx (*unpack)(rtx, rtx);
13904 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13906 unpack = gen_sse4_1_extendv8qiv8hi2;
13910 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13912 unpack = gen_sse4_1_extendv4hiv4si2;
13916 unpack = gen_sse4_1_zero_extendv2siv2di2;
13918 unpack = gen_sse4_1_extendv2siv2di2;
13921 gcc_unreachable ();
13924 dest = operands[0];
13927 /* Shift higher 8 bytes to lower 8 bytes. */
13928 src = gen_reg_rtx (imode);
13929 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13930 gen_lowpart (TImode, operands[1]),
13936 emit_insn (unpack (dest, src));
13939 /* This function performs the same task as ix86_expand_sse_unpack,
13940 but with sse5 instructions. */
/* NOTE(review): fragmentary listing -- braces, case labels and some
   statements are missing between the embedded line numbers.  */
13943 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13945 enum machine_mode imode = GET_MODE (operands[1]);
13946 int pperm_bytes[16];
13948 int h = (high_p) ? 8 : 0;
13951 rtvec v = rtvec_alloc (16);
13954 rtx op0 = operands[0], op1 = operands[1];
/* First case (presumably V16QImode source): build a 16-entry PPERM
   selector interleaving source bytes with zero/sign-extension bytes.  */
13959 vs = rtvec_alloc (8);
13960 h2 = (high_p) ? 8 : 0;
13961 for (i = 0; i < 8; i++)
13963 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
13964 pperm_bytes[2*i+1] = ((unsigned_p)
13966 : PPERM_SIGN | PPERM_SRC2 | i | h);
13969 for (i = 0; i < 16; i++)
13970 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
13972 for (i = 0; i < 8; i++)
13973 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
13975 p = gen_rtx_PARALLEL (VOIDmode, vs);
13976 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
13978 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
13980 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* Second case (presumably V8HImode source): two source bytes followed
   by two extension bytes per result element.  */
13984 vs = rtvec_alloc (4);
13985 h2 = (high_p) ? 4 : 0;
13986 for (i = 0; i < 4; i++)
13988 sign_extend = ((unsigned_p)
13990 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
13991 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
13992 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
13993 pperm_bytes[4*i+2] = sign_extend;
13994 pperm_bytes[4*i+3] = sign_extend;
13997 for (i = 0; i < 16; i++)
13998 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14000 for (i = 0; i < 4; i++)
14001 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14003 p = gen_rtx_PARALLEL (VOIDmode, vs);
14004 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14006 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
14008 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* Third case (presumably V4SImode source): four source bytes followed
   by four extension bytes per result element.  */
14012 vs = rtvec_alloc (2);
14013 h2 = (high_p) ? 2 : 0;
14014 for (i = 0; i < 2; i++)
14016 sign_extend = ((unsigned_p)
14018 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
14019 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
14020 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
14021 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
14022 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
14023 pperm_bytes[8*i+4] = sign_extend;
14024 pperm_bytes[8*i+5] = sign_extend;
14025 pperm_bytes[8*i+6] = sign_extend;
14026 pperm_bytes[8*i+7] = sign_extend;
14029 for (i = 0; i < 16; i++)
14030 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14032 for (i = 0; i < 2; i++)
14033 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
14035 p = gen_rtx_PARALLEL (VOIDmode, vs);
14036 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14038 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
14040 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
14044 gcc_unreachable ();
14050 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
14051 next narrower integer vector type */
/* NOTE(review): fragmentary listing -- the return type, braces and switch
   labels on IMODE are missing between the embedded line numbers.  */
14053 ix86_expand_sse5_pack (rtx operands[3])
14055 enum machine_mode imode = GET_MODE (operands[0]);
14056 int pperm_bytes[16];
14058 rtvec v = rtvec_alloc (16);
14060 rtx op0 = operands[0];
14061 rtx op1 = operands[1];
14062 rtx op2 = operands[2];
/* V8HI -> V16QI pack: low byte of each HImode element, SRC1 elements in
   the low result half, SRC2 elements in the high half.  */
14067 for (i = 0; i < 8; i++)
14069 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
14070 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
14073 for (i = 0; i < 16; i++)
14074 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14076 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14077 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI -> V8HI pack: low two bytes of each SImode element.  */
14081 for (i = 0; i < 4; i++)
14083 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
14084 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
14085 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
14086 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
14089 for (i = 0; i < 16; i++)
14090 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14092 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14093 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI -> V4SI pack: low four bytes of each DImode element.  */
14097 for (i = 0; i < 2; i++)
14099 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
14100 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
14101 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
14102 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
14103 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
14104 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
14105 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
14106 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
14109 for (i = 0; i < 16; i++)
14110 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
14112 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
14113 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
14117 gcc_unreachable ();
14123 /* Expand conditional increment or decrement using adc/sbb instructions.
14124 The default case using setcc followed by the conditional move can be
14125 done by generic code. */
/* NOTE(review): fragmentary listing -- return type, braces and several
   case labels are missing between the embedded line numbers.  */
14127 ix86_expand_int_addcc (rtx operands[])
14129 enum rtx_code code = GET_CODE (operands[1]);
14131 rtx val = const0_rtx;
14132 bool fpcmp = false;
14133 enum machine_mode mode = GET_MODE (operands[0]);
/* Only +1/-1 adjustments can be implemented via the carry flag.  */
14135 if (operands[3] != const1_rtx
14136 && operands[3] != constm1_rtx)
14138 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14139 ix86_compare_op1, &compare_op))
14141 code = GET_CODE (compare_op);
14143 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14144 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14147 code = ix86_fp_compare_code_to_integer (code);
/* If the comparison must be reversed, use the unordered-aware reversal
   for FP comparisons and the plain one otherwise (the guarding if/else
   lines are missing from this fragment).  */
14154 PUT_CODE (compare_op,
14155 reverse_condition_maybe_unordered
14156 (GET_CODE (compare_op)));
14158 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14160 PUT_MODE (compare_op, mode);
14162 /* Construct either adc or sbb insn. */
14163 if ((code == LTU) == (operands[3] == constm1_rtx))
14165 switch (GET_MODE (operands[0]))
14168 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
14171 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
14174 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
14177 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14180 gcc_unreachable ();
14185 switch (GET_MODE (operands[0]))
14188 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
14191 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
14194 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
14197 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
14200 gcc_unreachable ();
14203 return 1; /* DONE */
14207 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
14208 works for floating point parameters and nonoffsetable memories.
14209 For pushes, it returns just stack offsets; the values will be saved
14210 in the right order.  Maximally three parts are generated. */
/* NOTE(review): fragmentary listing -- the TARGET_64BIT if/else structure,
   braces and several case labels are missing between the line numbers.  */
14213 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: !TARGET_64BIT uses 4-byte parts (XFmode is 12 bytes -> 3);
   TARGET_64BIT uses 8-byte parts.  */
14218 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
14220 size = (GET_MODE_SIZE (mode) + 4) / 8;
14222 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
14223 gcc_assert (size >= 2 && size <= 4);
14225 /* Optimize constant pool reference to immediates.  This is used by fp
14226 moves, that force all constants to memory to allow combining. */
14227 if (MEM_P (operand) && MEM_READONLY_P (operand))
14229 rtx tmp = maybe_get_pool_constant (operand);
14234 if (MEM_P (operand) && !offsettable_memref_p (operand))
14236 /* The only non-offsetable memories we handle are pushes. */
14237 int ok = push_operand (operand, VOIDmode);
14241 operand = copy_rtx (operand);
14242 PUT_MODE (operand, Pmode);
14243 parts[0] = parts[1] = parts[2] = parts[3] = operand;
14247 if (GET_CODE (operand) == CONST_VECTOR)
14249 enum machine_mode imode = int_mode_for_mode (mode);
14250 /* Caution: if we looked through a constant pool memory above,
14251 the operand may actually have a different mode now.  That's
14252 ok, since we want to pun this all the way back to an integer. */
14253 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
14254 gcc_assert (operand != NULL);
/* 32-bit path: split into SImode pieces.  */
14260 if (mode == DImode)
14261 split_di (&operand, 1, &parts[0], &parts[1]);
14266 if (REG_P (operand))
14268 gcc_assert (reload_completed);
14269 for (i = 0; i < size; i++)
14270 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
14272 else if (offsettable_memref_p (operand))
14274 operand = adjust_address (operand, SImode, 0);
14275 parts[0] = operand;
14276 for (i = 1; i < size; i++)
14277 parts[i] = adjust_address (operand, SImode, 4 * i);
14279 else if (GET_CODE (operand) == CONST_DOUBLE)
14284 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Convert the FP constant to its target word image and emit each
   32-bit word as an immediate (mode dispatch lines missing here).  */
14288 real_to_target (l, &r, mode);
14289 parts[3] = gen_int_mode (l[3], SImode);
14290 parts[2] = gen_int_mode (l[2], SImode);
14293 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
14294 parts[2] = gen_int_mode (l[2], SImode);
14297 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
14300 gcc_unreachable ();
14302 parts[1] = gen_int_mode (l[1], SImode);
14303 parts[0] = gen_int_mode (l[0], SImode);
14306 gcc_unreachable ();
/* 64-bit path: split into DImode pieces; XFmode's upper part is SImode.  */
14311 if (mode == TImode)
14312 split_ti (&operand, 1, &parts[0], &parts[1]);
14313 if (mode == XFmode || mode == TFmode)
14315 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
14316 if (REG_P (operand))
14318 gcc_assert (reload_completed);
14319 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
14320 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
14322 else if (offsettable_memref_p (operand))
14324 operand = adjust_address (operand, DImode, 0);
14325 parts[0] = operand;
14326 parts[1] = adjust_address (operand, upper_mode, 8);
14328 else if (GET_CODE (operand) == CONST_DOUBLE)
14333 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
14334 real_to_target (l, &r, mode);
14336 /* Do not use shift by 32 to avoid warning on 32bit systems. */
14337 if (HOST_BITS_PER_WIDE_INT >= 64)
14340 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
14341 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
14344 parts[0] = immed_double_const (l[0], l[1], DImode);
14346 if (upper_mode == SImode)
14347 parts[1] = gen_int_mode (l[2], SImode);
14348 else if (HOST_BITS_PER_WIDE_INT >= 64)
14351 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
14352 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
14355 parts[1] = immed_double_const (l[2], l[3], DImode);
14358 gcc_unreachable ();
14365 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
14366 Return false when normal moves are needed; true when all required
14367 insns have been emitted.  Operands 2-4 contain the input values
14368 in the correct order; operands 5-7 contain the output values. */
/* NOTE(review): fragmentary listing -- braces, several if/else lines and
   push-emission statements are missing between the embedded numbers.  */
14371 ix86_split_long_move (rtx operands[])
14376 int collisions = 0;
14377 enum machine_mode mode = GET_MODE (operands[0]);
14378 bool collisionparts[4];
14380 /* The DFmode expanders may ask us to move double.
14381 For 64bit target this is single move.  By hiding the fact
14382 here we simplify i386.md splitters. */
14383 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
14385 /* Optimize constant pool reference to immediates.  This is used by
14386 fp moves, that force all constants to memory to allow combining. */
14388 if (MEM_P (operands[1])
14389 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
14390 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
14391 operands[1] = get_pool_constant (XEXP (operands[1], 0));
14392 if (push_operand (operands[0], VOIDmode))
14394 operands[0] = copy_rtx (operands[0]);
14395 PUT_MODE (operands[0], Pmode);
14398 operands[0] = gen_lowpart (DImode, operands[0]);
14399 operands[1] = gen_lowpart (DImode, operands[1]);
14400 emit_move_insn (operands[0], operands[1]);
14404 /* The only non-offsettable memory we handle is push. */
14405 if (push_operand (operands[0], VOIDmode))
14408 gcc_assert (!MEM_P (operands[0])
14409 || offsettable_memref_p (operands[0]));
/* Split both operands into word-size parts.  */
14411 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
14412 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
14414 /* When emitting push, take care for source operands on the stack. */
14415 if (push && MEM_P (operands[1])
14416 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
14417 for (i = 0; i < nparts - 1; i++)
14418 part[1][i] = change_address (part[1][i],
14419 GET_MODE (part[1][i]),
14420 XEXP (part[1][i + 1], 0));
14422 /* We need to do copy in the right order in case an address register
14423 of the source overlaps the destination. */
14424 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
14428 for (i = 0; i < nparts; i++)
14431 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
14432 if (collisionparts[i])
14436 /* Collision in the middle part can be handled by reordering. */
14437 if (collisions == 1 && nparts == 3 && collisionparts [1])
14439 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14440 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14442 else if (collisions == 1
14444 && (collisionparts [1] || collisionparts [2]))
14446 if (collisionparts [1])
14448 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
14449 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
14453 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
14454 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
14458 /* If there are more collisions, we can't handle it by reordering.
14459 Do an lea to the last part and use only one colliding move. */
14460 else if (collisions > 1)
14466 base = part[0][nparts - 1];
14468 /* Handle the case when the last part isn't valid for lea.
14469 Happens in 64-bit mode storing the 12-byte XFmode. */
14470 if (GET_MODE (base) != Pmode)
14471 base = gen_rtx_REG (Pmode, REGNO (base));
14473 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
14474 part[1][0] = replace_equiv_address (part[1][0], base);
14475 for (i = 1; i < nparts; i++)
14477 tmp = plus_constant (base, UNITS_PER_WORD * i);
14478 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push handling (enclosing if/else lines missing from this fragment):
   emit the high parts first; XFmode on 32-bit needs 4 bytes of padding.  */
14489 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
14490 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
14491 emit_move_insn (part[0][2], part[1][2]);
14493 else if (nparts == 4)
14495 emit_move_insn (part[0][3], part[1][3]);
14496 emit_move_insn (part[0][2], part[1][2]);
14501 /* In 64bit mode we don't have 32bit push available.  In case this is
14502 register, it is OK - we will just use larger counterpart.  We also
14503 retype memory - these comes from attempt to avoid REX prefix on
14504 moving of second half of TFmode value. */
14505 if (GET_MODE (part[1][1]) == SImode)
14507 switch (GET_CODE (part[1][1]))
14510 part[1][1] = adjust_address (part[1][1], DImode, 0);
14514 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
14518 gcc_unreachable ();
14521 if (GET_MODE (part[1][0]) == SImode)
14522 part[1][0] = part[1][1];
14525 emit_move_insn (part[0][1], part[1][1]);
14526 emit_move_insn (part[0][0], part[1][0]);
14530 /* Choose correct order to not overwrite the source before it is copied. */
14531 if ((REG_P (part[0][0])
14532 && REG_P (part[1][1])
14533 && (REGNO (part[0][0]) == REGNO (part[1][1])
14535 && REGNO (part[0][0]) == REGNO (part[1][2]))
14537 && REGNO (part[0][0]) == REGNO (part[1][3]))))
14539 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Copy backwards when the destination's first part overlaps the source.  */
14541 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
14543 operands[2 + i] = part[0][j];
14544 operands[6 + i] = part[1][j];
14549 for (i = 0; i < nparts; i++)
14551 operands[2 + i] = part[0][i];
14552 operands[6 + i] = part[1][i];
14556 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
14559 for (j = 0; j < nparts - 1; j++)
14560 if (CONST_INT_P (operands[6 + j])
14561 && operands[6 + j] != const0_rtx
14562 && REG_P (operands[2 + j]))
14563 for (i = j; i < nparts - 1; i++)
14564 if (CONST_INT_P (operands[7 + i])
14565 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
14566 operands[7 + i] = operands[2 + j];
14569 for (i = 0; i < nparts; i++)
14570 emit_move_insn (operands[2 + i], operands[6 + i]);
14575 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
14576 left shift by a constant, either using a single shift or
14577 a sequence of add instructions. */
/* NOTE(review): MODE here is the full double-word mode (DImode or TImode);
   OPERAND is one half-width part, hence DImode selects the SImode insns.
   The count==1 guard and some generator lines are missing from this
   fragmentary listing.  */
14580 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
14584 emit_insn ((mode == DImode
14586 : gen_adddi3) (operand, operand, operand));
/* When not optimizing for size, a short run of adds can be cheaper than
   a shift-by-constant.  */
14588 else if (!optimize_size
14589 && count * ix86_cost->add <= ix86_cost->shift_const)
14592 for (i=0; i<count; i++)
14594 emit_insn ((mode == DImode
14596 : gen_adddi3) (operand, operand, operand));
14600 emit_insn ((mode == DImode
14602 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into half-width operations.  SCRATCH, if available with CMOV, avoids a
   branch for the variable-count adjustment.
   NOTE(review): fragmentary listing -- braces, else lines and some
   generator names are missing between the embedded line numbers.  */
14606 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
14608 rtx low[2], high[2];
14610 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: resolve everything at expand time.  */
14612 if (CONST_INT_P (operands[2]))
14614 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14615 count = INTVAL (operands[2]) & (single_width * 2 - 1);
14617 if (count >= single_width)
14619 emit_move_insn (high[0], low[1]);
14620 emit_move_insn (low[0], const0_rtx);
14622 if (count > single_width)
14623 ix86_expand_ashl_const (high[0], count - single_width, mode);
14627 if (!rtx_equal_p (operands[0], operands[1]))
14628 emit_move_insn (operands[0], operands[1]);
14629 emit_insn ((mode == DImode
14631 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
14632 ix86_expand_ashl_const (low[0], count, mode);
14637 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* Special-case 1 << N.  */
14639 if (operands[1] == const1_rtx)
14641 /* Assuming we've chosen a QImode capable registers, then 1 << N
14642 can be done with two 32/64-bit shifts, no branches, no cmoves. */
14643 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
14645 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
14647 ix86_expand_clear (low[0]);
14648 ix86_expand_clear (high[0]);
14649 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
/* Set low = (count < single_width), high = !low via setcc on ZF.  */
14651 d = gen_lowpart (QImode, low[0]);
14652 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14653 s = gen_rtx_EQ (QImode, flags, const0_rtx);
14654 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14656 d = gen_lowpart (QImode, high[0]);
14657 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
14658 s = gen_rtx_NE (QImode, flags, const0_rtx);
14659 emit_insn (gen_rtx_SET (VOIDmode, d, s));
14662 /* Otherwise, we can get the same results by manually performing
14663 a bit extract operation on bit 5/6, and then performing the two
14664 shifts.  The two methods of getting 0/1 into low/high are exactly
14665 the same size.  Avoiding the shift in the bit extract case helps
14666 pentium4 a bit; no one else seems to care much either way. */
14671 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
14672 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
14674 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
14675 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
14677 emit_insn ((mode == DImode
14679 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
14680 emit_insn ((mode == DImode
14682 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
14683 emit_move_insn (low[0], high[0]);
14684 emit_insn ((mode == DImode
14686 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
14689 emit_insn ((mode == DImode
14691 : gen_ashldi3) (low[0], low[0], operands[2]));
14692 emit_insn ((mode == DImode
14694 : gen_ashldi3) (high[0], high[0], operands[2]));
/* Special-case -1 << N: shifting ones into a -1 needs no shld.  */
14698 if (operands[1] == constm1_rtx)
14700 /* For -1 << N, we can avoid the shld instruction, because we
14701 know that we're shifting 0...31/63 ones into a -1. */
14702 emit_move_insn (low[0], constm1_rtx)
14704 emit_move_insn (high[0], low[0]);
14706 emit_move_insn (high[0], constm1_rtx);
/* General variable-count case: shld + shl, then adjust for count >=
   single_width either branchlessly (cmov + scratch) or via a branch.  */
14710 if (!rtx_equal_p (operands[0], operands[1]))
14711 emit_move_insn (operands[0], operands[1]);
14713 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14714 emit_insn ((mode == DImode
14716 : gen_x86_64_shld) (high[0], low[0], operands[2]));
14719 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
14721 if (TARGET_CMOVE && scratch)
14723 ix86_expand_clear (scratch);
14724 emit_insn ((mode == DImode
14725 ? gen_x86_shift_adj_1
14726 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
14730 emit_insn ((mode == DImode
14731 ? gen_x86_shift_adj_2
14732 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into half-width operations.
   Mirrors ix86_split_ashl; the sign is propagated from the high part.
   NOTE(review): fragmentary listing -- braces and some generator names
   are missing between the embedded line numbers.  */
14736 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
14738 rtx low[2], high[2];
14740 const int single_width = mode == DImode ? 32 : 64;
14742 if (CONST_INT_P (operands[2]))
14744 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14745 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by width*2-1: result is the sign bit replicated everywhere.  */
14747 if (count == single_width * 2 - 1)
14749 emit_move_insn (high[0], high[1]);
14750 emit_insn ((mode == DImode
14752 : gen_ashrdi3) (high[0], high[0],
14753 GEN_INT (single_width - 1)));
14754 emit_move_insn (low[0], high[0]);
/* count >= width: low gets the (shifted) old high; new high is all
   sign bits.  */
14757 else if (count >= single_width)
14759 emit_move_insn (low[0], high[1]);
14760 emit_move_insn (high[0], low[0]);
14761 emit_insn ((mode == DImode
14763 : gen_ashrdi3) (high[0], high[0],
14764 GEN_INT (single_width - 1)));
14765 if (count > single_width)
14766 emit_insn ((mode == DImode
14768 : gen_ashrdi3) (low[0], low[0],
14769 GEN_INT (count - single_width)));
/* count < width: shrd for the low part, sar for the high part.  */
14773 if (!rtx_equal_p (operands[0], operands[1]))
14774 emit_move_insn (operands[0], operands[1]);
14775 emit_insn ((mode == DImode
14777 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14778 emit_insn ((mode == DImode
14780 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then adjust for count >= width, using
   cmov with a sign-filled scratch when available.  */
14785 if (!rtx_equal_p (operands[0], operands[1]))
14786 emit_move_insn (operands[0], operands[1]);
14788 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14790 emit_insn ((mode == DImode
14792 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14793 emit_insn ((mode == DImode
14795 : gen_ashrdi3) (high[0], high[0], operands[2]));
14797 if (TARGET_CMOVE && scratch)
14799 emit_move_insn (scratch, high[0]);
14800 emit_insn ((mode == DImode
14802 : gen_ashrdi3) (scratch, scratch,
14803 GEN_INT (single_width - 1)));
14804 emit_insn ((mode == DImode
14805 ? gen_x86_shift_adj_1
14806 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
14810 emit_insn ((mode == DImode
14811 ? gen_x86_shift_adj_3
14812 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into half-width operations.
   Like ix86_split_ashr but zero-fills instead of sign-filling.
   NOTE(review): fragmentary listing -- braces and some generator names
   are missing between the embedded line numbers.  */
14817 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
14819 rtx low[2], high[2];
14821 const int single_width = mode == DImode ? 32 : 64;
14823 if (CONST_INT_P (operands[2]))
14825 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
14826 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* count >= width: low gets the old high (further shifted); high = 0.  */
14828 if (count >= single_width)
14830 emit_move_insn (low[0], high[1]);
14831 ix86_expand_clear (high[0]);
14833 if (count > single_width)
14834 emit_insn ((mode == DImode
14836 : gen_lshrdi3) (low[0], low[0],
14837 GEN_INT (count - single_width)));
/* count < width: shrd for the low part, shr for the high part.  */
14841 if (!rtx_equal_p (operands[0], operands[1]))
14842 emit_move_insn (operands[0], operands[1]);
14843 emit_insn ((mode == DImode
14845 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
14846 emit_insn ((mode == DImode
14848 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + shr, then adjust for count >= width.  */
14853 if (!rtx_equal_p (operands[0], operands[1]))
14854 emit_move_insn (operands[0], operands[1]);
14856 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
14858 emit_insn ((mode == DImode
14860 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
14861 emit_insn ((mode == DImode
14863 : gen_lshrdi3) (high[0], high[0], operands[2]));
14865 /* Heh.  By reversing the arguments, we can reuse this pattern. */
14866 if (TARGET_CMOVE && scratch)
14868 ix86_expand_clear (scratch);
14869 emit_insn ((mode == DImode
14870 ? gen_x86_shift_adj_1
14871 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
14875 emit_insn ((mode == DImode
14876 ? gen_x86_shift_adj_2
14877 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
14881 /* Predict just emitted jump instruction to be taken with probability PROB. */
14883 predict_jump (int prob)
14885 rtx insn = get_last_insn ();
14886 gcc_assert (JUMP_P (insn));
/* Attach a REG_BR_PROB note to the jump.  The assignment target line
   (presumably REG_NOTES (insn) = ...) is missing from this fragment.  */
14888 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14893 /* Helper function for the string operations below.  Test whether VARIABLE
14894 is aligned to VALUE bytes.  If true, jump to the label.
   Returns the label; EPILOGUE selects the branch-probability hint.  */
14896 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
14898 rtx label = gen_label_rtx ();
14899 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
/* tmpcount = variable & value; jump to LABEL when the tested bits are
   clear.  */
14900 if (GET_MODE (variable) == DImode)
14901 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
14903 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
14904 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14907 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14909 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14913 /* Adjust COUNTER by the VALUE, i.e. emit countreg -= value in the
   counter's own mode. */
14915 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14917 if (GET_MODE (countreg) == DImode)
14918 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)))
14920 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14923 /* Zero extend possibly SImode EXP to Pmode register.  Always returns a
   fresh Pmode register holding EXP's value. */
14925 ix86_zero_extend_to_Pmode (rtx exp)
/* VOIDmode: EXP is a constant; let force_reg pick Pmode.  */
14928 if (GET_MODE (exp) == VOIDmode)
14929 return force_reg (Pmode, exp);
14930 if (GET_MODE (exp) == Pmode)
14931 return copy_to_mode_reg (Pmode, exp);
/* Otherwise EXP is SImode on a 64-bit target: zero-extend to DImode.  */
14932 r = gen_reg_rtx (Pmode);
14933 emit_insn (gen_zero_extendsidi2 (r, exp));
14937 /* Divide COUNTREG by SCALE (a power of two), returning the quotient as
   an rtx.  Constants are folded; registers get a logical right shift. */
14939 scale_counter (rtx countreg, int scale)
14942 rtx piece_size_mask;
14946 if (CONST_INT_P (countreg))
14947 return GEN_INT (INTVAL (countreg) / scale);
14948 gcc_assert (REG_P (countreg));
14950 piece_size_mask = GEN_INT (scale - 1);
14951 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14952 GEN_INT (exact_log2 (scale)),
14953 NULL, 1, OPTAB_DIRECT);
14957 /* Return mode for the memcpy/memset loop counter.  Prefer SImode over
14958 DImode for constant loop counts. */
14960 static enum machine_mode
14961 counter_mode (rtx count_exp)
14963 if (GET_MODE (count_exp) != VOIDmode)
14964 return GET_MODE (count_exp);
14965 if (GET_CODE (count_exp) != CONST_INT)
/* Constant count: DImode only when the value doesn't fit in 32 bits
   (the return statements are missing from this fragmentary listing).  */
14967 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14972 /* When SRCPTR is non-NULL, output simple loop to move memory
14973 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
14974 overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
14975 equivalent loop to set memory by VALUE (supposed to be in MODE).
14977 The size is rounded down to whole number of chunk size moved at once.
14978 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): fragmentary listing -- braces and several if/else lines
   are missing between the embedded line numbers.  */
14982 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14983 rtx destptr, rtx srcptr, rtx value,
14984 rtx count, enum machine_mode mode, int unroll,
14987 rtx out_label, top_label, iter, tmp;
14988 enum machine_mode iter_mode = counter_mode (count);
14989 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14990 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14996 top_label = gen_label_rtx ();
14997 out_label = gen_label_rtx ();
14998 iter = gen_reg_rtx (iter_mode);
/* size = count rounded down to a multiple of the unrolled chunk size.  */
15000 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
15001 NULL, 1, OPTAB_DIRECT);
15002 /* Those two should combine. */
15003 if (piece_size == const1_rtx)
15005 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
15007 predict_jump (REG_BR_PROB_BASE * 10 / 100);
15009 emit_move_insn (iter, const0_rtx);
15011 emit_label (top_label);
15013 tmp = convert_modes (Pmode, iter_mode, iter, true);
15014 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
15015 destmem = change_address (destmem, mode, x_addr);
15019 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
15020 srcmem = change_address (srcmem, mode, y_addr);
15022 /* When unrolling for chips that reorder memory reads and writes,
15023 we can save registers by using single temporary.
15024 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the "&& 0" disables this branch unconditionally --
   looks intentional (dead alternative kept for reference); confirm.  */
15025 if (!TARGET_64BIT && 0)
15027 for (i = 0; i < unroll; i++)
15032 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15034 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
15036 emit_move_insn (destmem, srcmem);
/* Live path for copies: load all chunks into temporaries, then store,
   so reads and writes don't interleave.  */
15042 gcc_assert (unroll <= 4);
15043 for (i = 0; i < unroll; i++)
15045 tmpreg[i] = gen_reg_rtx (mode);
15049 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
15051 emit_move_insn (tmpreg[i], srcmem);
15053 for (i = 0; i < unroll; i++)
15058 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15060 emit_move_insn (destmem, tmpreg[i]);
/* Set path (SRCPTR == NULL): store VALUE into each chunk.  */
15065 for (i = 0; i < unroll; i++)
15069 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
15070 emit_move_insn (destmem, value);
15073 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
15074 true, OPTAB_LIB_WIDEN);
15076 emit_move_insn (iter, tmp);
15078 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Derive the backedge probability from the caller's expected size.  */
15080 if (expected_size != -1)
15082 expected_size /= GET_MODE_SIZE (mode) * unroll;
15083 if (expected_size == 0)
15085 else if (expected_size > REG_BR_PROB_BASE)
15086 predict_jump (REG_BR_PROB_BASE - 1);
15088 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
15091 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the pointers past the copied region for the epilogue code.  */
15092 iter = ix86_zero_extend_to_Pmode (iter);
15093 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
15094 true, OPTAB_LIB_WIDEN);
15095 if (tmp != destptr)
15096 emit_move_insn (destptr, tmp);
15099 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
15100 true, OPTAB_LIB_WIDEN);
15102 emit_move_insn (srcptr, tmp);
15104 emit_label (out_label);
15107 /* Output "rep; mov" instruction.
15108 Arguments have same meaning as for previous function */
15110 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
15111 rtx destptr, rtx srcptr,
15113 enum machine_mode mode)
15119 /* If the size is known, it is shorter to use rep movs. */
/* Known count divisible by 4: widen QImode to SImode for fewer
   iterations (the widening statement is missing from this fragment).  */
15120 if (mode == QImode && CONST_INT_P (count)
15121 && !(INTVAL (count) & 3))
/* Ensure the MEMs are BLKmode with the pointer registers as addresses,
   as required by the rep_mov pattern.  */
15124 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15125 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
15126 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
15127 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
15128 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final-pointer expressions (ptr + countreg * piece size)
   consumed by the rep_mov pattern.  */
15129 if (mode != QImode)
15131 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15132 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15133 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15134 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
15135 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15136 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
15140 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15141 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
15143 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
15147 /* Output "rep; stos" instruction.
15148 Arguments have same meaning as for previous function */
15150 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
15152 enum machine_mode mode)
/* Normalize DESTMEM to a BLKmode reference addressed by DESTPTR.  */
15157 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
15158 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* VALUE must live in a register of the store width.  */
15159 value = force_reg (mode, gen_lowpart (mode, value));
15160 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final destination pointer: destptr + countreg * piece size.  */
15161 if (mode != QImode)
15163 destexp = gen_rtx_ASHIFT (Pmode, countreg,
15164 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
15165 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
15168 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
15169 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single MODE-sized move from SRCMEM+OFFSET to DESTMEM+OFFSET via
   the strmov pattern, advancing SRCPTR and DESTPTR.  */
15173 emit_strmov (rtx destmem, rtx srcmem,
15174 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
15176 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
15177 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
15178 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15181 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* Epilogue of a block copy: handle the residual bytes left over after the
   main loop, either by a straight-line sequence of moves when COUNT is a
   compile-time constant, or by alignment-tested conditional moves otherwise.
   NOTE(review): this extract is missing many lines (braces, `else` arms,
   declarations, returns -- see gaps in embedded numbering); the code below
   is annotated in place, byte-identical.  */
15183 expand_movmem_epilogue (rtx destmem, rtx srcmem,
15184 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant COUNT: emit an unconditional move for each set bit of the
   residual, from the largest chunk (16 bytes) down to a single byte.  */
15187 if (CONST_INT_P (count))
15189 HOST_WIDE_INT countval = INTVAL (count);
15192 if ((countval & 0x10) && max_size > 16)
15196 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
15197 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
15200 gcc_unreachable ();
15203 if ((countval & 0x08) && max_size > 8)
15206 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
/* Non-64-bit path: an 8-byte residual is two SImode moves.  */
15209 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15210 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
15214 if ((countval & 0x04) && max_size > 4)
15216 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
15219 if ((countval & 0x02) && max_size > 2)
15221 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
15224 if ((countval & 0x01) && max_size > 1)
15226 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residual: mask COUNT down and fall back to a byte loop.  */
15233 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
15234 count, 1, OPTAB_DIRECT);
15235 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
15236 count, QImode, 1, 4);
15240 /* When there are stringops, we can cheaply increase dest and src pointers.
15241 Otherwise we save code size by maintaining offset (zero is readily
15242 available from preceding rep operation) and using x86 addressing modes.
/* Variable residual, stringops available: one alignment test + one
   auto-incrementing string move per power-of-two chunk size.  */
15244 if (TARGET_SINGLE_STRINGOP)
15248 rtx label = ix86_expand_aligntest (count, 4, true);
15249 src = change_address (srcmem, SImode, srcptr);
15250 dest = change_address (destmem, SImode, destptr);
15251 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15252 emit_label (label);
15253 LABEL_NUSES (label) = 1;
15257 rtx label = ix86_expand_aligntest (count, 2, true);
15258 src = change_address (srcmem, HImode, srcptr);
15259 dest = change_address (destmem, HImode, destptr);
15260 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15261 emit_label (label);
15262 LABEL_NUSES (label) = 1;
15266 rtx label = ix86_expand_aligntest (count, 1, true);
15267 src = change_address (srcmem, QImode, srcptr);
15268 dest = change_address (destmem, QImode, destptr);
15269 emit_insn (gen_strmov (destptr, dest, srcptr, src));
15270 emit_label (label);
15271 LABEL_NUSES (label) = 1;
/* No cheap stringops: keep a register OFFSET (starting at 0) and use
   base+offset addressing; bump OFFSET after each conditional move.  */
15276 rtx offset = force_reg (Pmode, const0_rtx);
15281 rtx label = ix86_expand_aligntest (count, 4, true);
15282 src = change_address (srcmem, SImode, srcptr);
15283 dest = change_address (destmem, SImode, destptr);
15284 emit_move_insn (dest, src);
15285 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
15286 true, OPTAB_LIB_WIDEN);
15288 emit_move_insn (offset, tmp);
15289 emit_label (label);
15290 LABEL_NUSES (label) = 1;
15294 rtx label = ix86_expand_aligntest (count, 2, true);
15295 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15296 src = change_address (srcmem, HImode, tmp);
15297 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15298 dest = change_address (destmem, HImode, tmp);
15299 emit_move_insn (dest, src);
15300 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
15301 true, OPTAB_LIB_WIDEN);
15303 emit_move_insn (offset, tmp);
15304 emit_label (label);
15305 LABEL_NUSES (label) = 1;
/* Final possible byte: no offset update needed afterwards.  */
15309 rtx label = ix86_expand_aligntest (count, 1, true);
15310 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
15311 src = change_address (srcmem, QImode, tmp);
15312 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
15313 dest = change_address (destmem, QImode, tmp);
15314 emit_move_insn (dest, src);
15315 emit_label (label);
15316 LABEL_NUSES (label) = 1;
15321 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Byte-loop fallback epilogue for memset: mask COUNT to the residual and
   store VALUE (narrowed to QImode) one byte at a time.  Used when the
   promoted wide VALUE is not available in the epilogue.  */
15323 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
15324 rtx count, int max_size)
15327 expand_simple_binop (counter_mode (count), AND, count,
15328 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
15329 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
15330 gen_lowpart (QImode, value), count, QImode,
15334 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Epilogue of a block clear/fill: mirror of expand_movmem_epilogue for
   memset.  VALUE is expected to already hold the byte replicated to the
   widest mode used below (see promote_duplicated_reg).
   NOTE(review): many lines are missing from this extract (gaps in embedded
   numbering); code left byte-identical, comments only.  */
15336 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
/* Constant COUNT: straight-line stores for each set residual bit,
   largest chunk first.  */
15340 if (CONST_INT_P (count))
15342 HOST_WIDE_INT countval = INTVAL (count);
15345 if ((countval & 0x10) && max_size > 16)
15349 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15350 emit_insn (gen_strset (destptr, dest, value));
15351 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
15352 emit_insn (gen_strset (destptr, dest, value));
15355 gcc_unreachable ();
15358 if ((countval & 0x08) && max_size > 8)
15362 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
15363 emit_insn (gen_strset (destptr, dest, value));
/* Non-64-bit path: an 8-byte residual is two SImode stores.  */
15367 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15368 emit_insn (gen_strset (destptr, dest, value));
15369 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
15370 emit_insn (gen_strset (destptr, dest, value));
15374 if ((countval & 0x04) && max_size > 4)
15376 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
15377 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15380 if ((countval & 0x02) && max_size > 2)
15382 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
15383 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15386 if ((countval & 0x01) && max_size > 1)
15388 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
15389 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Large variable residual: fall back to the byte loop.  */
15396 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Variable residual: one alignment test + auto-incrementing string-set
   stores per power-of-two chunk size, 16 bytes down to 1.  */
15401 rtx label = ix86_expand_aligntest (count, 16, true);
15404 dest = change_address (destmem, DImode, destptr);
15405 emit_insn (gen_strset (destptr, dest, value));
15406 emit_insn (gen_strset (destptr, dest, value));
/* Non-64-bit: 16 bytes as four SImode string-set stores.  */
15410 dest = change_address (destmem, SImode, destptr);
15411 emit_insn (gen_strset (destptr, dest, value));
15412 emit_insn (gen_strset (destptr, dest, value));
15413 emit_insn (gen_strset (destptr, dest, value));
15414 emit_insn (gen_strset (destptr, dest, value));
15416 emit_label (label);
15417 LABEL_NUSES (label) = 1;
15421 rtx label = ix86_expand_aligntest (count, 8, true);
15424 dest = change_address (destmem, DImode, destptr);
15425 emit_insn (gen_strset (destptr, dest, value));
15429 dest = change_address (destmem, SImode, destptr);
15430 emit_insn (gen_strset (destptr, dest, value));
15431 emit_insn (gen_strset (destptr, dest, value));
15433 emit_label (label);
15434 LABEL_NUSES (label) = 1;
15438 rtx label = ix86_expand_aligntest (count, 4, true);
15439 dest = change_address (destmem, SImode, destptr);
15440 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
15441 emit_label (label);
15442 LABEL_NUSES (label) = 1;
15446 rtx label = ix86_expand_aligntest (count, 2, true);
15447 dest = change_address (destmem, HImode, destptr);
15448 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
15449 emit_label (label);
15450 LABEL_NUSES (label) = 1;
15454 rtx label = ix86_expand_aligntest (count, 1, true);
15455 dest = change_address (destmem, QImode, destptr);
15456 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
15457 emit_label (label);
15458 LABEL_NUSES (label) = 1;
15462 /* Copy enough from DEST to SRC to align DEST known to by aligned by ALIGN to
15463 DESIRED_ALIGNMENT. */
/* Alignment prologue for memcpy: for each alignment level below
   DESIRED_ALIGNMENT, conditionally copy 1/2/4 bytes (testing DESTPTR's low
   bits) and shrink COUNT accordingly, so the main loop starts aligned.
   Caller must guarantee the block is large enough (see prologue guard in
   ix86_expand_movmem).  */
15465 expand_movmem_prologue (rtx destmem, rtx srcmem,
15466 rtx destptr, rtx srcptr, rtx count,
15467 int align, int desired_alignment)
15469 if (align <= 1 && desired_alignment > 1)
15471 rtx label = ix86_expand_aligntest (destptr, 1, false);
15472 srcmem = change_address (srcmem, QImode, srcptr);
15473 destmem = change_address (destmem, QImode, destptr);
15474 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15475 ix86_adjust_counter (count, 1);
15476 emit_label (label);
15477 LABEL_NUSES (label) = 1;
15479 if (align <= 2 && desired_alignment > 2)
15481 rtx label = ix86_expand_aligntest (destptr, 2, false);
15482 srcmem = change_address (srcmem, HImode, srcptr);
15483 destmem = change_address (destmem, HImode, destptr);
15484 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15485 ix86_adjust_counter (count, 2);
15486 emit_label (label);
15487 LABEL_NUSES (label) = 1;
15489 if (align <= 4 && desired_alignment > 4)
15491 rtx label = ix86_expand_aligntest (destptr, 4, false);
15492 srcmem = change_address (srcmem, SImode, srcptr);
15493 destmem = change_address (destmem, SImode, destptr);
15494 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
15495 ix86_adjust_counter (count, 4);
15496 emit_label (label);
15497 LABEL_NUSES (label) = 1;
/* The cascade above only reaches 8-byte alignment.  */
15499 gcc_assert (desired_alignment <= 8);
15502 /* Set enough from DEST to align DEST known to by aligned by ALIGN to
15503 DESIRED_ALIGNMENT. */
/* Alignment prologue for memset: same cascade as expand_movmem_prologue,
   but emitting string-set stores of VALUE narrowed to each chunk mode.  */
15505 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
15506 int align, int desired_alignment)
15508 if (align <= 1 && desired_alignment > 1)
15510 rtx label = ix86_expand_aligntest (destptr, 1, false);
15511 destmem = change_address (destmem, QImode, destptr);
15512 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
15513 ix86_adjust_counter (count, 1);
15514 emit_label (label);
15515 LABEL_NUSES (label) = 1;
15517 if (align <= 2 && desired_alignment > 2)
15519 rtx label = ix86_expand_aligntest (destptr, 2, false);
15520 destmem = change_address (destmem, HImode, destptr);
15521 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
15522 ix86_adjust_counter (count, 2);
15523 emit_label (label);
15524 LABEL_NUSES (label) = 1;
15526 if (align <= 4 && desired_alignment > 4)
15528 rtx label = ix86_expand_aligntest (destptr, 4, false);
15529 destmem = change_address (destmem, SImode, destptr);
15530 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
15531 ix86_adjust_counter (count, 4);
15532 emit_label (label);
15533 LABEL_NUSES (label) = 1;
/* The cascade above only reaches 8-byte alignment.  */
15535 gcc_assert (desired_alignment <= 8);
15538 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
/* Choose the stringop algorithm (loop, rep-prefix variant, or libcall) for
   a memcpy (MEMSET false) or memset (MEMSET true) of COUNT bytes (0 if
   unknown) with profile-estimated EXPECTED_SIZE (-1 if unknown).  On
   return *DYNAMIC_CHECK is -1, or a size threshold above which a runtime
   libcall fallback should be emitted.
   NOTE(review): several lines are missing from this extract (memset test
   feeding the fixed-regs check, `if (memset)` selector, loop braces,
   `break`s); code left byte-identical.  */
15540 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
15541 int *dynamic_check)
15543 const struct stringop_algs * algs;
15544 /* Algorithms using the rep prefix want at least edi and ecx;
15545 additionally, memset wants eax and memcpy wants esi. Don't
15546 consider such algorithms if the user has appropriated those
15547 registers for their own purposes. */
15548 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
15550 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
15552 #define ALG_USABLE_P(alg) (rep_prefix_usable \
15553 || (alg != rep_prefix_1_byte \
15554 && alg != rep_prefix_4_byte \
15555 && alg != rep_prefix_8_byte))
15557 *dynamic_check = -1;
/* Pick the per-CPU cost table for memset vs. memcpy.  */
15559 algs = &ix86_cost->memset[TARGET_64BIT != 0];
15561 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy wins if usable.  */
15562 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
15563 return stringop_alg;
15564 /* rep; movq or rep; movl is the smallest variant. */
15565 else if (optimize_size)
15567 if (!count || (count & 3))
15568 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
15570 return rep_prefix_usable ? rep_prefix_4_byte : loop;
15572 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
15574 else if (expected_size != -1 && expected_size < 4)
15575 return loop_1_byte;
15576 else if (expected_size != -1)
15579 enum stringop_alg alg = libcall;
/* Scan the size buckets for the first usable non-libcall algorithm
   covering EXPECTED_SIZE.
   NOTE(review): `NAX_STRINGOP_ALGS` here and below looks like a typo for
   `MAX_STRINGOP_ALGS` -- verify against the declaration in i386.h.  */
15580 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15582 /* We get here if the algorithms that were not libcall-based
15583 were rep-prefix based and we are unable to use rep prefixes
15584 based on global register usage. Break out of the loop and
15585 use the heuristic below. */
15586 if (algs->size[i].max == 0)
15588 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
15590 enum stringop_alg candidate = algs->size[i].alg;
15592 if (candidate != libcall && ALG_USABLE_P (candidate))
15594 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
15595 last non-libcall inline algorithm. */
15596 if (TARGET_INLINE_ALL_STRINGOPS)
15598 /* When the current size is best to be copied by a libcall,
15599 but we are still forced to inline, run the heuristic below
15600 that will pick code for medium sized blocks. */
15601 if (alg != libcall)
15605 else if (ALG_USABLE_P (candidate))
15609 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
15611 /* When asked to inline the call anyway, try to pick meaningful choice.
15612 We look for maximal size of block that is faster to copy by hand and
15613 take blocks of at most of that size guessing that average size will
15614 be roughly half of the block.
15616 If this turns out to be bad, we might simply specify the preferred
15617 choice in ix86_costs. */
15618 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15619 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
15622 enum stringop_alg alg;
15624 bool any_alg_usable_p = true;
/* Find MAX = largest bucket handled by a usable inline algorithm.  */
15626 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
15628 enum stringop_alg candidate = algs->size[i].alg;
15629 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
15631 if (candidate != libcall && candidate
15632 && ALG_USABLE_P (candidate))
15633 max = algs->size[i].max;
15635 /* If there aren't any usable algorithms, then recursing on
15636 smaller sizes isn't going to find anything. Just return the
15637 simple byte-at-a-time copy loop. */
15638 if (!any_alg_usable_p)
15640 /* Pick something reasonable. */
15641 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15642 *dynamic_check = 128;
15643 return loop_1_byte;
/* Recurse with an assumed average size of half the inline maximum;
   the recursion cannot itself request a dynamic check or libcall.  */
15647 alg = decide_alg (count, max / 2, memset, dynamic_check);
15648 gcc_assert (*dynamic_check == -1);
15649 gcc_assert (alg != libcall);
15650 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
15651 *dynamic_check = max;
15654 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
15655 #undef ALG_USABLE_P
15658 /* Decide on alignment. We know that the operand is already aligned to ALIGN
15659 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Return the destination alignment the prologue should establish for ALG,
   possibly tuned by EXPECTED_SIZE.  Never returns less than ALIGN.
   NOTE(review): several case labels and `desired_align = ...` assignments
   fall in the gaps of this extract; code left byte-identical.  */
15661 decide_alignment (int align,
15662 enum stringop_alg alg,
15665 int desired_align = 0;
15669 gcc_unreachable ();
15671 case unrolled_loop:
15672 desired_align = GET_MODE_SIZE (Pmode);
15674 case rep_prefix_8_byte:
15677 case rep_prefix_4_byte:
15678 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15679 copying whole cacheline at once. */
15680 if (TARGET_PENTIUMPRO)
15685 case rep_prefix_1_byte:
15686 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
15687 copying whole cacheline at once. */
15688 if (TARGET_PENTIUMPRO)
/* Never ask for less than the known alignment, and don't bother
   aligning blocks expected to be tiny.  */
15702 if (desired_align < align)
15703 desired_align = align;
15704 if (expected_size != -1 && expected_size < 4)
15705 desired_align = align;
15706 return desired_align;
15709 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): the entire body of this helper (embedded lines 15712-15716)
   is missing from this extract; only the signature line survives.  */
15711 smallest_pow2_greater_than (int val)
15719 /* Expand string move (memcpy) operation. Use i386 string operations when
15720 profitable. expand_setmem contains similar code. The code depends upon
15721 architecture, block size and alignment, but always has the same
15724 1) Prologue guard: Conditional that jumps up to epilogues for small
15725 blocks that can be handled by epilogue alone. This is faster but
15726 also needed for correctness, since prologue assume the block is larger
15727 than the desired alignment.
15729 Optional dynamic check for size and libcall for large
15730 blocks is emitted here too, with -minline-stringops-dynamically.
15732 2) Prologue: copy first few bytes in order to get destination aligned
15733 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
15734 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
15735 We emit either a jump tree on power of two sized blocks, or a byte loop.
15737 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
15738 with specified algorithm.
15740 4) Epilogue: code copying tail of the block that is too small to be
15741 handled by main body (or up to size guarded by prologue guard). */
/* Top-level memcpy expander called from the movmem pattern.  Returns
   nonzero on success (return statements fall in the extract's gaps).
   NOTE(review): numerous lines (locals, braces, switch heads, breaks,
   returns) are missing from this extract; code left byte-identical.  */
15744 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
15745 rtx expected_align_exp, rtx expected_size_exp)
15751 rtx jump_around_label = NULL;
15752 HOST_WIDE_INT align = 1;
15753 unsigned HOST_WIDE_INT count = 0;
15754 HOST_WIDE_INT expected_size = -1;
15755 int size_needed = 0, epilogue_size_needed;
15756 int desired_align = 0;
15757 enum stringop_alg alg;
15760 if (CONST_INT_P (align_exp))
15761 align = INTVAL (align_exp);
15762 /* i386 can do misaligned access on reasonably increased cost. */
15763 if (CONST_INT_P (expected_align_exp)
15764 && INTVAL (expected_align_exp) > align)
15765 align = INTVAL (expected_align_exp);
15766 if (CONST_INT_P (count_exp))
15767 count = expected_size = INTVAL (count_exp);
15768 if (CONST_INT_P (expected_size_exp) && count == 0)
15769 expected_size = INTVAL (expected_size_exp);
15771 /* Make sure we don't need to care about overflow later on. */
15772 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
15775 /* Step 0: Decide on preferred algorithm, desired alignment and
15776 size of chunks to be copied by main loop. */
15778 alg = decide_alg (count, expected_size, false, &dynamic_check);
15779 desired_align = decide_alignment (align, alg, expected_size);
15781 if (!TARGET_ALIGN_STRINGOPS)
15782 align = desired_align;
15784 if (alg == libcall)
15786 gcc_assert (alg != no_stringop);
15788 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
15789 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15790 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* size_needed = bytes moved per main-loop iteration for ALG.  */
15795 gcc_unreachable ();
15797 size_needed = GET_MODE_SIZE (Pmode);
15799 case unrolled_loop:
15800 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
15802 case rep_prefix_8_byte:
15805 case rep_prefix_4_byte:
15808 case rep_prefix_1_byte:
15814 epilogue_size_needed = size_needed;
15816 /* Step 1: Prologue guard. */
15818 /* Alignment code needs count to be in register. */
15819 if (CONST_INT_P (count_exp) && desired_align > align)
15820 count_exp = force_reg (counter_mode (count_exp), count_exp);
15821 gcc_assert (desired_align >= 1 && align >= 1);
15823 /* Ensure that alignment prologue won't copy past end of block. */
15824 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15826 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15827 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15828 Make sure it is power of 2. */
15829 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15831 if (CONST_INT_P (count_exp))
15833 if (UINTVAL (count_exp) < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* Variable count: branch to the epilogue-only path for small blocks.  */
15838 label = gen_label_rtx ();
15839 emit_cmp_and_jump_insns (count_exp,
15840 GEN_INT (epilogue_size_needed),
15841 LTU, 0, counter_mode (count_exp), 1, label);
15842 if (expected_size == -1 || expected_size < epilogue_size_needed)
15843 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15845 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15849 /* Emit code to decide on runtime whether library call or inline should be
15851 if (dynamic_check != -1)
15853 if (CONST_INT_P (count_exp))
15855 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
15857 emit_block_move_via_libcall (dst, src, count_exp, false);
15858 count_exp = const0_rtx;
/* Runtime size check: small blocks jump to the inline code at
   HOT_LABEL, large ones take the libcall then skip the inline copy.  */
15864 rtx hot_label = gen_label_rtx ();
15865 jump_around_label = gen_label_rtx ();
15866 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15867 LEU, 0, GET_MODE (count_exp), 1, hot_label);
15868 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15869 emit_block_move_via_libcall (dst, src, count_exp, false);
15870 emit_jump (jump_around_label);
15871 emit_label (hot_label);
15875 /* Step 2: Alignment prologue. */
15877 if (desired_align > align)
15879 /* Except for the first move in epilogue, we no longer know
15880 constant offset in aliasing info. It don't seems to worth
15881 the pain to maintain it for the first move, so throw away
15883 src = change_address (src, BLKmode, srcreg);
15884 dst = change_address (dst, BLKmode, destreg);
15885 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
/* When the main body moves single bytes, the small-block label can be
   placed right here -- the epilogue handles everything.  */
15888 if (label && size_needed == 1)
15890 emit_label (label);
15891 LABEL_NUSES (label) = 1;
15895 /* Step 3: Main loop. */
15901 gcc_unreachable ();
15903 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15904 count_exp, QImode, 1, expected_size);
15907 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15908 count_exp, Pmode, 1, expected_size);
15910 case unrolled_loop:
15911 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
15912 registers for 4 temporaries anyway. */
15913 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
15914 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
15917 case rep_prefix_8_byte:
15918 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15921 case rep_prefix_4_byte:
15922 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15925 case rep_prefix_1_byte:
15926 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
15930 /* Adjust properly the offset of src and dest memory for aliasing. */
15931 if (CONST_INT_P (count_exp))
15933 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
15934 (count / size_needed) * size_needed);
15935 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15936 (count / size_needed) * size_needed);
15940 src = change_address (src, BLKmode, srcreg);
15941 dst = change_address (dst, BLKmode, destreg);
15944 /* Step 4: Epilogue to copy the remaining bytes. */
15948 /* When the main loop is done, COUNT_EXP might hold original count,
15949 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15950 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15951 bytes. Compensate if needed. */
15953 if (size_needed < epilogue_size_needed)
15956 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15957 GEN_INT (size_needed - 1), count_exp, 1,
15959 if (tmp != count_exp)
15960 emit_move_insn (count_exp, tmp);
15962 emit_label (label);
15963 LABEL_NUSES (label) = 1;
15966 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15967 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15968 epilogue_size_needed);
15969 if (jump_around_label)
15970 emit_label (jump_around_label);
15974 /* Helper function for memcpy. For QImode value 0xXY produce
15975 0xXYXYXYXY of wide specified by MODE. This is essentially
15976 a * 0x10101010, but we can do slightly better than
15977 synth_mult by unwinding the sequence by hand on CPUs with
/* Replicate the byte VAL across all byte lanes of a MODE (SImode or
   DImode) register, via either a multiply by 0x0101... or a shift/or
   cascade, whichever the cost model prefers.
   NOTE(review): lines are missing from this extract (return type, braces,
   parts of the cost comparison); code left byte-identical.  */
15980 promote_duplicated_reg (enum machine_mode mode, rtx val)
15982 enum machine_mode valmode = GET_MODE (val);
15984 int nops = mode == DImode ? 3 : 2;
15986 gcc_assert (mode == SImode || mode == DImode);
15987 if (val == const0_rtx)
15988 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: compute the replicated constant at compile time.  */
15989 if (CONST_INT_P (val))
15991 HOST_WIDE_INT v = INTVAL (val) & 255;
15995 if (mode == DImode)
15996 v |= (v << 16) << 16;
15997 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
16000 if (valmode == VOIDmode)
16002 if (valmode != QImode)
16003 val = gen_lowpart (QImode, val);
16004 if (mode == QImode)
16006 if (!TARGET_PARTIAL_REG_STALL)
/* Cost comparison: multiply by the 0x0101... constant vs. NOPS
   shift+or pairs; take the multiply when it is cheaper.  */
16008 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
16009 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
16010 <= (ix86_cost->shift_const + ix86_cost->add) * nops
16011 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
16013 rtx reg = convert_modes (mode, QImode, val, true);
16014 tmp = promote_duplicated_reg (mode, const1_rtx);
16015 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
/* Shift/or cascade: duplicate byte -> word -> dword (-> qword).  */
16020 rtx reg = convert_modes (mode, QImode, val, true);
16022 if (!TARGET_PARTIAL_REG_STALL)
16023 if (mode == SImode)
16024 emit_insn (gen_movsi_insv_1 (reg, reg));
16026 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
16029 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
16030 NULL, 1, OPTAB_DIRECT);
16032 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16034 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
16035 NULL, 1, OPTAB_DIRECT);
16036 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* NOTE(review): this condition looks inverted -- a shift by 32 only makes
   sense for DImode (SImode is 32 bits wide), so this presumably should be
   `mode != SImode`; verify against upstream i386.c.  */
16037 if (mode == SImode)
16039 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
16040 NULL, 1, OPTAB_DIRECT);
16041 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
16046 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
16047 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
16048 alignment from ALIGN to DESIRED_ALIGN. */
/* Pick the widest replication mode (DImode/SImode/HImode, or VAL itself)
   that the main loop or alignment prologue will store with, and return the
   replicated register.
   NOTE(review): the TARGET_64BIT half of the first condition (embedded
   16054) is missing from this extract.  */
16050 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
16055 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
16056 promoted_val = promote_duplicated_reg (DImode, val);
16057 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
16058 promoted_val = promote_duplicated_reg (SImode, val);
16059 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
16060 promoted_val = promote_duplicated_reg (HImode, val);
16062 promoted_val = val;
16064 return promoted_val;
16067 /* Expand string clear operation (bzero). Use i386 string operations when
16068 profitable. See expand_movmem comment for explanation of individual
16069 steps performed. */
/* Top-level memset expander called from the setmem pattern; same four-step
   structure as ix86_expand_movmem, plus byte-value promotion (VAL_EXP
   replicated to the widest store mode).
   NOTE(review): numerous lines (locals, braces, switch heads, breaks,
   returns) are missing from this extract; code left byte-identical.  */
16071 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
16072 rtx expected_align_exp, rtx expected_size_exp)
16077 rtx jump_around_label = NULL;
16078 HOST_WIDE_INT align = 1;
16079 unsigned HOST_WIDE_INT count = 0;
16080 HOST_WIDE_INT expected_size = -1;
16081 int size_needed = 0, epilogue_size_needed;
16082 int desired_align = 0;
16083 enum stringop_alg alg;
16084 rtx promoted_val = NULL;
/* Set when the promoted wide value may not be live in the epilogue, which
   must then fall back to the byte loop.  */
16085 bool force_loopy_epilogue = false;
16088 if (CONST_INT_P (align_exp))
16089 align = INTVAL (align_exp);
16090 /* i386 can do misaligned access on reasonably increased cost. */
16091 if (CONST_INT_P (expected_align_exp)
16092 && INTVAL (expected_align_exp) > align)
16093 align = INTVAL (expected_align_exp);
16094 if (CONST_INT_P (count_exp))
16095 count = expected_size = INTVAL (count_exp);
16096 if (CONST_INT_P (expected_size_exp) && count == 0)
16097 expected_size = INTVAL (expected_size_exp);
16099 /* Make sure we don't need to care about overflow later on. */
16100 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
16103 /* Step 0: Decide on preferred algorithm, desired alignment and
16104 size of chunks to be copied by main loop. */
16106 alg = decide_alg (count, expected_size, true, &dynamic_check);
16107 desired_align = decide_alignment (align, alg, expected_size);
16109 if (!TARGET_ALIGN_STRINGOPS)
16110 align = desired_align;
16112 if (alg == libcall)
16114 gcc_assert (alg != no_stringop);
16116 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
16117 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
/* size_needed = bytes stored per main-loop iteration for ALG.  */
16122 gcc_unreachable ();
16124 size_needed = GET_MODE_SIZE (Pmode);
16126 case unrolled_loop:
16127 size_needed = GET_MODE_SIZE (Pmode) * 4;
16129 case rep_prefix_8_byte:
16132 case rep_prefix_4_byte:
16135 case rep_prefix_1_byte:
16140 epilogue_size_needed = size_needed;
16142 /* Step 1: Prologue guard. */
16144 /* Alignment code needs count to be in register. */
16145 if (CONST_INT_P (count_exp) && desired_align > align)
/* Constant count needs a register wide enough to hold it.  */
16147 enum machine_mode mode = SImode;
16148 if (TARGET_64BIT && (count & ~0xffffffff))
16150 count_exp = force_reg (mode, count_exp);
16152 /* Do the cheap promotion to allow better CSE across the
16153 main loop and epilogue (ie one load of the big constant in the
16154 front of all code. */
16155 if (CONST_INT_P (val_exp))
16156 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16157 desired_align, align);
16158 /* Ensure that alignment prologue won't copy past end of block. */
16159 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
16161 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
16162 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
16163 Make sure it is power of 2. */
16164 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
16166 /* To improve performance of small blocks, we jump around the VAL
16167 promoting mode. This mean that if the promoted VAL is not constant,
16168 we might not use it in the epilogue and have to use byte
16170 if (epilogue_size_needed > 2 && !promoted_val)
16171 force_loopy_epilogue = true;
16172 label = gen_label_rtx ();
16173 emit_cmp_and_jump_insns (count_exp,
16174 GEN_INT (epilogue_size_needed),
16175 LTU, 0, counter_mode (count_exp), 1, label);
/* NOTE(review): `GET_CODE (...) == CONST_INT` here where the rest of this
   file uses the CONST_INT_P macro -- presumably equivalent; the branch
   body (embedded 16177) is missing from this extract.  */
16176 if (GET_CODE (count_exp) == CONST_INT)
16178 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
16179 predict_jump (REG_BR_PROB_BASE * 60 / 100);
16181 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Runtime size check: large blocks take the library call and jump over
   the inline code.  */
16183 if (dynamic_check != -1)
16185 rtx hot_label = gen_label_rtx ();
16186 jump_around_label = gen_label_rtx ();
16187 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
16188 LEU, 0, counter_mode (count_exp), 1, hot_label);
16189 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16190 set_storage_via_libcall (dst, count_exp, val_exp, false);
16191 emit_jump (jump_around_label);
16192 emit_label (hot_label);
16195 /* Step 2: Alignment prologue. */
16197 /* Do the expensive promotion once we branched off the small blocks. */
16199 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
16200 desired_align, align);
16201 gcc_assert (desired_align >= 1 && align >= 1);
16203 if (desired_align > align)
16205 /* Except for the first move in epilogue, we no longer know
16206 constant offset in aliasing info. It don't seems to worth
16207 the pain to maintain it for the first move, so throw away
16209 dst = change_address (dst, BLKmode, destreg);
16210 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
/* Single-byte main body: the small-block label can be placed here.  */
16213 if (label && size_needed == 1)
16215 emit_label (label);
16216 LABEL_NUSES (label) = 1;
16220 /* Step 3: Main loop. */
16226 gcc_unreachable ();
16228 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16229 count_exp, QImode, 1, expected_size);
16232 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16233 count_exp, Pmode, 1, expected_size);
16235 case unrolled_loop:
16236 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
16237 count_exp, Pmode, 4, expected_size);
16239 case rep_prefix_8_byte:
16240 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16243 case rep_prefix_4_byte:
16244 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16247 case rep_prefix_1_byte:
16248 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
16252 /* Adjust properly the offset of src and dest memory for aliasing. */
16253 if (CONST_INT_P (count_exp))
16254 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
16255 (count / size_needed) * size_needed);
16257 dst = change_address (dst, BLKmode, destreg);
16259 /* Step 4: Epilogue to copy the remaining bytes. */
16263 /* When the main loop is done, COUNT_EXP might hold original count,
16264 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
16265 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
16266 bytes. Compensate if needed. */
16268 if (size_needed < desired_align - align)
16271 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
16272 GEN_INT (size_needed - 1), count_exp, 1,
16274 size_needed = desired_align - align + 1;
16275 if (tmp != count_exp)
16276 emit_move_insn (count_exp, tmp);
16278 emit_label (label);
16279 LABEL_NUSES (label) = 1;
16281 if (count_exp != const0_rtx && epilogue_size_needed > 1)
/* Use the byte loop when the promoted value was skipped (see
   force_loopy_epilogue above); otherwise the fast epilogue.  */
16283 if (force_loopy_epilogue)
16284 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
16287 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
16290 if (jump_around_label)
16291 emit_label (jump_around_label);
16295 /* Expand the appropriate insns for doing strlen if not just doing
16298 out = result, initialized with the start address
16299 align_rtx = alignment of the address.
16300 scratch = scratch register, initialized with the startaddress when
16301 not aligned, otherwise undefined
16303 This is just the body. It needs the initializations mentioned above and
16304 some address computing at the end. These things are done in i386.md. */
/* On exit OUT points at the terminating zero byte (memchr-style); the
   caller (ix86_expand_strlen) subtracts the start address to obtain the
   string length.  */
16307 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
16311 rtx align_2_label = NULL_RTX;
16312 rtx align_3_label = NULL_RTX;
16313 rtx align_4_label = gen_label_rtx ();
16314 rtx end_0_label = gen_label_rtx ();
16316 rtx tmpreg = gen_reg_rtx (SImode);
16317 rtx scratch = gen_reg_rtx (SImode);
/* Only a compile-time constant alignment is usable; otherwise the
   conservative byte-at-a-time prologue below is emitted.  */
16321 if (CONST_INT_P (align_rtx))
16322 align = INTVAL (align_rtx);
16324 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
16326 /* Is there a known alignment and is it less than 4? */
16329 rtx scratch1 = gen_reg_rtx (Pmode);
16330 emit_move_insn (scratch1, out);
16331 /* Is there a known alignment and is it not 2? */
16334 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
16335 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
16337 /* Leave just the 3 lower bits. */
16338 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
16339 NULL_RTX, 0, OPTAB_WIDEN);
/* Dispatch on (address & 3): 0 -> already aligned, 2 -> two bytes to
   check, >2 -> one byte to check; fall through for misalignment 1.  */
16341 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16342 Pmode, 1, align_4_label);
16343 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
16344 Pmode, 1, align_2_label);
16345 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
16346 Pmode, 1, align_3_label);
16350 /* Since the alignment is 2, we have to check 2 or 0 bytes;
16351 check if is aligned to 4 - byte. */
16353 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
16354 NULL_RTX, 0, OPTAB_WIDEN);
16356 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
16357 Pmode, 1, align_4_label);
16360 mem = change_address (src, QImode, out);
16362 /* Now compare the bytes. */
16364 /* Compare the first n unaligned byte on a byte per byte basis. */
16365 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
16366 QImode, 1, end_0_label);
16368 /* Increment the address. */
16369 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16371 /* Not needed with an alignment of 2 */
16374 emit_label (align_2_label);
16376 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16379 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16381 emit_label (align_3_label);
16384 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
16387 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
16390 /* Generate loop to check 4 bytes at a time. It is not a good idea to
16391 align this loop. It gives only huge programs, but does not help to
16393 emit_label (align_4_label);
16395 mem = change_address (src, SImode, out);
16396 emit_move_insn (scratch, mem);
16397 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
16399 /* This formula yields a nonzero result iff one of the bytes is zero.
16400 This saves three branches inside loop and many cycles. */
/* Classic zero-byte test: (word - 0x01010101) & ~word & 0x80808080.  */
16402 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
16403 emit_insn (gen_one_cmplsi2 (scratch, scratch));
16404 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
16405 emit_insn (gen_andsi3 (tmpreg, tmpreg,
16406 gen_int_mode (0x80808080, SImode)));
16407 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* A zero byte was found in the word just loaded; the two variants below
   narrow down which byte it was.  This first variant uses flag-driven
   IF_THEN_ELSE sets (conditional moves) keyed off the 0x8080 test.  */
16412 rtx reg = gen_reg_rtx (SImode);
16413 rtx reg2 = gen_reg_rtx (Pmode);
16414 emit_move_insn (reg, tmpreg);
16415 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
16417 /* If zero is not in the first two bytes, move two bytes forward. */
16418 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16419 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16420 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16421 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
16422 gen_rtx_IF_THEN_ELSE (SImode, tmp,
16425 /* Emit lea manually to avoid clobbering of flags. */
16426 emit_insn (gen_rtx_SET (SImode, reg2,
16427 gen_rtx_PLUS (Pmode, out, const2_rtx)));
16429 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16430 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
16431 emit_insn (gen_rtx_SET (VOIDmode, out,
16432 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Branching variant of the same narrowing, for targets without the
   conditional-move path above.  */
16439 rtx end_2_label = gen_label_rtx ();
16440 /* Is zero in the first two bytes? */
16442 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
16443 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
16444 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
16445 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16446 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
16448 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16449 JUMP_LABEL (tmp) = end_2_label;
16451 /* Not in the first two. Move two bytes forward. */
16452 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
16453 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
16455 emit_label (end_2_label);
16459 /* Avoid branch in fixing the byte. */
/* NOTE(review): doubling tmpreg's low byte sets carry iff its 0x80 bit
   is set; the sbb-style sub3_carry then backs OUT up by 3 or 4 so it
   lands on the zero byte — confirm against the add/sbb patterns.  */
16460 tmpreg = gen_lowpart (QImode, tmpreg);
16461 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
16462 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
16463 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
16465 emit_label (end_0_label);
16468 /* Expand strlen. */
/* OUT receives the length of the string at SRC.  EOSCHAR is the
   terminator character rtx and ALIGN the known source alignment.  Two
   strategies are visible: the unrolled SImode scan
   (ix86_expand_strlensi_unroll_1) when the terminator is zero, and a
   scas-based sequence (UNSPEC_SCAS / gen_strlenqi_1) otherwise.  */
16471 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
16473 rtx addr, scratch1, scratch2, scratch3, scratch4;
16475 /* The generic case of strlen expander is long. Avoid it's
16476 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
16478 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16479 && !TARGET_INLINE_ALL_STRINGOPS
16481 && (!CONST_INT_P (align) || INTVAL (align) < 4))
16484 addr = force_reg (Pmode, XEXP (src, 0));
16485 scratch1 = gen_reg_rtx (Pmode);
16487 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
16490 /* Well it seems that some optimizer does not combine a call like
16491 foo(strlen(bar), strlen(bar));
16492 when the move and the subtraction is done here. It does calculate
16493 the length just once when these instructions are done inside of
16494 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
16495 often used and I use one fewer register for the lifetime of
16496 output_strlen_unroll() this is better. */
16498 emit_move_insn (out, addr);
16500 ix86_expand_strlensi_unroll_1 (out, src, align);
16502 /* strlensi_unroll_1 returns the address of the zero at the end of
16503 the string, like memchr(), so compute the length by subtracting
16504 the start address. */
16505 emit_insn ((*ix86_gen_sub3) (out, out, addr));
16511 /* Can't use this if the user has appropriated eax, ecx, or edi. */
16512 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
16515 scratch2 = gen_reg_rtx (Pmode);
16516 scratch3 = gen_reg_rtx (Pmode);
/* scratch4 = -1: presumably the maximal scan count handed to the scas
   pattern — confirm against strlenqi_1 in i386.md.  */
16517 scratch4 = force_reg (Pmode, constm1_rtx);
16519 emit_move_insn (scratch3, addr);
16520 eoschar = force_reg (QImode, eoschar);
16522 src = replace_equiv_address_nv (src, scratch3);
16524 /* If .md starts supporting :P, this can be done in .md. */
16525 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
16526 scratch4), UNSPEC_SCAS);
16527 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
/* out = ~scratch1 - 1: converts the scan result into the length.  */
16528 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
16529 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
16534 /* For given symbol (function) construct code to compute address of it's PLT
16535 entry in large x86-64 PIC model. */
/* Only valid for SYMBOL_REFs under -mcmodel=large -fpic (asserted
   below); returns a fresh pseudo.  */
16537 construct_plt_address (rtx symbol)
16539 rtx tmp = gen_reg_rtx (Pmode);
16540 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
16542 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
16543 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
16545 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
/* TMP now holds symbol@PLTOFF plus the PIC base register.  */
16546 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* Expand a call insn.  RETVAL is the return-value rtx (or NULL for a
   void call), FNADDR the MEM wrapping the callee address, CALLARG1 the
   argument-bytes rtx used in the CALL pattern, POP the stack-pop amount
   folded into the pattern, SIBCALL nonzero for a tail call.  */
16551 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
16552 rtx callarg2 ATTRIBUTE_UNUSED,
16553 rtx pop, int sibcall)
16555 rtx use = NULL, call;
16557 if (pop == const0_rtx)
16559 gcc_assert (!TARGET_64BIT || !pop);
16561 if (TARGET_MACHO && !TARGET_64BIT)
16564 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
16565 fnaddr = machopic_indirect_call_target (fnaddr);
16570 /* Static functions and indirect calls don't need the pic register. */
16571 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
16572 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16573 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
16574 use_reg (&use, pic_offset_table_rtx)
/* NOTE(review): AL is loaded from CALLARG2 — presumably the SSE
   register count for x86-64 varargs calls per the SysV ABI; confirm.  */
16577 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
16579 rtx al = gen_rtx_REG (QImode, AX_REG);
16580 emit_move_insn (al, callarg2);
16581 use_reg (&use, al);
16584 if (ix86_cmodel == CM_LARGE_PIC
16585 && GET_CODE (fnaddr) == MEM
16586 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
16587 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
16588 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
16589 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
16591 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16592 fnaddr = gen_rtx_MEM (QImode, fnaddr);
/* 64-bit sibcalls to non-constant addresses go through R11 (a register
   not used for argument passing).  */
16594 if (sibcall && TARGET_64BIT
16595 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
16598 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
16599 fnaddr = gen_rtx_REG (Pmode, R11_REG);
16600 emit_move_insn (fnaddr, addr);
16601 fnaddr = gen_rtx_MEM (QImode, fnaddr);
16604 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
16606 call = gen_rtx_SET (VOIDmode, retval, call);
/* Fold the callee's stack pop into the call pattern as a parallel
   stack-pointer adjustment.  */
16609 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
16610 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
16611 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
16614 call = emit_call_insn (call);
16616 CALL_INSN_FUNCTION_USAGE (call) = use;
16620 /* Clear stack slot assignments remembered from previous functions.
16621 This is called from INIT_EXPANDERS once before RTL is emitted for each
16624 static struct machine_function *
16625 ix86_init_machine_status (void)
16627 struct machine_function *f;
/* GGC_CNEW presumably returns zeroed GC storage ("C" = cleared), so
   only fields with non-zero defaults are set explicitly below.  */
16629 f = GGC_CNEW (struct machine_function);
16630 f->use_fast_prologue_epilogue_nregs = -1;
16631 f->tls_descriptor_call_expanded_p = 0;
16632 f->call_abi = DEFAULT_ABI;
16637 /* Return a MEM corresponding to a stack slot with mode MODE.
16638 Allocate a new slot if necessary.
16640 The RTL for a function can have several slots available: N is
16641 which slot to use. */
16644 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
16646 struct stack_local_entry *s;
16648 gcc_assert (n < MAX_386_STACK_LOCALS);
16650 /* Virtual slot is valid only before vregs are instantiated. */
16651 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* Reuse a previously allocated (mode, n) slot; copy_rtx is returned
   presumably so callers can modify the MEM without corrupting the
   cached copy.  */
16653 for (s = ix86_stack_locals; s; s = s->next)
16654 if (s->mode == mode && s->n == n)
16655 return copy_rtx (s->rtl);
/* Not cached yet: allocate a GC'd cache entry and a fresh stack slot,
   and push the entry onto the ix86_stack_locals list.  */
16657 s = (struct stack_local_entry *)
16658 ggc_alloc (sizeof (struct stack_local_entry));
16661 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16663 s->next = ix86_stack_locals;
16664 ix86_stack_locals = s;
16668 /* Construct the SYMBOL_REF for the tls_get_addr function. */
16670 static GTY(()) rtx ix86_tls_symbol;
/* Lazily created and cached in the GTY root above; the symbol name is
   "___tls_get_addr" under GNU TLS, "__tls_get_addr" otherwise.  */
16672 ix86_tls_get_addr (void)
16675 if (!ix86_tls_symbol)
16677 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
16678 (TARGET_ANY_GNU_TLS
16680 ? "___tls_get_addr"
16681 : "__tls_get_addr");
16684 return ix86_tls_symbol;
16687 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
16689 static GTY(()) rtx ix86_tls_module_base_symbol;
/* Lazily created and cached; the symbol is flagged as TLS with the
   global-dynamic model so later code treats it as a TLS reference.  */
16691 ix86_tls_module_base (void)
16694 if (!ix86_tls_module_base_symbol)
16696 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
16697 "_TLS_MODULE_BASE_");
16698 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
16699 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
16702 return ix86_tls_module_base_symbol;
16705 /* Calculate the length of the memory address in the instruction
16706 encoding. Does not include the one-byte modrm, opcode, or prefix. */
16709 memory_address_length (rtx addr)
16711 struct ix86_address parts;
16712 rtx base, index, disp;
/* Auto-increment/decrement forms are special-cased up front (their
   handling is elided in this excerpt).  */
16716 if (GET_CODE (addr) == PRE_DEC
16717 || GET_CODE (addr) == POST_INC
16718 || GET_CODE (addr) == PRE_MODIFY
16719 || GET_CODE (addr) == POST_MODIFY)
16722 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so register identity checks below see hard/pseudo regs
   directly.  */
16725 if (parts.base && GET_CODE (parts.base) == SUBREG)
16726 parts.base = SUBREG_REG (parts.base);
16727 if (parts.index && GET_CODE (parts.index) == SUBREG)
16728 parts.index = SUBREG_REG (parts.index);
16731 index = parts.index;
16736 - esp as the base always wants an index,
16737 - ebp as the base always wants a displacement. */
16739 /* Register Indirect. */
16740 if (base && !index && !disp)
16742 /* esp (for its index) and ebp (for its displacement) need
16743 the two-byte modrm form. */
16744 if (addr == stack_pointer_rtx
16745 || addr == arg_pointer_rtx
16746 || addr == frame_pointer_rtx
16747 || addr == hard_frame_pointer_rtx)
16751 /* Direct Addressing. */
16752 else if (disp && !base && !index)
16757 /* Find the length of the displacement constant. */
/* constraint K presumably matches a signed 8-bit constant, i.e. the
   short disp8 encoding — confirm against i386.md constraints.  */
16760 if (base && satisfies_constraint_K (disp))
16765 /* ebp always wants a displacement. */
16766 else if (base == hard_frame_pointer_rtx)
16769 /* An index requires the two-byte modrm form.... */
16771 /* ...like esp, which always wants an index. */
16772 || base == stack_pointer_rtx
16773 || base == arg_pointer_rtx
16774 || base == frame_pointer_rtx)
16781 /* Compute default value for "length_immediate" attribute. When SHORTFORM
16782 is set, expect that insn have 8bit immediate alternative. */
16784 ix86_attr_length_immediate_default (rtx insn, int shortform)
16788 extract_insn_cached (insn);
/* Scan the operands for a constant; its encoded size depends on the
   insn mode (switch below).  */
16789 for (i = recog_data.n_operands - 1; i >= 0; --i)
16790 if (CONSTANT_P (recog_data.operand[i]))
/* With SHORTFORM, constraint-K constants use the 1-byte immediate
   alternative.  */
16793 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
16797 switch (get_attr_mode (insn))
16808 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
16813 fatal_insn ("unknown insn mode", insn);
16819 /* Compute default value for "length_address" attribute. */
16821 ix86_attr_length_address_default (rtx insn)
/* An LEA's "address" is its SET_SRC rather than a MEM operand, so
   measure that directly.  */
16825 if (get_attr_type (insn) == TYPE_LEA)
16827 rtx set = PATTERN (insn);
16829 if (GET_CODE (set) == PARALLEL)
16830 set = XVECEXP (set, 0, 0);
16832 gcc_assert (GET_CODE (set) == SET);
16834 return memory_address_length (SET_SRC (set));
/* Otherwise measure the first MEM operand found (at most one memory
   operand per ia32 insn).  */
16837 extract_insn_cached (insn);
16838 for (i = recog_data.n_operands - 1; i >= 0; --i)
16839 if (MEM_P (recog_data.operand[i]))
16841 return memory_address_length (XEXP (recog_data.operand[i], 0));
16847 /* Return the maximum number of instructions a cpu can issue. */
/* Scheduler hook; dispatches on the tuning target (the per-case return
   values are elided in this excerpt).  */
16850 ix86_issue_rate (void)
16854 case PROCESSOR_PENTIUM:
16858 case PROCESSOR_PENTIUMPRO:
16859 case PROCESSOR_PENTIUM4:
16860 case PROCESSOR_ATHLON:
16862 case PROCESSOR_AMDFAM10:
16863 case PROCESSOR_NOCONA:
16864 case PROCESSOR_GENERIC32:
16865 case PROCESSOR_GENERIC64:
16868 case PROCESSOR_CORE2:
16876 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
16877 by DEP_INSN and nothing set by DEP_INSN. */
16880 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
16884 /* Simplify the test for uninteresting insns. */
16885 if (insn_type != TYPE_SETCC
16886 && insn_type != TYPE_ICMOV
16887 && insn_type != TYPE_FCMOV
16888 && insn_type != TYPE_IBR)
/* Collect what DEP_INSN sets: either a single SET destination, or both
   destinations of a two-SET PARALLEL (e.g. an arithmetic op that also
   sets the flags).  */
16891 if ((set = single_set (dep_insn)) != 0)
16893 set = SET_DEST (set);
16896 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
16897 && XVECLEN (PATTERN (dep_insn), 0) == 2
16898 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
16899 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
16901 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
/* BUGFIX: the second SET of the two-element PARALLEL is element 1; the
   original read element 0 twice, making SET2 a duplicate of SET and the
   SET2 overlap check below vacuous.  */
16902 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
16907 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
16910 /* This test is true if the dependent insn reads the flags but
16911 not any other potentially set register. */
16912 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
16915 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
16921 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
16922 address with operands set by DEP_INSN. */
16925 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
/* LEA computes an address without a MEM: take the address expression
   from its SET_SRC.  */
16929 if (insn_type == TYPE_LEA
16932 addr = PATTERN (insn);
16934 if (GET_CODE (addr) == PARALLEL)
16935 addr = XVECEXP (addr, 0, 0);
16937 gcc_assert (GET_CODE (addr) == SET);
16939 addr = SET_SRC (addr);
/* Otherwise use the address of the first MEM operand found.  */
16944 extract_insn_cached (insn);
16945 for (i = recog_data.n_operands - 1; i >= 0; --i)
16946 if (MEM_P (recog_data.operand[i]))
16948 addr = XEXP (recog_data.operand[i], 0);
/* AGI stall exists iff DEP_INSN writes anything the address reads.  */
16955 return modified_in_p (addr, dep_insn);
/* Scheduler hook: adjust COST, the latency of the dependence LINK from
   DEP_INSN to INSN, according to the tuning target's pipeline quirks.  */
16959 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16961 enum attr_type insn_type, dep_insn_type;
16962 enum attr_memory memory;
16964 int dep_insn_code_number;
16966 /* Anti and output dependencies have zero cost on all CPUs. */
16967 if (REG_NOTE_KIND (link) != 0)
16970 dep_insn_code_number = recog_memoized (dep_insn);
16972 /* If we can't recognize the insns, we can't really do anything. */
16973 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16976 insn_type = get_attr_type (insn);
16977 dep_insn_type = get_attr_type (dep_insn);
16981 case PROCESSOR_PENTIUM:
16982 /* Address Generation Interlock adds a cycle of latency. */
16983 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16986 /* ??? Compares pair with jump/setcc. */
16987 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16990 /* Floating point stores require value to be ready one cycle earlier. */
16991 if (insn_type == TYPE_FMOV
16992 && get_attr_memory (insn) == MEMORY_STORE
16993 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16997 case PROCESSOR_PENTIUMPRO:
16998 memory = get_attr_memory (insn);
17000 /* INT->FP conversion is expensive. */
17001 if (get_attr_fp_int_src (dep_insn))
/* Detect an FP-op -> store-of-that-value pair via matching SET rtxes.  */
17004 /* There is one cycle extra latency between an FP op and a store. */
17005 if (insn_type == TYPE_FMOV
17006 && (set = single_set (dep_insn)) != NULL_RTX
17007 && (set2 = single_set (insn)) != NULL_RTX
17008 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
17009 && MEM_P (SET_DEST (set2)))
17012 /* Show ability of reorder buffer to hide latency of load by executing
17013 in parallel with previous instruction in case
17014 previous instruction is not needed to compute the address. */
17015 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17016 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17018 /* Claim moves to take one cycle, as core can issue one load
17019 at time and the next load can start cycle later. */
17020 if (dep_insn_type == TYPE_IMOV
17021 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): which processor case the block below belongs to is
   elided in this excerpt (it mirrors the PENTIUMPRO logic plus a
   push/pop special case).  */
17029 memory = get_attr_memory (insn);
17031 /* The esp dependency is resolved before the instruction is really
17033 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
17034 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
17037 /* INT->FP conversion is expensive. */
17038 if (get_attr_fp_int_src (dep_insn))
17041 /* Show ability of reorder buffer to hide latency of load by executing
17042 in parallel with previous instruction in case
17043 previous instruction is not needed to compute the address. */
17044 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17045 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17047 /* Claim moves to take one cycle, as core can issue one load
17048 at time and the next load can start cycle later. */
17049 if (dep_insn_type == TYPE_IMOV
17050 || dep_insn_type == TYPE_FMOV)
17059 case PROCESSOR_ATHLON:
17061 case PROCESSOR_AMDFAM10:
17062 case PROCESSOR_GENERIC32:
17063 case PROCESSOR_GENERIC64:
17064 memory = get_attr_memory (insn);
17066 /* Show ability of reorder buffer to hide latency of load by executing
17067 in parallel with previous instruction in case
17068 previous instruction is not needed to compute the address. */
17069 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
17070 && !ix86_agi_dependent (insn, dep_insn, insn_type))
17072 enum attr_unit unit = get_attr_unit (insn);
17075 /* Because of the difference between the length of integer and
17076 floating unit pipeline preparation stages, the memory operands
17077 for floating point are cheaper.
17079 ??? For Athlon it the difference is most probably 2. */
17080 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
17083 loadcost = TARGET_ATHLON ? 2 : 0;
17085 if (cost >= loadcost)
17098 /* How many alternative schedules to try. This should be as wide as the
17099 scheduling freedom in the DFA, but no wider. Making this value too
17100 large results extra work for the scheduler. */
/* Scheduler hook; dispatches on the tuning target (per-case return
   values elided in this excerpt).  */
17103 ia32_multipass_dfa_lookahead (void)
17107 case PROCESSOR_PENTIUM:
17110 case PROCESSOR_PENTIUMPRO:
17120 /* Compute the alignment given to a constant that is being placed in memory.
17121 EXP is the constant and ALIGN is the alignment that the object would
17123 The value of this function is used instead of that alignment to align
17127 ix86_constant_alignment (tree exp, int align)
/* Numeric/vector constants: bump DFmode constants to 64-bit alignment
   and ALIGN_MODE_128 modes to 128-bit (returned values elided here).  */
17129 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
17130 || TREE_CODE (exp) == INTEGER_CST)
17132 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
17134 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
/* Long string constants get word alignment unless optimizing for size.  */
17137 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
17138 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
17139 return BITS_PER_WORD;
17144 /* Compute the alignment for a static variable.
17145 TYPE is the data type, and ALIGN is the alignment that
17146 the object would ordinarily have. The value of this function is used
17147 instead of that alignment to align the object. */
17150 ix86_data_alignment (tree type, int align)
/* Cap at BITS_PER_WORD when optimizing for size, else at 256 bits (or
   less if the object format can't do that much).  */
17152 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
/* Large constant-size aggregates are raised to max_align; the HIGH word
   of the size is checked so huge (>32-bit-size) objects qualify too.  */
17154 if (AGGREGATE_TYPE_P (type)
17155 && TYPE_SIZE (type)
17156 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17157 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
17158 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
17159 && align < max_align)
17162 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17163 to 16byte boundary. */
17166 if (AGGREGATE_TYPE_P (type)
17167 && TYPE_SIZE (type)
17168 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17169 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
17170 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class bumps: DFmode-ish contents to 64, 128-bit modes to
   128 (returned values elided in this excerpt).  */
17174 if (TREE_CODE (type) == ARRAY_TYPE)
17176 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17178 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17181 else if (TREE_CODE (type) == COMPLEX_TYPE)
17184 if (TYPE_MODE (type) == DCmode && align < 64)
17186 if ((TYPE_MODE (type) == XCmode
17187 || TYPE_MODE (type) == TCmode) && align < 128)
17190 else if ((TREE_CODE (type) == RECORD_TYPE
17191 || TREE_CODE (type) == UNION_TYPE
17192 || TREE_CODE (type) == QUAL_UNION_TYPE)
17193 && TYPE_FIELDS (type))
17195 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17197 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17200 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17201 || TREE_CODE (type) == INTEGER_TYPE)
17203 if (TYPE_MODE (type) == DFmode && align < 64)
17205 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17212 /* Compute the alignment for a local variable or a stack slot. TYPE is
17213 the data type, MODE is the widest mode available and ALIGN is the
17214 alignment that the object would ordinarily have. The value of this
17215 macro is used instead of that alignment to align the object. */
/* Mirrors ix86_data_alignment but for stack objects; the 16-byte array
   threshold here is 16 *bytes* (TYPE_SIZE low word >= 16) vs. 128 bits
   for statics.  */
17218 ix86_local_alignment (tree type, enum machine_mode mode,
17219 unsigned int align)
17221 /* If TYPE is NULL, we are allocating a stack slot for caller-save
17222 register in MODE. We will return the largest alignment of XF
17226 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
17227 align = GET_MODE_ALIGNMENT (DFmode);
17231 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
17232 to 16byte boundary. */
17235 if (AGGREGATE_TYPE_P (type)
17236 && TYPE_SIZE (type)
17237 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
17238 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
17239 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
/* Per-type-class bumps identical in shape to ix86_data_alignment
   (returned values elided in this excerpt).  */
17242 if (TREE_CODE (type) == ARRAY_TYPE)
17244 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
17246 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
17249 else if (TREE_CODE (type) == COMPLEX_TYPE)
17251 if (TYPE_MODE (type) == DCmode && align < 64)
17253 if ((TYPE_MODE (type) == XCmode
17254 || TYPE_MODE (type) == TCmode) && align < 128)
17257 else if ((TREE_CODE (type) == RECORD_TYPE
17258 || TREE_CODE (type) == UNION_TYPE
17259 || TREE_CODE (type) == QUAL_UNION_TYPE)
17260 && TYPE_FIELDS (type))
17262 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
17264 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
17267 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
17268 || TREE_CODE (type) == INTEGER_TYPE)
17271 if (TYPE_MODE (type) == DFmode && align < 64)
17273 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
17279 /* Emit RTL insns to initialize the variable parts of a trampoline.
17280 FNADDR is an RTX for the address of the function's pure code.
17281 CXT is an RTX for the static chain value for the function. */
17283 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
17287 /* Compute offset from the end of the jmp to the target function. */
17288 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
17289 plus_constant (tramp, 10),
17290 NULL_RTX, 1, OPTAB_DIRECT);
/* ia32 trampoline: 0xb9 imm32 = movl $CXT, %ecx; 0xe9 rel32 = jmp to
   FNADDR (rel32 is relative to the end of the 10-byte sequence).  */
17291 emit_move_insn (gen_rtx_MEM (QImode, tramp),
17292 gen_int_mode (0xb9, QImode));
17293 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
17294 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
17295 gen_int_mode (0xe9, QImode));
17296 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
17301 /* Try to load address using shorter movl instead of movabs.
17302 We may want to support movq for kernel mode, but kernel does not use
17303 trampolines at the moment. */
17304 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
/* Little-endian HImode store: bytes 0x41 0xbb = movl $imm32, %r11d.  */
17306 fnaddr = copy_to_mode_reg (DImode, fnaddr);
17307 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17308 gen_int_mode (0xbb41, HImode));
17309 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
17310 gen_lowpart (SImode, fnaddr));
/* Bytes 0x49 0xbb = movabs $imm64, %r11.  */
17315 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17316 gen_int_mode (0xbb49, HImode));
17317 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17321 /* Load static chain using movabs to r10. */
/* Bytes 0x49 0xba = movabs $imm64, %r10.  */
17322 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17323 gen_int_mode (0xba49, HImode));
17324 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
17327 /* Jump to the r11 */
/* Bytes 0x49 0xff 0xe3 = jmp *%r11.  */
17328 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
17329 gen_int_mode (0xff49, HImode));
17330 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
17331 gen_int_mode (0xe3, QImode));
17333 gcc_assert (offset <= TRAMPOLINE_SIZE);
/* Some hosts require making the stack executable before running
   trampoline code from it.  */
17336 #ifdef ENABLE_EXECUTE_STACK
17337 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
17338 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
17342 /* Codes for all the SSE/MMX builtins. */
17345 IX86_BUILTIN_ADDPS,
17346 IX86_BUILTIN_ADDSS,
17347 IX86_BUILTIN_DIVPS,
17348 IX86_BUILTIN_DIVSS,
17349 IX86_BUILTIN_MULPS,
17350 IX86_BUILTIN_MULSS,
17351 IX86_BUILTIN_SUBPS,
17352 IX86_BUILTIN_SUBSS,
17354 IX86_BUILTIN_CMPEQPS,
17355 IX86_BUILTIN_CMPLTPS,
17356 IX86_BUILTIN_CMPLEPS,
17357 IX86_BUILTIN_CMPGTPS,
17358 IX86_BUILTIN_CMPGEPS,
17359 IX86_BUILTIN_CMPNEQPS,
17360 IX86_BUILTIN_CMPNLTPS,
17361 IX86_BUILTIN_CMPNLEPS,
17362 IX86_BUILTIN_CMPNGTPS,
17363 IX86_BUILTIN_CMPNGEPS,
17364 IX86_BUILTIN_CMPORDPS,
17365 IX86_BUILTIN_CMPUNORDPS,
17366 IX86_BUILTIN_CMPEQSS,
17367 IX86_BUILTIN_CMPLTSS,
17368 IX86_BUILTIN_CMPLESS,
17369 IX86_BUILTIN_CMPNEQSS,
17370 IX86_BUILTIN_CMPNLTSS,
17371 IX86_BUILTIN_CMPNLESS,
17372 IX86_BUILTIN_CMPNGTSS,
17373 IX86_BUILTIN_CMPNGESS,
17374 IX86_BUILTIN_CMPORDSS,
17375 IX86_BUILTIN_CMPUNORDSS,
17377 IX86_BUILTIN_COMIEQSS,
17378 IX86_BUILTIN_COMILTSS,
17379 IX86_BUILTIN_COMILESS,
17380 IX86_BUILTIN_COMIGTSS,
17381 IX86_BUILTIN_COMIGESS,
17382 IX86_BUILTIN_COMINEQSS,
17383 IX86_BUILTIN_UCOMIEQSS,
17384 IX86_BUILTIN_UCOMILTSS,
17385 IX86_BUILTIN_UCOMILESS,
17386 IX86_BUILTIN_UCOMIGTSS,
17387 IX86_BUILTIN_UCOMIGESS,
17388 IX86_BUILTIN_UCOMINEQSS,
17390 IX86_BUILTIN_CVTPI2PS,
17391 IX86_BUILTIN_CVTPS2PI,
17392 IX86_BUILTIN_CVTSI2SS,
17393 IX86_BUILTIN_CVTSI642SS,
17394 IX86_BUILTIN_CVTSS2SI,
17395 IX86_BUILTIN_CVTSS2SI64,
17396 IX86_BUILTIN_CVTTPS2PI,
17397 IX86_BUILTIN_CVTTSS2SI,
17398 IX86_BUILTIN_CVTTSS2SI64,
17400 IX86_BUILTIN_MAXPS,
17401 IX86_BUILTIN_MAXSS,
17402 IX86_BUILTIN_MINPS,
17403 IX86_BUILTIN_MINSS,
17405 IX86_BUILTIN_LOADUPS,
17406 IX86_BUILTIN_STOREUPS,
17407 IX86_BUILTIN_MOVSS,
17409 IX86_BUILTIN_MOVHLPS,
17410 IX86_BUILTIN_MOVLHPS,
17411 IX86_BUILTIN_LOADHPS,
17412 IX86_BUILTIN_LOADLPS,
17413 IX86_BUILTIN_STOREHPS,
17414 IX86_BUILTIN_STORELPS,
17416 IX86_BUILTIN_MASKMOVQ,
17417 IX86_BUILTIN_MOVMSKPS,
17418 IX86_BUILTIN_PMOVMSKB,
17420 IX86_BUILTIN_MOVNTPS,
17421 IX86_BUILTIN_MOVNTQ,
17423 IX86_BUILTIN_LOADDQU,
17424 IX86_BUILTIN_STOREDQU,
17426 IX86_BUILTIN_PACKSSWB,
17427 IX86_BUILTIN_PACKSSDW,
17428 IX86_BUILTIN_PACKUSWB,
17430 IX86_BUILTIN_PADDB,
17431 IX86_BUILTIN_PADDW,
17432 IX86_BUILTIN_PADDD,
17433 IX86_BUILTIN_PADDQ,
17434 IX86_BUILTIN_PADDSB,
17435 IX86_BUILTIN_PADDSW,
17436 IX86_BUILTIN_PADDUSB,
17437 IX86_BUILTIN_PADDUSW,
17438 IX86_BUILTIN_PSUBB,
17439 IX86_BUILTIN_PSUBW,
17440 IX86_BUILTIN_PSUBD,
17441 IX86_BUILTIN_PSUBQ,
17442 IX86_BUILTIN_PSUBSB,
17443 IX86_BUILTIN_PSUBSW,
17444 IX86_BUILTIN_PSUBUSB,
17445 IX86_BUILTIN_PSUBUSW,
17448 IX86_BUILTIN_PANDN,
17452 IX86_BUILTIN_PAVGB,
17453 IX86_BUILTIN_PAVGW,
17455 IX86_BUILTIN_PCMPEQB,
17456 IX86_BUILTIN_PCMPEQW,
17457 IX86_BUILTIN_PCMPEQD,
17458 IX86_BUILTIN_PCMPGTB,
17459 IX86_BUILTIN_PCMPGTW,
17460 IX86_BUILTIN_PCMPGTD,
17462 IX86_BUILTIN_PMADDWD,
17464 IX86_BUILTIN_PMAXSW,
17465 IX86_BUILTIN_PMAXUB,
17466 IX86_BUILTIN_PMINSW,
17467 IX86_BUILTIN_PMINUB,
17469 IX86_BUILTIN_PMULHUW,
17470 IX86_BUILTIN_PMULHW,
17471 IX86_BUILTIN_PMULLW,
17473 IX86_BUILTIN_PSADBW,
17474 IX86_BUILTIN_PSHUFW,
17476 IX86_BUILTIN_PSLLW,
17477 IX86_BUILTIN_PSLLD,
17478 IX86_BUILTIN_PSLLQ,
17479 IX86_BUILTIN_PSRAW,
17480 IX86_BUILTIN_PSRAD,
17481 IX86_BUILTIN_PSRLW,
17482 IX86_BUILTIN_PSRLD,
17483 IX86_BUILTIN_PSRLQ,
17484 IX86_BUILTIN_PSLLWI,
17485 IX86_BUILTIN_PSLLDI,
17486 IX86_BUILTIN_PSLLQI,
17487 IX86_BUILTIN_PSRAWI,
17488 IX86_BUILTIN_PSRADI,
17489 IX86_BUILTIN_PSRLWI,
17490 IX86_BUILTIN_PSRLDI,
17491 IX86_BUILTIN_PSRLQI,
17493 IX86_BUILTIN_PUNPCKHBW,
17494 IX86_BUILTIN_PUNPCKHWD,
17495 IX86_BUILTIN_PUNPCKHDQ,
17496 IX86_BUILTIN_PUNPCKLBW,
17497 IX86_BUILTIN_PUNPCKLWD,
17498 IX86_BUILTIN_PUNPCKLDQ,
17500 IX86_BUILTIN_SHUFPS,
17502 IX86_BUILTIN_RCPPS,
17503 IX86_BUILTIN_RCPSS,
17504 IX86_BUILTIN_RSQRTPS,
17505 IX86_BUILTIN_RSQRTPS_NR,
17506 IX86_BUILTIN_RSQRTSS,
17507 IX86_BUILTIN_RSQRTF,
17508 IX86_BUILTIN_SQRTPS,
17509 IX86_BUILTIN_SQRTPS_NR,
17510 IX86_BUILTIN_SQRTSS,
17512 IX86_BUILTIN_UNPCKHPS,
17513 IX86_BUILTIN_UNPCKLPS,
17515 IX86_BUILTIN_ANDPS,
17516 IX86_BUILTIN_ANDNPS,
17518 IX86_BUILTIN_XORPS,
17521 IX86_BUILTIN_LDMXCSR,
17522 IX86_BUILTIN_STMXCSR,
17523 IX86_BUILTIN_SFENCE,
17525 /* 3DNow! Original */
17526 IX86_BUILTIN_FEMMS,
17527 IX86_BUILTIN_PAVGUSB,
17528 IX86_BUILTIN_PF2ID,
17529 IX86_BUILTIN_PFACC,
17530 IX86_BUILTIN_PFADD,
17531 IX86_BUILTIN_PFCMPEQ,
17532 IX86_BUILTIN_PFCMPGE,
17533 IX86_BUILTIN_PFCMPGT,
17534 IX86_BUILTIN_PFMAX,
17535 IX86_BUILTIN_PFMIN,
17536 IX86_BUILTIN_PFMUL,
17537 IX86_BUILTIN_PFRCP,
17538 IX86_BUILTIN_PFRCPIT1,
17539 IX86_BUILTIN_PFRCPIT2,
17540 IX86_BUILTIN_PFRSQIT1,
17541 IX86_BUILTIN_PFRSQRT,
17542 IX86_BUILTIN_PFSUB,
17543 IX86_BUILTIN_PFSUBR,
17544 IX86_BUILTIN_PI2FD,
17545 IX86_BUILTIN_PMULHRW,
17547 /* 3DNow! Athlon Extensions */
17548 IX86_BUILTIN_PF2IW,
17549 IX86_BUILTIN_PFNACC,
17550 IX86_BUILTIN_PFPNACC,
17551 IX86_BUILTIN_PI2FW,
17552 IX86_BUILTIN_PSWAPDSI,
17553 IX86_BUILTIN_PSWAPDSF,
17556 IX86_BUILTIN_ADDPD,
17557 IX86_BUILTIN_ADDSD,
17558 IX86_BUILTIN_DIVPD,
17559 IX86_BUILTIN_DIVSD,
17560 IX86_BUILTIN_MULPD,
17561 IX86_BUILTIN_MULSD,
17562 IX86_BUILTIN_SUBPD,
17563 IX86_BUILTIN_SUBSD,
17565 IX86_BUILTIN_CMPEQPD,
17566 IX86_BUILTIN_CMPLTPD,
17567 IX86_BUILTIN_CMPLEPD,
17568 IX86_BUILTIN_CMPGTPD,
17569 IX86_BUILTIN_CMPGEPD,
17570 IX86_BUILTIN_CMPNEQPD,
17571 IX86_BUILTIN_CMPNLTPD,
17572 IX86_BUILTIN_CMPNLEPD,
17573 IX86_BUILTIN_CMPNGTPD,
17574 IX86_BUILTIN_CMPNGEPD,
17575 IX86_BUILTIN_CMPORDPD,
17576 IX86_BUILTIN_CMPUNORDPD,
17577 IX86_BUILTIN_CMPEQSD,
17578 IX86_BUILTIN_CMPLTSD,
17579 IX86_BUILTIN_CMPLESD,
17580 IX86_BUILTIN_CMPNEQSD,
17581 IX86_BUILTIN_CMPNLTSD,
17582 IX86_BUILTIN_CMPNLESD,
17583 IX86_BUILTIN_CMPORDSD,
17584 IX86_BUILTIN_CMPUNORDSD,
17586 IX86_BUILTIN_COMIEQSD,
17587 IX86_BUILTIN_COMILTSD,
17588 IX86_BUILTIN_COMILESD,
17589 IX86_BUILTIN_COMIGTSD,
17590 IX86_BUILTIN_COMIGESD,
17591 IX86_BUILTIN_COMINEQSD,
17592 IX86_BUILTIN_UCOMIEQSD,
17593 IX86_BUILTIN_UCOMILTSD,
17594 IX86_BUILTIN_UCOMILESD,
17595 IX86_BUILTIN_UCOMIGTSD,
17596 IX86_BUILTIN_UCOMIGESD,
17597 IX86_BUILTIN_UCOMINEQSD,
17599 IX86_BUILTIN_MAXPD,
17600 IX86_BUILTIN_MAXSD,
17601 IX86_BUILTIN_MINPD,
17602 IX86_BUILTIN_MINSD,
17604 IX86_BUILTIN_ANDPD,
17605 IX86_BUILTIN_ANDNPD,
17607 IX86_BUILTIN_XORPD,
17609 IX86_BUILTIN_SQRTPD,
17610 IX86_BUILTIN_SQRTSD,
17612 IX86_BUILTIN_UNPCKHPD,
17613 IX86_BUILTIN_UNPCKLPD,
17615 IX86_BUILTIN_SHUFPD,
17617 IX86_BUILTIN_LOADUPD,
17618 IX86_BUILTIN_STOREUPD,
17619 IX86_BUILTIN_MOVSD,
17621 IX86_BUILTIN_LOADHPD,
17622 IX86_BUILTIN_LOADLPD,
17624 IX86_BUILTIN_CVTDQ2PD,
17625 IX86_BUILTIN_CVTDQ2PS,
17627 IX86_BUILTIN_CVTPD2DQ,
17628 IX86_BUILTIN_CVTPD2PI,
17629 IX86_BUILTIN_CVTPD2PS,
17630 IX86_BUILTIN_CVTTPD2DQ,
17631 IX86_BUILTIN_CVTTPD2PI,
17633 IX86_BUILTIN_CVTPI2PD,
17634 IX86_BUILTIN_CVTSI2SD,
17635 IX86_BUILTIN_CVTSI642SD,
17637 IX86_BUILTIN_CVTSD2SI,
17638 IX86_BUILTIN_CVTSD2SI64,
17639 IX86_BUILTIN_CVTSD2SS,
17640 IX86_BUILTIN_CVTSS2SD,
17641 IX86_BUILTIN_CVTTSD2SI,
17642 IX86_BUILTIN_CVTTSD2SI64,
17644 IX86_BUILTIN_CVTPS2DQ,
17645 IX86_BUILTIN_CVTPS2PD,
17646 IX86_BUILTIN_CVTTPS2DQ,
17648 IX86_BUILTIN_MOVNTI,
17649 IX86_BUILTIN_MOVNTPD,
17650 IX86_BUILTIN_MOVNTDQ,
17653 IX86_BUILTIN_MASKMOVDQU,
17654 IX86_BUILTIN_MOVMSKPD,
17655 IX86_BUILTIN_PMOVMSKB128,
17657 IX86_BUILTIN_PACKSSWB128,
17658 IX86_BUILTIN_PACKSSDW128,
17659 IX86_BUILTIN_PACKUSWB128,
17661 IX86_BUILTIN_PADDB128,
17662 IX86_BUILTIN_PADDW128,
17663 IX86_BUILTIN_PADDD128,
17664 IX86_BUILTIN_PADDQ128,
17665 IX86_BUILTIN_PADDSB128,
17666 IX86_BUILTIN_PADDSW128,
17667 IX86_BUILTIN_PADDUSB128,
17668 IX86_BUILTIN_PADDUSW128,
17669 IX86_BUILTIN_PSUBB128,
17670 IX86_BUILTIN_PSUBW128,
17671 IX86_BUILTIN_PSUBD128,
17672 IX86_BUILTIN_PSUBQ128,
17673 IX86_BUILTIN_PSUBSB128,
17674 IX86_BUILTIN_PSUBSW128,
17675 IX86_BUILTIN_PSUBUSB128,
17676 IX86_BUILTIN_PSUBUSW128,
17678 IX86_BUILTIN_PAND128,
17679 IX86_BUILTIN_PANDN128,
17680 IX86_BUILTIN_POR128,
17681 IX86_BUILTIN_PXOR128,
17683 IX86_BUILTIN_PAVGB128,
17684 IX86_BUILTIN_PAVGW128,
17686 IX86_BUILTIN_PCMPEQB128,
17687 IX86_BUILTIN_PCMPEQW128,
17688 IX86_BUILTIN_PCMPEQD128,
17689 IX86_BUILTIN_PCMPGTB128,
17690 IX86_BUILTIN_PCMPGTW128,
17691 IX86_BUILTIN_PCMPGTD128,
17693 IX86_BUILTIN_PMADDWD128,
17695 IX86_BUILTIN_PMAXSW128,
17696 IX86_BUILTIN_PMAXUB128,
17697 IX86_BUILTIN_PMINSW128,
17698 IX86_BUILTIN_PMINUB128,
17700 IX86_BUILTIN_PMULUDQ,
17701 IX86_BUILTIN_PMULUDQ128,
17702 IX86_BUILTIN_PMULHUW128,
17703 IX86_BUILTIN_PMULHW128,
17704 IX86_BUILTIN_PMULLW128,
17706 IX86_BUILTIN_PSADBW128,
17707 IX86_BUILTIN_PSHUFHW,
17708 IX86_BUILTIN_PSHUFLW,
17709 IX86_BUILTIN_PSHUFD,
17711 IX86_BUILTIN_PSLLDQI128,
17712 IX86_BUILTIN_PSLLWI128,
17713 IX86_BUILTIN_PSLLDI128,
17714 IX86_BUILTIN_PSLLQI128,
17715 IX86_BUILTIN_PSRAWI128,
17716 IX86_BUILTIN_PSRADI128,
17717 IX86_BUILTIN_PSRLDQI128,
17718 IX86_BUILTIN_PSRLWI128,
17719 IX86_BUILTIN_PSRLDI128,
17720 IX86_BUILTIN_PSRLQI128,
17722 IX86_BUILTIN_PSLLDQ128,
17723 IX86_BUILTIN_PSLLW128,
17724 IX86_BUILTIN_PSLLD128,
17725 IX86_BUILTIN_PSLLQ128,
17726 IX86_BUILTIN_PSRAW128,
17727 IX86_BUILTIN_PSRAD128,
17728 IX86_BUILTIN_PSRLW128,
17729 IX86_BUILTIN_PSRLD128,
17730 IX86_BUILTIN_PSRLQ128,
17732 IX86_BUILTIN_PUNPCKHBW128,
17733 IX86_BUILTIN_PUNPCKHWD128,
17734 IX86_BUILTIN_PUNPCKHDQ128,
17735 IX86_BUILTIN_PUNPCKHQDQ128,
17736 IX86_BUILTIN_PUNPCKLBW128,
17737 IX86_BUILTIN_PUNPCKLWD128,
17738 IX86_BUILTIN_PUNPCKLDQ128,
17739 IX86_BUILTIN_PUNPCKLQDQ128,
17741 IX86_BUILTIN_CLFLUSH,
17742 IX86_BUILTIN_MFENCE,
17743 IX86_BUILTIN_LFENCE,
17746 IX86_BUILTIN_ADDSUBPS,
17747 IX86_BUILTIN_HADDPS,
17748 IX86_BUILTIN_HSUBPS,
17749 IX86_BUILTIN_MOVSHDUP,
17750 IX86_BUILTIN_MOVSLDUP,
17751 IX86_BUILTIN_ADDSUBPD,
17752 IX86_BUILTIN_HADDPD,
17753 IX86_BUILTIN_HSUBPD,
17754 IX86_BUILTIN_LDDQU,
17756 IX86_BUILTIN_MONITOR,
17757 IX86_BUILTIN_MWAIT,
17760 IX86_BUILTIN_PHADDW,
17761 IX86_BUILTIN_PHADDD,
17762 IX86_BUILTIN_PHADDSW,
17763 IX86_BUILTIN_PHSUBW,
17764 IX86_BUILTIN_PHSUBD,
17765 IX86_BUILTIN_PHSUBSW,
17766 IX86_BUILTIN_PMADDUBSW,
17767 IX86_BUILTIN_PMULHRSW,
17768 IX86_BUILTIN_PSHUFB,
17769 IX86_BUILTIN_PSIGNB,
17770 IX86_BUILTIN_PSIGNW,
17771 IX86_BUILTIN_PSIGND,
17772 IX86_BUILTIN_PALIGNR,
17773 IX86_BUILTIN_PABSB,
17774 IX86_BUILTIN_PABSW,
17775 IX86_BUILTIN_PABSD,
17777 IX86_BUILTIN_PHADDW128,
17778 IX86_BUILTIN_PHADDD128,
17779 IX86_BUILTIN_PHADDSW128,
17780 IX86_BUILTIN_PHSUBW128,
17781 IX86_BUILTIN_PHSUBD128,
17782 IX86_BUILTIN_PHSUBSW128,
17783 IX86_BUILTIN_PMADDUBSW128,
17784 IX86_BUILTIN_PMULHRSW128,
17785 IX86_BUILTIN_PSHUFB128,
17786 IX86_BUILTIN_PSIGNB128,
17787 IX86_BUILTIN_PSIGNW128,
17788 IX86_BUILTIN_PSIGND128,
17789 IX86_BUILTIN_PALIGNR128,
17790 IX86_BUILTIN_PABSB128,
17791 IX86_BUILTIN_PABSW128,
17792 IX86_BUILTIN_PABSD128,
17794 /* AMDFAM10 - SSE4A New Instructions. */
17795 IX86_BUILTIN_MOVNTSD,
17796 IX86_BUILTIN_MOVNTSS,
17797 IX86_BUILTIN_EXTRQI,
17798 IX86_BUILTIN_EXTRQ,
17799 IX86_BUILTIN_INSERTQI,
17800 IX86_BUILTIN_INSERTQ,
17803 IX86_BUILTIN_BLENDPD,
17804 IX86_BUILTIN_BLENDPS,
17805 IX86_BUILTIN_BLENDVPD,
17806 IX86_BUILTIN_BLENDVPS,
17807 IX86_BUILTIN_PBLENDVB128,
17808 IX86_BUILTIN_PBLENDW128,
17813 IX86_BUILTIN_INSERTPS128,
17815 IX86_BUILTIN_MOVNTDQA,
17816 IX86_BUILTIN_MPSADBW128,
17817 IX86_BUILTIN_PACKUSDW128,
17818 IX86_BUILTIN_PCMPEQQ,
17819 IX86_BUILTIN_PHMINPOSUW128,
17821 IX86_BUILTIN_PMAXSB128,
17822 IX86_BUILTIN_PMAXSD128,
17823 IX86_BUILTIN_PMAXUD128,
17824 IX86_BUILTIN_PMAXUW128,
17826 IX86_BUILTIN_PMINSB128,
17827 IX86_BUILTIN_PMINSD128,
17828 IX86_BUILTIN_PMINUD128,
17829 IX86_BUILTIN_PMINUW128,
17831 IX86_BUILTIN_PMOVSXBW128,
17832 IX86_BUILTIN_PMOVSXBD128,
17833 IX86_BUILTIN_PMOVSXBQ128,
17834 IX86_BUILTIN_PMOVSXWD128,
17835 IX86_BUILTIN_PMOVSXWQ128,
17836 IX86_BUILTIN_PMOVSXDQ128,
17838 IX86_BUILTIN_PMOVZXBW128,
17839 IX86_BUILTIN_PMOVZXBD128,
17840 IX86_BUILTIN_PMOVZXBQ128,
17841 IX86_BUILTIN_PMOVZXWD128,
17842 IX86_BUILTIN_PMOVZXWQ128,
17843 IX86_BUILTIN_PMOVZXDQ128,
17845 IX86_BUILTIN_PMULDQ128,
17846 IX86_BUILTIN_PMULLD128,
17848 IX86_BUILTIN_ROUNDPD,
17849 IX86_BUILTIN_ROUNDPS,
17850 IX86_BUILTIN_ROUNDSD,
17851 IX86_BUILTIN_ROUNDSS,
17853 IX86_BUILTIN_PTESTZ,
17854 IX86_BUILTIN_PTESTC,
17855 IX86_BUILTIN_PTESTNZC,
17857 IX86_BUILTIN_VEC_INIT_V2SI,
17858 IX86_BUILTIN_VEC_INIT_V4HI,
17859 IX86_BUILTIN_VEC_INIT_V8QI,
17860 IX86_BUILTIN_VEC_EXT_V2DF,
17861 IX86_BUILTIN_VEC_EXT_V2DI,
17862 IX86_BUILTIN_VEC_EXT_V4SF,
17863 IX86_BUILTIN_VEC_EXT_V4SI,
17864 IX86_BUILTIN_VEC_EXT_V8HI,
17865 IX86_BUILTIN_VEC_EXT_V2SI,
17866 IX86_BUILTIN_VEC_EXT_V4HI,
17867 IX86_BUILTIN_VEC_EXT_V16QI,
17868 IX86_BUILTIN_VEC_SET_V2DI,
17869 IX86_BUILTIN_VEC_SET_V4SF,
17870 IX86_BUILTIN_VEC_SET_V4SI,
17871 IX86_BUILTIN_VEC_SET_V8HI,
17872 IX86_BUILTIN_VEC_SET_V4HI,
17873 IX86_BUILTIN_VEC_SET_V16QI,
17875 IX86_BUILTIN_VEC_PACK_SFIX,
17878 IX86_BUILTIN_CRC32QI,
17879 IX86_BUILTIN_CRC32HI,
17880 IX86_BUILTIN_CRC32SI,
17881 IX86_BUILTIN_CRC32DI,
17883 IX86_BUILTIN_PCMPESTRI128,
17884 IX86_BUILTIN_PCMPESTRM128,
17885 IX86_BUILTIN_PCMPESTRA128,
17886 IX86_BUILTIN_PCMPESTRC128,
17887 IX86_BUILTIN_PCMPESTRO128,
17888 IX86_BUILTIN_PCMPESTRS128,
17889 IX86_BUILTIN_PCMPESTRZ128,
17890 IX86_BUILTIN_PCMPISTRI128,
17891 IX86_BUILTIN_PCMPISTRM128,
17892 IX86_BUILTIN_PCMPISTRA128,
17893 IX86_BUILTIN_PCMPISTRC128,
17894 IX86_BUILTIN_PCMPISTRO128,
17895 IX86_BUILTIN_PCMPISTRS128,
17896 IX86_BUILTIN_PCMPISTRZ128,
17898 IX86_BUILTIN_PCMPGTQ,
17900 /* AES instructions */
17901 IX86_BUILTIN_AESENC128,
17902 IX86_BUILTIN_AESENCLAST128,
17903 IX86_BUILTIN_AESDEC128,
17904 IX86_BUILTIN_AESDECLAST128,
17905 IX86_BUILTIN_AESIMC128,
17906 IX86_BUILTIN_AESKEYGENASSIST128,
17908 /* PCLMUL instruction */
17909 IX86_BUILTIN_PCLMULQDQ128,
17911 /* TFmode support builtins. */
17913 IX86_BUILTIN_FABSQ,
17914 IX86_BUILTIN_COPYSIGNQ,
17916 /* SSE5 instructions */
17917 IX86_BUILTIN_FMADDSS,
17918 IX86_BUILTIN_FMADDSD,
17919 IX86_BUILTIN_FMADDPS,
17920 IX86_BUILTIN_FMADDPD,
17921 IX86_BUILTIN_FMSUBSS,
17922 IX86_BUILTIN_FMSUBSD,
17923 IX86_BUILTIN_FMSUBPS,
17924 IX86_BUILTIN_FMSUBPD,
17925 IX86_BUILTIN_FNMADDSS,
17926 IX86_BUILTIN_FNMADDSD,
17927 IX86_BUILTIN_FNMADDPS,
17928 IX86_BUILTIN_FNMADDPD,
17929 IX86_BUILTIN_FNMSUBSS,
17930 IX86_BUILTIN_FNMSUBSD,
17931 IX86_BUILTIN_FNMSUBPS,
17932 IX86_BUILTIN_FNMSUBPD,
17933 IX86_BUILTIN_PCMOV_V2DI,
17934 IX86_BUILTIN_PCMOV_V4SI,
17935 IX86_BUILTIN_PCMOV_V8HI,
17936 IX86_BUILTIN_PCMOV_V16QI,
17937 IX86_BUILTIN_PCMOV_V4SF,
17938 IX86_BUILTIN_PCMOV_V2DF,
17939 IX86_BUILTIN_PPERM,
17940 IX86_BUILTIN_PERMPS,
17941 IX86_BUILTIN_PERMPD,
17942 IX86_BUILTIN_PMACSSWW,
17943 IX86_BUILTIN_PMACSWW,
17944 IX86_BUILTIN_PMACSSWD,
17945 IX86_BUILTIN_PMACSWD,
17946 IX86_BUILTIN_PMACSSDD,
17947 IX86_BUILTIN_PMACSDD,
17948 IX86_BUILTIN_PMACSSDQL,
17949 IX86_BUILTIN_PMACSSDQH,
17950 IX86_BUILTIN_PMACSDQL,
17951 IX86_BUILTIN_PMACSDQH,
17952 IX86_BUILTIN_PMADCSSWD,
17953 IX86_BUILTIN_PMADCSWD,
17954 IX86_BUILTIN_PHADDBW,
17955 IX86_BUILTIN_PHADDBD,
17956 IX86_BUILTIN_PHADDBQ,
17957 IX86_BUILTIN_PHADDWD,
17958 IX86_BUILTIN_PHADDWQ,
17959 IX86_BUILTIN_PHADDDQ,
17960 IX86_BUILTIN_PHADDUBW,
17961 IX86_BUILTIN_PHADDUBD,
17962 IX86_BUILTIN_PHADDUBQ,
17963 IX86_BUILTIN_PHADDUWD,
17964 IX86_BUILTIN_PHADDUWQ,
17965 IX86_BUILTIN_PHADDUDQ,
17966 IX86_BUILTIN_PHSUBBW,
17967 IX86_BUILTIN_PHSUBWD,
17968 IX86_BUILTIN_PHSUBDQ,
17969 IX86_BUILTIN_PROTB,
17970 IX86_BUILTIN_PROTW,
17971 IX86_BUILTIN_PROTD,
17972 IX86_BUILTIN_PROTQ,
17973 IX86_BUILTIN_PROTB_IMM,
17974 IX86_BUILTIN_PROTW_IMM,
17975 IX86_BUILTIN_PROTD_IMM,
17976 IX86_BUILTIN_PROTQ_IMM,
17977 IX86_BUILTIN_PSHLB,
17978 IX86_BUILTIN_PSHLW,
17979 IX86_BUILTIN_PSHLD,
17980 IX86_BUILTIN_PSHLQ,
17981 IX86_BUILTIN_PSHAB,
17982 IX86_BUILTIN_PSHAW,
17983 IX86_BUILTIN_PSHAD,
17984 IX86_BUILTIN_PSHAQ,
17985 IX86_BUILTIN_FRCZSS,
17986 IX86_BUILTIN_FRCZSD,
17987 IX86_BUILTIN_FRCZPS,
17988 IX86_BUILTIN_FRCZPD,
17989 IX86_BUILTIN_CVTPH2PS,
17990 IX86_BUILTIN_CVTPS2PH,
17992 IX86_BUILTIN_COMEQSS,
17993 IX86_BUILTIN_COMNESS,
17994 IX86_BUILTIN_COMLTSS,
17995 IX86_BUILTIN_COMLESS,
17996 IX86_BUILTIN_COMGTSS,
17997 IX86_BUILTIN_COMGESS,
17998 IX86_BUILTIN_COMUEQSS,
17999 IX86_BUILTIN_COMUNESS,
18000 IX86_BUILTIN_COMULTSS,
18001 IX86_BUILTIN_COMULESS,
18002 IX86_BUILTIN_COMUGTSS,
18003 IX86_BUILTIN_COMUGESS,
18004 IX86_BUILTIN_COMORDSS,
18005 IX86_BUILTIN_COMUNORDSS,
18006 IX86_BUILTIN_COMFALSESS,
18007 IX86_BUILTIN_COMTRUESS,
18009 IX86_BUILTIN_COMEQSD,
18010 IX86_BUILTIN_COMNESD,
18011 IX86_BUILTIN_COMLTSD,
18012 IX86_BUILTIN_COMLESD,
18013 IX86_BUILTIN_COMGTSD,
18014 IX86_BUILTIN_COMGESD,
18015 IX86_BUILTIN_COMUEQSD,
18016 IX86_BUILTIN_COMUNESD,
18017 IX86_BUILTIN_COMULTSD,
18018 IX86_BUILTIN_COMULESD,
18019 IX86_BUILTIN_COMUGTSD,
18020 IX86_BUILTIN_COMUGESD,
18021 IX86_BUILTIN_COMORDSD,
18022 IX86_BUILTIN_COMUNORDSD,
18023 IX86_BUILTIN_COMFALSESD,
18024 IX86_BUILTIN_COMTRUESD,
18026 IX86_BUILTIN_COMEQPS,
18027 IX86_BUILTIN_COMNEPS,
18028 IX86_BUILTIN_COMLTPS,
18029 IX86_BUILTIN_COMLEPS,
18030 IX86_BUILTIN_COMGTPS,
18031 IX86_BUILTIN_COMGEPS,
18032 IX86_BUILTIN_COMUEQPS,
18033 IX86_BUILTIN_COMUNEPS,
18034 IX86_BUILTIN_COMULTPS,
18035 IX86_BUILTIN_COMULEPS,
18036 IX86_BUILTIN_COMUGTPS,
18037 IX86_BUILTIN_COMUGEPS,
18038 IX86_BUILTIN_COMORDPS,
18039 IX86_BUILTIN_COMUNORDPS,
18040 IX86_BUILTIN_COMFALSEPS,
18041 IX86_BUILTIN_COMTRUEPS,
18043 IX86_BUILTIN_COMEQPD,
18044 IX86_BUILTIN_COMNEPD,
18045 IX86_BUILTIN_COMLTPD,
18046 IX86_BUILTIN_COMLEPD,
18047 IX86_BUILTIN_COMGTPD,
18048 IX86_BUILTIN_COMGEPD,
18049 IX86_BUILTIN_COMUEQPD,
18050 IX86_BUILTIN_COMUNEPD,
18051 IX86_BUILTIN_COMULTPD,
18052 IX86_BUILTIN_COMULEPD,
18053 IX86_BUILTIN_COMUGTPD,
18054 IX86_BUILTIN_COMUGEPD,
18055 IX86_BUILTIN_COMORDPD,
18056 IX86_BUILTIN_COMUNORDPD,
18057 IX86_BUILTIN_COMFALSEPD,
18058 IX86_BUILTIN_COMTRUEPD,
18060 IX86_BUILTIN_PCOMEQUB,
18061 IX86_BUILTIN_PCOMNEUB,
18062 IX86_BUILTIN_PCOMLTUB,
18063 IX86_BUILTIN_PCOMLEUB,
18064 IX86_BUILTIN_PCOMGTUB,
18065 IX86_BUILTIN_PCOMGEUB,
18066 IX86_BUILTIN_PCOMFALSEUB,
18067 IX86_BUILTIN_PCOMTRUEUB,
18068 IX86_BUILTIN_PCOMEQUW,
18069 IX86_BUILTIN_PCOMNEUW,
18070 IX86_BUILTIN_PCOMLTUW,
18071 IX86_BUILTIN_PCOMLEUW,
18072 IX86_BUILTIN_PCOMGTUW,
18073 IX86_BUILTIN_PCOMGEUW,
18074 IX86_BUILTIN_PCOMFALSEUW,
18075 IX86_BUILTIN_PCOMTRUEUW,
18076 IX86_BUILTIN_PCOMEQUD,
18077 IX86_BUILTIN_PCOMNEUD,
18078 IX86_BUILTIN_PCOMLTUD,
18079 IX86_BUILTIN_PCOMLEUD,
18080 IX86_BUILTIN_PCOMGTUD,
18081 IX86_BUILTIN_PCOMGEUD,
18082 IX86_BUILTIN_PCOMFALSEUD,
18083 IX86_BUILTIN_PCOMTRUEUD,
18084 IX86_BUILTIN_PCOMEQUQ,
18085 IX86_BUILTIN_PCOMNEUQ,
18086 IX86_BUILTIN_PCOMLTUQ,
18087 IX86_BUILTIN_PCOMLEUQ,
18088 IX86_BUILTIN_PCOMGTUQ,
18089 IX86_BUILTIN_PCOMGEUQ,
18090 IX86_BUILTIN_PCOMFALSEUQ,
18091 IX86_BUILTIN_PCOMTRUEUQ,
18093 IX86_BUILTIN_PCOMEQB,
18094 IX86_BUILTIN_PCOMNEB,
18095 IX86_BUILTIN_PCOMLTB,
18096 IX86_BUILTIN_PCOMLEB,
18097 IX86_BUILTIN_PCOMGTB,
18098 IX86_BUILTIN_PCOMGEB,
18099 IX86_BUILTIN_PCOMFALSEB,
18100 IX86_BUILTIN_PCOMTRUEB,
18101 IX86_BUILTIN_PCOMEQW,
18102 IX86_BUILTIN_PCOMNEW,
18103 IX86_BUILTIN_PCOMLTW,
18104 IX86_BUILTIN_PCOMLEW,
18105 IX86_BUILTIN_PCOMGTW,
18106 IX86_BUILTIN_PCOMGEW,
18107 IX86_BUILTIN_PCOMFALSEW,
18108 IX86_BUILTIN_PCOMTRUEW,
18109 IX86_BUILTIN_PCOMEQD,
18110 IX86_BUILTIN_PCOMNED,
18111 IX86_BUILTIN_PCOMLTD,
18112 IX86_BUILTIN_PCOMLED,
18113 IX86_BUILTIN_PCOMGTD,
18114 IX86_BUILTIN_PCOMGED,
18115 IX86_BUILTIN_PCOMFALSED,
18116 IX86_BUILTIN_PCOMTRUED,
18117 IX86_BUILTIN_PCOMEQQ,
18118 IX86_BUILTIN_PCOMNEQ,
18119 IX86_BUILTIN_PCOMLTQ,
18120 IX86_BUILTIN_PCOMLEQ,
18121 IX86_BUILTIN_PCOMGTQ,
18122 IX86_BUILTIN_PCOMGEQ,
18123 IX86_BUILTIN_PCOMFALSEQ,
18124 IX86_BUILTIN_PCOMTRUEQ,
18129 /* Table for the ix86 builtin decls. */
/* GTY(()) registers the array as a garbage-collection root so the
   FUNCTION_DECLs stay live across GGC collections.  Indexed by
   enum ix86_builtins; entries for builtins that were never registered
   presumably remain NULL_TREE (static zero-init) -- see def_builtin.  */
18130 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
18132 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so,
18133 * if the target_flags include one of MASK. Stores the function decl
18134 * in the ix86_builtins array.
18135 * Returns the function decl or NULL_TREE, if the builtin was not added. */
18138 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
/* decl stays NULL_TREE when the required ISA is not enabled, so callers
   can tell whether the builtin was actually created.  */
18140 tree decl = NULL_TREE;
/* Register only when MASK's ISA bits are currently enabled, and never
   register a builtin tagged OPTION_MASK_ISA_64BIT on a 32-bit target.  */
18142 if (mask & ix86_isa_flags
18143 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
/* BUILT_IN_MD marks this as a machine-dependent (target) builtin.  */
18145 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
/* Cache the decl, indexed by the builtin code, for later lookup.  */
18147 ix86_builtins[(int) code] = decl;
18153 /* Like def_builtin, but also marks the function decl "const". */
18156 def_builtin_const (int mask, const char *name, tree type,
18157 enum ix86_builtins code)
18159 tree decl = def_builtin (mask, name, type, code);
/* TREE_READONLY on a FUNCTION_DECL is the tree-level equivalent of the
   "const" function attribute (no side effects, result depends only on
   the arguments).  NOTE(review): def_builtin can return NULL_TREE; an
   elided line presumably guards this store with "if (decl)" -- confirm
   against the full source.  */
18161 TREE_READONLY (decl) = 1;
18165 /* Bits for builtin_description.flag. */
18167 /* Set when we don't support the comparison natively, and should
18168 swap_comparison in order to support it. */
18169 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row of the builtin tables below: maps a "__builtin_ia32_*" name
   to the insn pattern and builtin code used to expand it.  */
18171 struct builtin_description
/* OPTION_MASK_ISA_* bits: ISA(s) that must be enabled for this row.  */
18173 const unsigned int mask;
/* Insn pattern (CODE_FOR_*) the builtin expands to.  */
18174 const enum insn_code icode;
/* User-visible builtin name, e.g. "__builtin_ia32_comieq".  */
18175 const char *const name;
/* Enumerator used to index the ix86_builtins decl table.  */
18176 const enum ix86_builtins code;
/* Comparison code for compare builtins; UNKNOWN for the rest.  */
18177 const enum rtx_code comparison;
/* Scalar FP ordered/unordered compare builtins (SSE comiss/ucomiss and
   SSE2 comisd/ucomisd).  Each row's rtx_code selects the condition the
   expander tests on the resulting flags to produce an int result.  */
18181 static const struct builtin_description bdesc_comi[] =
18183 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
18184 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
18185 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
18186 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
18187 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
18188 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
18189 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
18190 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
18191 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
18192 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
18193 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
18194 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
18195 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
18196 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
18197 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
18198 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
18199 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
18200 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
18201 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
18202 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
18203 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
18204 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
18205 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
18206 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
/* SSE4.2 explicit-length packed string compare builtins (pcmpestr*).
   All rows share one insn pattern; the last field carries the CC mode
   whose flag bit the flag-extracting variants (a/c/o/s/z) return.  */
18209 static const struct builtin_description bdesc_pcmpestr[] =
18212 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
18213 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
18214 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
18215 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
18216 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
18217 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
18218 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
/* SSE4.2 implicit-length (NUL-terminated) packed string compare
   builtins (pcmpistr*).  Same layout as bdesc_pcmpestr above: the last
   field is the CC mode for the flag-extracting variants.  */
18221 static const struct builtin_description bdesc_pcmpistr[] =
18224 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
18225 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
18226 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
18227 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
18228 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
18229 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
18230 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
18233 /* Special builtin types */
/* Encoded signatures (RETURN_FTYPE_ARGS) for the "special" builtins in
   bdesc_special_args below -- loads, stores and similar operations that
   take pointer/memory operands.  PC* prefixes denote pointer-to-const.  */
18234 enum ix86_special_builtin_type
18236 SPECIAL_FTYPE_UNKNOWN,
/* Vector loads from memory.  */
18238 V16QI_FTYPE_PCCHAR,
18239 V4SF_FTYPE_PCFLOAT,
18240 V2DF_FTYPE_PCDOUBLE,
18241 V4SF_FTYPE_V4SF_PCV2SF,
18242 V2DF_FTYPE_V2DF_PCDOUBLE,
/* Vector stores to memory.  */
18244 VOID_FTYPE_PV2SF_V4SF,
18245 VOID_FTYPE_PV2DI_V2DI,
18246 VOID_FTYPE_PCHAR_V16QI,
18247 VOID_FTYPE_PFLOAT_V4SF,
18248 VOID_FTYPE_PDOUBLE_V2DF,
/* Scalar non-temporal store (movnti).  */
18250 VOID_FTYPE_PINT_INT
18253 /* Builtin types */
/* Encoded signatures (RETURN_FTYPE_ARGS) for ordinary builtins.  The
   trailing suffixes (_COUNT, _SWAP, _VEC_MERGE, _PTEST) appear to steer
   the expander -- e.g. _COUNT rows are used for shift-count operands in
   bdesc_args -- but their exact semantics live in the (elided) expander
   code; confirm there before relying on them.  */
18254 enum ix86_builtin_type
18257 FLOAT128_FTYPE_FLOAT128,
18259 FLOAT128_FTYPE_FLOAT128_FLOAT128,
18260 INT_FTYPE_V2DI_V2DI_PTEST,
18278 V4SF_FTYPE_V4SF_VEC_MERGE,
18286 V2DF_FTYPE_V2DF_VEC_MERGE,
/* Two-operand vector signatures.  */
18296 V16QI_FTYPE_V16QI_V16QI,
18297 V16QI_FTYPE_V8HI_V8HI,
18298 V8QI_FTYPE_V8QI_V8QI,
18299 V8QI_FTYPE_V4HI_V4HI,
18300 V8HI_FTYPE_V8HI_V8HI,
18301 V8HI_FTYPE_V8HI_V8HI_COUNT,
18302 V8HI_FTYPE_V16QI_V16QI,
18303 V8HI_FTYPE_V4SI_V4SI,
18304 V8HI_FTYPE_V8HI_SI_COUNT,
18305 V4SI_FTYPE_V4SI_V4SI,
18306 V4SI_FTYPE_V4SI_V4SI_COUNT,
18307 V4SI_FTYPE_V8HI_V8HI,
18308 V4SI_FTYPE_V4SF_V4SF,
18309 V4SI_FTYPE_V2DF_V2DF,
18310 V4SI_FTYPE_V4SI_SI_COUNT,
18311 V4HI_FTYPE_V4HI_V4HI,
18312 V4HI_FTYPE_V4HI_V4HI_COUNT,
18313 V4HI_FTYPE_V8QI_V8QI,
18314 V4HI_FTYPE_V2SI_V2SI,
18315 V4HI_FTYPE_V4HI_SI_COUNT,
18316 V4SF_FTYPE_V4SF_V4SF,
18317 V4SF_FTYPE_V4SF_V4SF_SWAP,
18318 V4SF_FTYPE_V4SF_V2SI,
18319 V4SF_FTYPE_V4SF_V2DF,
18320 V4SF_FTYPE_V4SF_DI,
18321 V4SF_FTYPE_V4SF_SI,
18322 V2DI_FTYPE_V2DI_V2DI,
18323 V2DI_FTYPE_V2DI_V2DI_COUNT,
18324 V2DI_FTYPE_V16QI_V16QI,
18325 V2DI_FTYPE_V4SI_V4SI,
18326 V2DI_FTYPE_V2DI_V16QI,
18327 V2DI_FTYPE_V2DF_V2DF,
18328 V2DI_FTYPE_V2DI_SI_COUNT,
18329 V2SI_FTYPE_V2SI_V2SI,
18330 V2SI_FTYPE_V2SI_V2SI_COUNT,
18331 V2SI_FTYPE_V4HI_V4HI,
18332 V2SI_FTYPE_V2SF_V2SF,
18333 V2SI_FTYPE_V2SI_SI_COUNT,
18334 V2DF_FTYPE_V2DF_V2DF,
18335 V2DF_FTYPE_V2DF_V2DF_SWAP,
18336 V2DF_FTYPE_V2DF_V4SF,
18337 V2DF_FTYPE_V2DF_DI,
18338 V2DF_FTYPE_V2DF_SI,
18339 V2SF_FTYPE_V2SF_V2SF,
18340 V1DI_FTYPE_V1DI_V1DI,
18341 V1DI_FTYPE_V1DI_V1DI_COUNT,
18342 V1DI_FTYPE_V8QI_V8QI,
18343 V1DI_FTYPE_V2SI_V2SI,
18344 V1DI_FTYPE_V1DI_SI_COUNT,
/* Scalar signatures (crc32 and friends).  */
18345 UINT64_FTYPE_UINT64_UINT64,
18346 UINT_FTYPE_UINT_UINT,
18347 UINT_FTYPE_UINT_USHORT,
18348 UINT_FTYPE_UINT_UCHAR,
/* Vector-with-immediate signatures.  */
18349 V8HI_FTYPE_V8HI_INT,
18350 V4SI_FTYPE_V4SI_INT,
18351 V4HI_FTYPE_V4HI_INT,
18352 V4SF_FTYPE_V4SF_INT,
18353 V2DI_FTYPE_V2DI_INT,
18354 V2DI2TI_FTYPE_V2DI_INT,
18355 V2DF_FTYPE_V2DF_INT,
/* Three-operand signatures (blends, palignr, mpsadbw, ...).  */
18356 V16QI_FTYPE_V16QI_V16QI_V16QI,
18357 V4SF_FTYPE_V4SF_V4SF_V4SF,
18358 V2DF_FTYPE_V2DF_V2DF_V2DF,
18359 V16QI_FTYPE_V16QI_V16QI_INT,
18360 V8HI_FTYPE_V8HI_V8HI_INT,
18361 V4SI_FTYPE_V4SI_V4SI_INT,
18362 V4SF_FTYPE_V4SF_V4SF_INT,
18363 V2DI_FTYPE_V2DI_V2DI_INT,
18364 V2DI2TI_FTYPE_V2DI_V2DI_INT,
18365 V1DI2DI_FTYPE_V1DI_V1DI_INT,
18366 V2DF_FTYPE_V2DF_V2DF_INT,
18367 V2DI_FTYPE_V2DI_UINT_UINT,
18368 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
18371 /* Special builtins with variable number of arguments. */
/* Rows expanded through the special-args path: unaligned/non-temporal
   loads and stores, fences, and other memory-operand builtins.  The
   last field is the ix86_special_builtin_type signature.  */
18372 static const struct builtin_description bdesc_special_args[] =
18375 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18378 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
18381 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18382 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18383 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
18385 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18386 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
18387 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18388 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
18390 /* SSE or 3DNow!A */
18391 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18392 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
18395 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
/* NOTE(review): name is 0 here, unlike every other row; the mfence
   decl is presumably created by other means -- confirm in full source.  */
18396 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
18397 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
18399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
18401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
18402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
18403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18406 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
18409 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
18412 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
18415 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
18416 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
18419 /* Builtins with variable number of arguments. */
18420 static const struct builtin_description bdesc_args[] =
18423 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18424 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18425 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18426 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18427 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18428 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18430 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18431 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18432 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18433 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18434 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18435 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18436 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18437 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18439 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18440 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18442 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18443 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18444 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18445 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18447 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18448 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18449 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18450 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18451 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18452 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18454 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18455 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18456 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
18457 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18458 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
18459 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
18461 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18462 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
18463 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
18465 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
18467 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18468 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18469 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18470 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18471 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18472 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18474 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18475 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18476 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
18477 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18478 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18479 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
18481 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
18482 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
18483 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
18484 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
18487 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18488 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18489 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18490 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18492 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18493 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18494 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18495 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18496 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18497 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
18498 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18499 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18500 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18501 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18502 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18503 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18504 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18505 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18506 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18509 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
18510 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
18511 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
18512 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
18513 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18514 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
18517 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
18518 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18519 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18520 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18521 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18522 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
18523 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18524 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18525 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18526 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
18527 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
18528 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
18530 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
18532 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18533 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18534 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18535 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18536 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18537 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18538 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18539 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18541 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18542 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18543 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18544 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18546 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18547 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18548 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18549 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18550 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18551 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
18552 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18553 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
18554 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
18555 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
18556 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18557 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
18558 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
18559 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
18560 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18561 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
18562 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
18564 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18565 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18566 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18569 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18571 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18572 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18574 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18575 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18576 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18577 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18578 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
18580 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
18581 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
18582 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
18584 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
18586 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18587 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18588 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
18590 /* SSE MMX or 3Dnow!A */
18591 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18592 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18593 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18595 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18596 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18597 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
18598 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
18600 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
18601 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
18603 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
18606 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
18608 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
18609 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
18610 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
18611 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
18612 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
18614 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18615 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18616 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
18617 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
18618 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
18620 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
18622 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18623 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
18624 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18625 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
18627 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18628 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
18629 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
18631 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18632 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18633 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18634 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18635 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18636 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18637 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18638 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18640 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18641 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18642 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18643 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18644 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
18645 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18646 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18647 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18648 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18649 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18650 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
18651 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18652 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
18653 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
18654 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
18655 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18656 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
18657 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
18658 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
18659 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
18661 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18662 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18663 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18664 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18666 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18667 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18668 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18669 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18671 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18672 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18673 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
18675 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
18677 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18678 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18679 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18680 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18681 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18682 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18683 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18684 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18686 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18687 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18688 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18689 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18690 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18691 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18692 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18693 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18695 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18696 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
18698 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18699 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18700 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18701 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
18703 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18704 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18706 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18707 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18708 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
18709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
18710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
18711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* SSE2 integer min/max.  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
/* SSE2 interleave (unpack high/low halves).  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* SSE2 pack with saturation (note: the result mode is narrower than the
   operand mode, hence the mixed signatures).  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
/* SSE2 multiplies and sum-of-absolute-differences.  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
/* SSE2 scalar conversions (the 64-bit cvtsi642sd variant additionally
   requires a 64-bit target).  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
/* SSE2 shifts: whole-register byte shifts (ashlti3/lshrti3) plus
   per-element shifts with either an immediate (_COUNT SI form) or a
   vector count.  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
/* Arithmetic right shifts exist only for 16- and 32-bit elements.  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
/* SSE2 shuffles (immediate-controlled).  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
/* SSE2 scalar sqrt and __float128 (TFmode) sign-manipulation helpers;
   the latter have no public "__builtin_ia32_" name (0).  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
/* SSE2 MMX (64-bit vector) add/sub.  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
/* SSE3: duplicate-move and horizontal add/sub/addsub.  */
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
/* SSSE3.  Most entries come in pairs: a 128-bit XMM form (…128) and a
   64-bit MMX form.  */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
/* palignr takes an extra shift-count immediate.  */
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
/* SSE4.1.  */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
/* Sign- and zero-extending packed moves; only the low elements of the
   source vector are consumed, hence the wide argument types.  */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
/* SSE4.1 pack, compare, min/max and multiplies.  */
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
/* SSE4.1 and SSE5: rounding and ptest builtins are gated by
   OPTION_MASK_ISA_ROUND, which both ISAs provide.  */
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
/* The three ptest builtins share one insn pattern; the RTX comparison
   code in each row selects which condition the result tests.  */
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
/* SSE4.2: 64-bit compare and CRC32 accumulation (the DI form needs a
   64-bit target).  */
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
/* SSE4A (AMD): extract/insert bit fields from/into XMM low quadword.  */
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
/* AES and PCLMUL.  These only require SSE2 in the table mask; the
   builtins have no "__builtin_ia32_" name column (0) here.  */
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
/* Argument-signature tags for the SSE5 multi-argument builtins below.
   Naming scheme: MULTI_ARG_<number of args>_<element mode>[_<variant>],
   where the variants seen here are IMM (last operand is an immediate)
   and CMP (comparison, paired with an RTX compare code in the table).  */
enum multi_arg_type {
MULTI_ARG_3_PERMPS,
MULTI_ARG_3_PERMPD,
MULTI_ARG_2_DI_IMM,
MULTI_ARG_2_SI_IMM,
MULTI_ARG_2_HI_IMM,
MULTI_ARG_2_QI_IMM,
MULTI_ARG_2_SF_CMP,
MULTI_ARG_2_DF_CMP,
MULTI_ARG_2_DI_CMP,
MULTI_ARG_2_SI_CMP,
MULTI_ARG_2_HI_CMP,
MULTI_ARG_2_QI_CMP,
/* SSE5 multi-argument builtins.  Each row: ISA mask, insn code, builtin
   name, builtin enum code, RTX compare code (or 0), and a MULTI_ARG_*
   signature tag.  */
static const struct builtin_description bdesc_multi_arg[] =
/* Fused multiply-add/sub, scalar (vm…) and packed forms.  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
18970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
/* Per-mode conditional moves.  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
/* Byte/element permutes.  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
/* Integer multiply-accumulate (pmacs*) and multiply-add (pmadcs*).  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
/* Rotates: vector-count (vrotl…) and immediate (rotl…, _IMM tag) forms.  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
/* Per-element arithmetic (psha*) and logical (pshl*) shifts.  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
/* Fraction extraction and half-precision conversions.  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
/* Horizontal widening adds/subs (signed phadd*, unsigned phaddu*).  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
/* SSE5 scalar float compares.  All rows share one insn pattern per
   mode; the RTX compare code distinguishes them.  Note the "neq"/"uneq"
   alternate spellings map to the same builtin code as their "ne"/"une"
   counterparts (two names, one builtin).  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
/* Same set of comparisons for scalar double.  */
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
{ OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19064 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
19065 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
19066 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
19067 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
19068 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
19069 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
19070 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
19071 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
19072 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19073 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
19074 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
19075 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
19076 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
19077 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
19078 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
19079 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
19081 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
19082 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19083 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
19084 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
19085 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
19086 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
19087 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
19088 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
19089 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19090 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
19091 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
19092 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
19093 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
19094 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
19095 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
19096 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
19098 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
19099 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19100 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
19101 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
19102 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
19103 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
19104 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
19106 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
19107 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19108 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
19109 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
19110 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
19111 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
19112 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
19114 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
19115 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19116 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
19117 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
19118 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
19119 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
19120 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
19122 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19123 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19124 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
19125 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
19126 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
19127 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
19128 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
19130 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
19131 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19132 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
19133 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
19134 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
19135 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
19136 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
19138 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
19139 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19140 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
19141 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
19142 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
19143 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
19144 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
19146 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
19147 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19148 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
19149 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
19150 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
19151 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
19152 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
19154 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
19155 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19156 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
19157 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
19158 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
19159 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
19160 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
19162 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
19163 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
19164 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
19165 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
19166 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
19167 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
19168 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
19169 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
19171 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19172 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19173 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19174 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19175 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
19176 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
19177 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
19178 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
19180 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19181 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19182 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19183 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19184 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
19185 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
19186 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
19187 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
19190 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
19191 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
19194 ix86_init_mmx_sse_builtins (void)
19196 const struct builtin_description * d;
19199 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
19200 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
19201 tree V1DI_type_node
19202 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
19203 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
19204 tree V2DI_type_node
19205 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
19206 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
19207 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
19208 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
19209 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
19210 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
19211 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
19213 tree pchar_type_node = build_pointer_type (char_type_node);
19214 tree pcchar_type_node
19215 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
19216 tree pfloat_type_node = build_pointer_type (float_type_node);
19217 tree pcfloat_type_node
19218 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
19219 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
19220 tree pcv2sf_type_node
19221 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
19222 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
19223 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
19226 tree int_ftype_v4sf_v4sf
19227 = build_function_type_list (integer_type_node,
19228 V4SF_type_node, V4SF_type_node, NULL_TREE);
19229 tree v4si_ftype_v4sf_v4sf
19230 = build_function_type_list (V4SI_type_node,
19231 V4SF_type_node, V4SF_type_node, NULL_TREE);
19232 /* MMX/SSE/integer conversions. */
19233 tree int_ftype_v4sf
19234 = build_function_type_list (integer_type_node,
19235 V4SF_type_node, NULL_TREE);
19236 tree int64_ftype_v4sf
19237 = build_function_type_list (long_long_integer_type_node,
19238 V4SF_type_node, NULL_TREE);
19239 tree int_ftype_v8qi
19240 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
19241 tree v4sf_ftype_v4sf_int
19242 = build_function_type_list (V4SF_type_node,
19243 V4SF_type_node, integer_type_node, NULL_TREE);
19244 tree v4sf_ftype_v4sf_int64
19245 = build_function_type_list (V4SF_type_node,
19246 V4SF_type_node, long_long_integer_type_node,
19248 tree v4sf_ftype_v4sf_v2si
19249 = build_function_type_list (V4SF_type_node,
19250 V4SF_type_node, V2SI_type_node, NULL_TREE);
19252 /* Miscellaneous. */
19253 tree v8qi_ftype_v4hi_v4hi
19254 = build_function_type_list (V8QI_type_node,
19255 V4HI_type_node, V4HI_type_node, NULL_TREE);
19256 tree v4hi_ftype_v2si_v2si
19257 = build_function_type_list (V4HI_type_node,
19258 V2SI_type_node, V2SI_type_node, NULL_TREE);
19259 tree v4sf_ftype_v4sf_v4sf_int
19260 = build_function_type_list (V4SF_type_node,
19261 V4SF_type_node, V4SF_type_node,
19262 integer_type_node, NULL_TREE);
19263 tree v2si_ftype_v4hi_v4hi
19264 = build_function_type_list (V2SI_type_node,
19265 V4HI_type_node, V4HI_type_node, NULL_TREE);
19266 tree v4hi_ftype_v4hi_int
19267 = build_function_type_list (V4HI_type_node,
19268 V4HI_type_node, integer_type_node, NULL_TREE);
19269 tree v2si_ftype_v2si_int
19270 = build_function_type_list (V2SI_type_node,
19271 V2SI_type_node, integer_type_node, NULL_TREE);
19272 tree v1di_ftype_v1di_int
19273 = build_function_type_list (V1DI_type_node,
19274 V1DI_type_node, integer_type_node, NULL_TREE);
19276 tree void_ftype_void
19277 = build_function_type (void_type_node, void_list_node);
19278 tree void_ftype_unsigned
19279 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
19280 tree void_ftype_unsigned_unsigned
19281 = build_function_type_list (void_type_node, unsigned_type_node,
19282 unsigned_type_node, NULL_TREE);
19283 tree void_ftype_pcvoid_unsigned_unsigned
19284 = build_function_type_list (void_type_node, const_ptr_type_node,
19285 unsigned_type_node, unsigned_type_node,
19287 tree unsigned_ftype_void
19288 = build_function_type (unsigned_type_node, void_list_node);
19289 tree v2si_ftype_v4sf
19290 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
19291 /* Loads/stores. */
19292 tree void_ftype_v8qi_v8qi_pchar
19293 = build_function_type_list (void_type_node,
19294 V8QI_type_node, V8QI_type_node,
19295 pchar_type_node, NULL_TREE);
19296 tree v4sf_ftype_pcfloat
19297 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
19298 tree v4sf_ftype_v4sf_pcv2sf
19299 = build_function_type_list (V4SF_type_node,
19300 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
19301 tree void_ftype_pv2sf_v4sf
19302 = build_function_type_list (void_type_node,
19303 pv2sf_type_node, V4SF_type_node, NULL_TREE);
19304 tree void_ftype_pfloat_v4sf
19305 = build_function_type_list (void_type_node,
19306 pfloat_type_node, V4SF_type_node, NULL_TREE);
19307 tree void_ftype_pdi_di
19308 = build_function_type_list (void_type_node,
19309 pdi_type_node, long_long_unsigned_type_node,
19311 tree void_ftype_pv2di_v2di
19312 = build_function_type_list (void_type_node,
19313 pv2di_type_node, V2DI_type_node, NULL_TREE);
19314 /* Normal vector unops. */
19315 tree v4sf_ftype_v4sf
19316 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
19317 tree v16qi_ftype_v16qi
19318 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
19319 tree v8hi_ftype_v8hi
19320 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
19321 tree v4si_ftype_v4si
19322 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
19323 tree v8qi_ftype_v8qi
19324 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
19325 tree v4hi_ftype_v4hi
19326 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
19328 /* Normal vector binops. */
19329 tree v4sf_ftype_v4sf_v4sf
19330 = build_function_type_list (V4SF_type_node,
19331 V4SF_type_node, V4SF_type_node, NULL_TREE);
19332 tree v8qi_ftype_v8qi_v8qi
19333 = build_function_type_list (V8QI_type_node,
19334 V8QI_type_node, V8QI_type_node, NULL_TREE);
19335 tree v4hi_ftype_v4hi_v4hi
19336 = build_function_type_list (V4HI_type_node,
19337 V4HI_type_node, V4HI_type_node, NULL_TREE);
19338 tree v2si_ftype_v2si_v2si
19339 = build_function_type_list (V2SI_type_node,
19340 V2SI_type_node, V2SI_type_node, NULL_TREE);
19341 tree v1di_ftype_v1di_v1di
19342 = build_function_type_list (V1DI_type_node,
19343 V1DI_type_node, V1DI_type_node, NULL_TREE);
19344 tree v1di_ftype_v1di_v1di_int
19345 = build_function_type_list (V1DI_type_node,
19346 V1DI_type_node, V1DI_type_node,
19347 integer_type_node, NULL_TREE);
19348 tree v2si_ftype_v2sf
19349 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
19350 tree v2sf_ftype_v2si
19351 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
19352 tree v2si_ftype_v2si
19353 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
19354 tree v2sf_ftype_v2sf
19355 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
19356 tree v2sf_ftype_v2sf_v2sf
19357 = build_function_type_list (V2SF_type_node,
19358 V2SF_type_node, V2SF_type_node, NULL_TREE);
19359 tree v2si_ftype_v2sf_v2sf
19360 = build_function_type_list (V2SI_type_node,
19361 V2SF_type_node, V2SF_type_node, NULL_TREE);
19362 tree pint_type_node = build_pointer_type (integer_type_node);
19363 tree pdouble_type_node = build_pointer_type (double_type_node);
19364 tree pcdouble_type_node = build_pointer_type (
19365 build_type_variant (double_type_node, 1, 0));
19366 tree int_ftype_v2df_v2df
19367 = build_function_type_list (integer_type_node,
19368 V2DF_type_node, V2DF_type_node, NULL_TREE);
19370 tree void_ftype_pcvoid
19371 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
19372 tree v4sf_ftype_v4si
19373 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
19374 tree v4si_ftype_v4sf
19375 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
19376 tree v2df_ftype_v4si
19377 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
19378 tree v4si_ftype_v2df
19379 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
19380 tree v4si_ftype_v2df_v2df
19381 = build_function_type_list (V4SI_type_node,
19382 V2DF_type_node, V2DF_type_node, NULL_TREE);
19383 tree v2si_ftype_v2df
19384 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
19385 tree v4sf_ftype_v2df
19386 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
19387 tree v2df_ftype_v2si
19388 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
19389 tree v2df_ftype_v4sf
19390 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
19391 tree int_ftype_v2df
19392 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
19393 tree int64_ftype_v2df
19394 = build_function_type_list (long_long_integer_type_node,
19395 V2DF_type_node, NULL_TREE);
19396 tree v2df_ftype_v2df_int
19397 = build_function_type_list (V2DF_type_node,
19398 V2DF_type_node, integer_type_node, NULL_TREE);
19399 tree v2df_ftype_v2df_int64
19400 = build_function_type_list (V2DF_type_node,
19401 V2DF_type_node, long_long_integer_type_node,
19403 tree v4sf_ftype_v4sf_v2df
19404 = build_function_type_list (V4SF_type_node,
19405 V4SF_type_node, V2DF_type_node, NULL_TREE);
19406 tree v2df_ftype_v2df_v4sf
19407 = build_function_type_list (V2DF_type_node,
19408 V2DF_type_node, V4SF_type_node, NULL_TREE);
19409 tree v2df_ftype_v2df_v2df_int
19410 = build_function_type_list (V2DF_type_node,
19411 V2DF_type_node, V2DF_type_node,
19414 tree v2df_ftype_v2df_pcdouble
19415 = build_function_type_list (V2DF_type_node,
19416 V2DF_type_node, pcdouble_type_node, NULL_TREE);
19417 tree void_ftype_pdouble_v2df
19418 = build_function_type_list (void_type_node,
19419 pdouble_type_node, V2DF_type_node, NULL_TREE);
19420 tree void_ftype_pint_int
19421 = build_function_type_list (void_type_node,
19422 pint_type_node, integer_type_node, NULL_TREE);
19423 tree void_ftype_v16qi_v16qi_pchar
19424 = build_function_type_list (void_type_node,
19425 V16QI_type_node, V16QI_type_node,
19426 pchar_type_node, NULL_TREE);
19427 tree v2df_ftype_pcdouble
19428 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
19429 tree v2df_ftype_v2df_v2df
19430 = build_function_type_list (V2DF_type_node,
19431 V2DF_type_node, V2DF_type_node, NULL_TREE);
19432 tree v16qi_ftype_v16qi_v16qi
19433 = build_function_type_list (V16QI_type_node,
19434 V16QI_type_node, V16QI_type_node, NULL_TREE);
19435 tree v8hi_ftype_v8hi_v8hi
19436 = build_function_type_list (V8HI_type_node,
19437 V8HI_type_node, V8HI_type_node, NULL_TREE);
19438 tree v4si_ftype_v4si_v4si
19439 = build_function_type_list (V4SI_type_node,
19440 V4SI_type_node, V4SI_type_node, NULL_TREE);
19441 tree v2di_ftype_v2di_v2di
19442 = build_function_type_list (V2DI_type_node,
19443 V2DI_type_node, V2DI_type_node, NULL_TREE);
19444 tree v2di_ftype_v2df_v2df
19445 = build_function_type_list (V2DI_type_node,
19446 V2DF_type_node, V2DF_type_node, NULL_TREE);
19447 tree v2df_ftype_v2df
19448 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
19449 tree v2di_ftype_v2di_int
19450 = build_function_type_list (V2DI_type_node,
19451 V2DI_type_node, integer_type_node, NULL_TREE);
19452 tree v2di_ftype_v2di_v2di_int
19453 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19454 V2DI_type_node, integer_type_node, NULL_TREE);
19455 tree v4si_ftype_v4si_int
19456 = build_function_type_list (V4SI_type_node,
19457 V4SI_type_node, integer_type_node, NULL_TREE);
19458 tree v8hi_ftype_v8hi_int
19459 = build_function_type_list (V8HI_type_node,
19460 V8HI_type_node, integer_type_node, NULL_TREE);
19461 tree v4si_ftype_v8hi_v8hi
19462 = build_function_type_list (V4SI_type_node,
19463 V8HI_type_node, V8HI_type_node, NULL_TREE);
19464 tree v1di_ftype_v8qi_v8qi
19465 = build_function_type_list (V1DI_type_node,
19466 V8QI_type_node, V8QI_type_node, NULL_TREE);
19467 tree v1di_ftype_v2si_v2si
19468 = build_function_type_list (V1DI_type_node,
19469 V2SI_type_node, V2SI_type_node, NULL_TREE);
19470 tree v2di_ftype_v16qi_v16qi
19471 = build_function_type_list (V2DI_type_node,
19472 V16QI_type_node, V16QI_type_node, NULL_TREE);
19473 tree v2di_ftype_v4si_v4si
19474 = build_function_type_list (V2DI_type_node,
19475 V4SI_type_node, V4SI_type_node, NULL_TREE);
19476 tree int_ftype_v16qi
19477 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
19478 tree v16qi_ftype_pcchar
19479 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
19480 tree void_ftype_pchar_v16qi
19481 = build_function_type_list (void_type_node,
19482 pchar_type_node, V16QI_type_node, NULL_TREE);
19484 tree v2di_ftype_v2di_unsigned_unsigned
19485 = build_function_type_list (V2DI_type_node, V2DI_type_node,
19486 unsigned_type_node, unsigned_type_node,
19488 tree v2di_ftype_v2di_v2di_unsigned_unsigned
19489 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
19490 unsigned_type_node, unsigned_type_node,
19492 tree v2di_ftype_v2di_v16qi
19493 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
19495 tree v2df_ftype_v2df_v2df_v2df
19496 = build_function_type_list (V2DF_type_node,
19497 V2DF_type_node, V2DF_type_node,
19498 V2DF_type_node, NULL_TREE);
19499 tree v4sf_ftype_v4sf_v4sf_v4sf
19500 = build_function_type_list (V4SF_type_node,
19501 V4SF_type_node, V4SF_type_node,
19502 V4SF_type_node, NULL_TREE);
19503 tree v8hi_ftype_v16qi
19504 = build_function_type_list (V8HI_type_node, V16QI_type_node,
19506 tree v4si_ftype_v16qi
19507 = build_function_type_list (V4SI_type_node, V16QI_type_node,
19509 tree v2di_ftype_v16qi
19510 = build_function_type_list (V2DI_type_node, V16QI_type_node,
19512 tree v4si_ftype_v8hi
19513 = build_function_type_list (V4SI_type_node, V8HI_type_node,
19515 tree v2di_ftype_v8hi
19516 = build_function_type_list (V2DI_type_node, V8HI_type_node,
19518 tree v2di_ftype_v4si
19519 = build_function_type_list (V2DI_type_node, V4SI_type_node,
19521 tree v2di_ftype_pv2di
19522 = build_function_type_list (V2DI_type_node, pv2di_type_node,
19524 tree v16qi_ftype_v16qi_v16qi_int
19525 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19526 V16QI_type_node, integer_type_node,
19528 tree v16qi_ftype_v16qi_v16qi_v16qi
19529 = build_function_type_list (V16QI_type_node, V16QI_type_node,
19530 V16QI_type_node, V16QI_type_node,
19532 tree v8hi_ftype_v8hi_v8hi_int
19533 = build_function_type_list (V8HI_type_node, V8HI_type_node,
19534 V8HI_type_node, integer_type_node,
19536 tree v4si_ftype_v4si_v4si_int
19537 = build_function_type_list (V4SI_type_node, V4SI_type_node,
19538 V4SI_type_node, integer_type_node,
19540 tree int_ftype_v2di_v2di
19541 = build_function_type_list (integer_type_node,
19542 V2DI_type_node, V2DI_type_node,
19544 tree int_ftype_v16qi_int_v16qi_int_int
19545 = build_function_type_list (integer_type_node,
19552 tree v16qi_ftype_v16qi_int_v16qi_int_int
19553 = build_function_type_list (V16QI_type_node,
19560 tree int_ftype_v16qi_v16qi_int
19561 = build_function_type_list (integer_type_node,
19567 /* SSE5 instructions */
19568 tree v2di_ftype_v2di_v2di_v2di
19569 = build_function_type_list (V2DI_type_node,
19575 tree v4si_ftype_v4si_v4si_v4si
19576 = build_function_type_list (V4SI_type_node,
19582 tree v4si_ftype_v4si_v4si_v2di
19583 = build_function_type_list (V4SI_type_node,
19589 tree v8hi_ftype_v8hi_v8hi_v8hi
19590 = build_function_type_list (V8HI_type_node,
19596 tree v8hi_ftype_v8hi_v8hi_v4si
19597 = build_function_type_list (V8HI_type_node,
19603 tree v2df_ftype_v2df_v2df_v16qi
19604 = build_function_type_list (V2DF_type_node,
19610 tree v4sf_ftype_v4sf_v4sf_v16qi
19611 = build_function_type_list (V4SF_type_node,
19617 tree v2di_ftype_v2di_si
19618 = build_function_type_list (V2DI_type_node,
19623 tree v4si_ftype_v4si_si
19624 = build_function_type_list (V4SI_type_node,
19629 tree v8hi_ftype_v8hi_si
19630 = build_function_type_list (V8HI_type_node,
19635 tree v16qi_ftype_v16qi_si
19636 = build_function_type_list (V16QI_type_node,
19640 tree v4sf_ftype_v4hi
19641 = build_function_type_list (V4SF_type_node,
19645 tree v4hi_ftype_v4sf
19646 = build_function_type_list (V4HI_type_node,
19650 tree v2di_ftype_v2di
19651 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
19653 tree v16qi_ftype_v8hi_v8hi
19654 = build_function_type_list (V16QI_type_node,
19655 V8HI_type_node, V8HI_type_node,
19657 tree v8hi_ftype_v4si_v4si
19658 = build_function_type_list (V8HI_type_node,
19659 V4SI_type_node, V4SI_type_node,
19661 tree v8hi_ftype_v16qi_v16qi
19662 = build_function_type_list (V8HI_type_node,
19663 V16QI_type_node, V16QI_type_node,
19665 tree v4hi_ftype_v8qi_v8qi
19666 = build_function_type_list (V4HI_type_node,
19667 V8QI_type_node, V8QI_type_node,
19669 tree unsigned_ftype_unsigned_uchar
19670 = build_function_type_list (unsigned_type_node,
19671 unsigned_type_node,
19672 unsigned_char_type_node,
19674 tree unsigned_ftype_unsigned_ushort
19675 = build_function_type_list (unsigned_type_node,
19676 unsigned_type_node,
19677 short_unsigned_type_node,
19679 tree unsigned_ftype_unsigned_unsigned
19680 = build_function_type_list (unsigned_type_node,
19681 unsigned_type_node,
19682 unsigned_type_node,
19684 tree uint64_ftype_uint64_uint64
19685 = build_function_type_list (long_long_unsigned_type_node,
19686 long_long_unsigned_type_node,
19687 long_long_unsigned_type_node,
19689 tree float_ftype_float
19690 = build_function_type_list (float_type_node,
19696 /* Add all special builtins with variable number of operands. */
19697 for (i = 0, d = bdesc_special_args;
19698 i < ARRAY_SIZE (bdesc_special_args);
19706 switch ((enum ix86_special_builtin_type) d->flag)
19708 case VOID_FTYPE_VOID:
19709 type = void_ftype_void;
19711 case V16QI_FTYPE_PCCHAR:
19712 type = v16qi_ftype_pcchar;
19714 case V4SF_FTYPE_PCFLOAT:
19715 type = v4sf_ftype_pcfloat;
19717 case V2DI_FTYPE_PV2DI:
19718 type = v2di_ftype_pv2di;
19720 case V2DF_FTYPE_PCDOUBLE:
19721 type = v2df_ftype_pcdouble;
19723 case V4SF_FTYPE_V4SF_PCV2SF:
19724 type = v4sf_ftype_v4sf_pcv2sf;
19726 case V2DF_FTYPE_V2DF_PCDOUBLE:
19727 type = v2df_ftype_v2df_pcdouble;
19729 case VOID_FTYPE_PV2SF_V4SF:
19730 type = void_ftype_pv2sf_v4sf;
19732 case VOID_FTYPE_PV2DI_V2DI:
19733 type = void_ftype_pv2di_v2di;
19735 case VOID_FTYPE_PCHAR_V16QI:
19736 type = void_ftype_pchar_v16qi;
19738 case VOID_FTYPE_PFLOAT_V4SF:
19739 type = void_ftype_pfloat_v4sf;
19741 case VOID_FTYPE_PDOUBLE_V2DF:
19742 type = void_ftype_pdouble_v2df;
19744 case VOID_FTYPE_PDI_DI:
19745 type = void_ftype_pdi_di;
19747 case VOID_FTYPE_PINT_INT:
19748 type = void_ftype_pint_int;
19751 gcc_unreachable ();
19754 def_builtin (d->mask, d->name, type, d->code);
19757 /* Add all builtins with variable number of operands. */
19758 for (i = 0, d = bdesc_args;
19759 i < ARRAY_SIZE (bdesc_args);
19767 switch ((enum ix86_builtin_type) d->flag)
19769 case FLOAT_FTYPE_FLOAT:
19770 type = float_ftype_float;
19772 case INT_FTYPE_V2DI_V2DI_PTEST:
19773 type = int_ftype_v2di_v2di;
19775 case INT64_FTYPE_V4SF:
19776 type = int64_ftype_v4sf;
19778 case INT64_FTYPE_V2DF:
19779 type = int64_ftype_v2df;
19781 case INT_FTYPE_V16QI:
19782 type = int_ftype_v16qi;
19784 case INT_FTYPE_V8QI:
19785 type = int_ftype_v8qi;
19787 case INT_FTYPE_V4SF:
19788 type = int_ftype_v4sf;
19790 case INT_FTYPE_V2DF:
19791 type = int_ftype_v2df;
19793 case V16QI_FTYPE_V16QI:
19794 type = v16qi_ftype_v16qi;
19796 case V8HI_FTYPE_V8HI:
19797 type = v8hi_ftype_v8hi;
19799 case V8HI_FTYPE_V16QI:
19800 type = v8hi_ftype_v16qi;
19802 case V8QI_FTYPE_V8QI:
19803 type = v8qi_ftype_v8qi;
19805 case V4SI_FTYPE_V4SI:
19806 type = v4si_ftype_v4si;
19808 case V4SI_FTYPE_V16QI:
19809 type = v4si_ftype_v16qi;
19811 case V4SI_FTYPE_V8HI:
19812 type = v4si_ftype_v8hi;
19814 case V4SI_FTYPE_V4SF:
19815 type = v4si_ftype_v4sf;
19817 case V4SI_FTYPE_V2DF:
19818 type = v4si_ftype_v2df;
19820 case V4HI_FTYPE_V4HI:
19821 type = v4hi_ftype_v4hi;
19823 case V4SF_FTYPE_V4SF:
19824 case V4SF_FTYPE_V4SF_VEC_MERGE:
19825 type = v4sf_ftype_v4sf;
19827 case V4SF_FTYPE_V4SI:
19828 type = v4sf_ftype_v4si;
19830 case V4SF_FTYPE_V2DF:
19831 type = v4sf_ftype_v2df;
19833 case V2DI_FTYPE_V2DI:
19834 type = v2di_ftype_v2di;
19836 case V2DI_FTYPE_V16QI:
19837 type = v2di_ftype_v16qi;
19839 case V2DI_FTYPE_V8HI:
19840 type = v2di_ftype_v8hi;
19842 case V2DI_FTYPE_V4SI:
19843 type = v2di_ftype_v4si;
19845 case V2SI_FTYPE_V2SI:
19846 type = v2si_ftype_v2si;
19848 case V2SI_FTYPE_V4SF:
19849 type = v2si_ftype_v4sf;
19851 case V2SI_FTYPE_V2DF:
19852 type = v2si_ftype_v2df;
19854 case V2SI_FTYPE_V2SF:
19855 type = v2si_ftype_v2sf;
19857 case V2DF_FTYPE_V4SF:
19858 type = v2df_ftype_v4sf;
19860 case V2DF_FTYPE_V2DF:
19861 case V2DF_FTYPE_V2DF_VEC_MERGE:
19862 type = v2df_ftype_v2df;
19864 case V2DF_FTYPE_V2SI:
19865 type = v2df_ftype_v2si;
19867 case V2DF_FTYPE_V4SI:
19868 type = v2df_ftype_v4si;
19870 case V2SF_FTYPE_V2SF:
19871 type = v2sf_ftype_v2sf;
19873 case V2SF_FTYPE_V2SI:
19874 type = v2sf_ftype_v2si;
19876 case V16QI_FTYPE_V16QI_V16QI:
19877 type = v16qi_ftype_v16qi_v16qi;
19879 case V16QI_FTYPE_V8HI_V8HI:
19880 type = v16qi_ftype_v8hi_v8hi;
19882 case V8QI_FTYPE_V8QI_V8QI:
19883 type = v8qi_ftype_v8qi_v8qi;
19885 case V8QI_FTYPE_V4HI_V4HI:
19886 type = v8qi_ftype_v4hi_v4hi;
19888 case V8HI_FTYPE_V8HI_V8HI:
19889 case V8HI_FTYPE_V8HI_V8HI_COUNT:
19890 type = v8hi_ftype_v8hi_v8hi;
19892 case V8HI_FTYPE_V16QI_V16QI:
19893 type = v8hi_ftype_v16qi_v16qi;
19895 case V8HI_FTYPE_V4SI_V4SI:
19896 type = v8hi_ftype_v4si_v4si;
19898 case V8HI_FTYPE_V8HI_SI_COUNT:
19899 type = v8hi_ftype_v8hi_int;
19901 case V4SI_FTYPE_V4SI_V4SI:
19902 case V4SI_FTYPE_V4SI_V4SI_COUNT:
19903 type = v4si_ftype_v4si_v4si;
19905 case V4SI_FTYPE_V8HI_V8HI:
19906 type = v4si_ftype_v8hi_v8hi;
19908 case V4SI_FTYPE_V4SF_V4SF:
19909 type = v4si_ftype_v4sf_v4sf;
19911 case V4SI_FTYPE_V2DF_V2DF:
19912 type = v4si_ftype_v2df_v2df;
19914 case V4SI_FTYPE_V4SI_SI_COUNT:
19915 type = v4si_ftype_v4si_int;
19917 case V4HI_FTYPE_V4HI_V4HI:
19918 case V4HI_FTYPE_V4HI_V4HI_COUNT:
19919 type = v4hi_ftype_v4hi_v4hi;
19921 case V4HI_FTYPE_V8QI_V8QI:
19922 type = v4hi_ftype_v8qi_v8qi;
19924 case V4HI_FTYPE_V2SI_V2SI:
19925 type = v4hi_ftype_v2si_v2si;
19927 case V4HI_FTYPE_V4HI_SI_COUNT:
19928 type = v4hi_ftype_v4hi_int;
19930 case V4SF_FTYPE_V4SF_V4SF:
19931 case V4SF_FTYPE_V4SF_V4SF_SWAP:
19932 type = v4sf_ftype_v4sf_v4sf;
19934 case V4SF_FTYPE_V4SF_V2SI:
19935 type = v4sf_ftype_v4sf_v2si;
19937 case V4SF_FTYPE_V4SF_V2DF:
19938 type = v4sf_ftype_v4sf_v2df;
19940 case V4SF_FTYPE_V4SF_DI:
19941 type = v4sf_ftype_v4sf_int64;
19943 case V4SF_FTYPE_V4SF_SI:
19944 type = v4sf_ftype_v4sf_int;
19946 case V2DI_FTYPE_V2DI_V2DI:
19947 case V2DI_FTYPE_V2DI_V2DI_COUNT:
19948 type = v2di_ftype_v2di_v2di;
19950 case V2DI_FTYPE_V16QI_V16QI:
19951 type = v2di_ftype_v16qi_v16qi;
19953 case V2DI_FTYPE_V4SI_V4SI:
19954 type = v2di_ftype_v4si_v4si;
19956 case V2DI_FTYPE_V2DI_V16QI:
19957 type = v2di_ftype_v2di_v16qi;
19959 case V2DI_FTYPE_V2DF_V2DF:
19960 type = v2di_ftype_v2df_v2df;
19962 case V2DI_FTYPE_V2DI_SI_COUNT:
19963 type = v2di_ftype_v2di_int;
19965 case V2SI_FTYPE_V2SI_V2SI:
19966 case V2SI_FTYPE_V2SI_V2SI_COUNT:
19967 type = v2si_ftype_v2si_v2si;
19969 case V2SI_FTYPE_V4HI_V4HI:
19970 type = v2si_ftype_v4hi_v4hi;
19972 case V2SI_FTYPE_V2SF_V2SF:
19973 type = v2si_ftype_v2sf_v2sf;
19975 case V2SI_FTYPE_V2SI_SI_COUNT:
19976 type = v2si_ftype_v2si_int;
19978 case V2DF_FTYPE_V2DF_V2DF:
19979 case V2DF_FTYPE_V2DF_V2DF_SWAP:
19980 type = v2df_ftype_v2df_v2df;
19982 case V2DF_FTYPE_V2DF_V4SF:
19983 type = v2df_ftype_v2df_v4sf;
19985 case V2DF_FTYPE_V2DF_DI:
19986 type = v2df_ftype_v2df_int64;
19988 case V2DF_FTYPE_V2DF_SI:
19989 type = v2df_ftype_v2df_int;
19991 case V2SF_FTYPE_V2SF_V2SF:
19992 type = v2sf_ftype_v2sf_v2sf;
19994 case V1DI_FTYPE_V1DI_V1DI:
19995 case V1DI_FTYPE_V1DI_V1DI_COUNT:
19996 type = v1di_ftype_v1di_v1di;
19998 case V1DI_FTYPE_V8QI_V8QI:
19999 type = v1di_ftype_v8qi_v8qi;
20001 case V1DI_FTYPE_V2SI_V2SI:
20002 type = v1di_ftype_v2si_v2si;
20004 case V1DI_FTYPE_V1DI_SI_COUNT:
20005 type = v1di_ftype_v1di_int;
20007 case UINT64_FTYPE_UINT64_UINT64:
20008 type = uint64_ftype_uint64_uint64;
20010 case UINT_FTYPE_UINT_UINT:
20011 type = unsigned_ftype_unsigned_unsigned;
20013 case UINT_FTYPE_UINT_USHORT:
20014 type = unsigned_ftype_unsigned_ushort;
20016 case UINT_FTYPE_UINT_UCHAR:
20017 type = unsigned_ftype_unsigned_uchar;
20019 case V8HI_FTYPE_V8HI_INT:
20020 type = v8hi_ftype_v8hi_int;
20022 case V4SI_FTYPE_V4SI_INT:
20023 type = v4si_ftype_v4si_int;
20025 case V4HI_FTYPE_V4HI_INT:
20026 type = v4hi_ftype_v4hi_int;
20028 case V4SF_FTYPE_V4SF_INT:
20029 type = v4sf_ftype_v4sf_int;
20031 case V2DI_FTYPE_V2DI_INT:
20032 case V2DI2TI_FTYPE_V2DI_INT:
20033 type = v2di_ftype_v2di_int;
20035 case V2DF_FTYPE_V2DF_INT:
20036 type = v2df_ftype_v2df_int;
20038 case V16QI_FTYPE_V16QI_V16QI_V16QI:
20039 type = v16qi_ftype_v16qi_v16qi_v16qi;
20041 case V4SF_FTYPE_V4SF_V4SF_V4SF:
20042 type = v4sf_ftype_v4sf_v4sf_v4sf;
20044 case V2DF_FTYPE_V2DF_V2DF_V2DF:
20045 type = v2df_ftype_v2df_v2df_v2df;
20047 case V16QI_FTYPE_V16QI_V16QI_INT:
20048 type = v16qi_ftype_v16qi_v16qi_int;
20050 case V8HI_FTYPE_V8HI_V8HI_INT:
20051 type = v8hi_ftype_v8hi_v8hi_int;
20053 case V4SI_FTYPE_V4SI_V4SI_INT:
20054 type = v4si_ftype_v4si_v4si_int;
20056 case V4SF_FTYPE_V4SF_V4SF_INT:
20057 type = v4sf_ftype_v4sf_v4sf_int;
20059 case V2DI_FTYPE_V2DI_V2DI_INT:
20060 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
20061 type = v2di_ftype_v2di_v2di_int;
20063 case V2DF_FTYPE_V2DF_V2DF_INT:
20064 type = v2df_ftype_v2df_v2df_int;
20066 case V2DI_FTYPE_V2DI_UINT_UINT:
20067 type = v2di_ftype_v2di_unsigned_unsigned;
20069 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
20070 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
20072 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
20073 type = v1di_ftype_v1di_v1di_int;
20076 gcc_unreachable ();
20079 def_builtin_const (d->mask, d->name, type, d->code);
20082 /* pcmpestr[im] insns. */
20083 for (i = 0, d = bdesc_pcmpestr;
20084 i < ARRAY_SIZE (bdesc_pcmpestr);
20087 if (d->code == IX86_BUILTIN_PCMPESTRM128)
20088 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
20090 ftype = int_ftype_v16qi_int_v16qi_int_int;
20091 def_builtin_const (d->mask, d->name, ftype, d->code);
20094 /* pcmpistr[im] insns. */
20095 for (i = 0, d = bdesc_pcmpistr;
20096 i < ARRAY_SIZE (bdesc_pcmpistr);
20099 if (d->code == IX86_BUILTIN_PCMPISTRM128)
20100 ftype = v16qi_ftype_v16qi_v16qi_int;
20102 ftype = int_ftype_v16qi_v16qi_int;
20103 def_builtin_const (d->mask, d->name, ftype, d->code);
20106 /* comi/ucomi insns. */
20107 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
20108 if (d->mask == OPTION_MASK_ISA_SSE2)
20109 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
20111 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
20114 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
20115 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
20117 /* SSE or 3DNow!A */
20118 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
20121 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
20123 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
20124 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
20127 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
20128 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
20133 /* Define AES built-in functions only if AES is enabled. */
20134 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
20135 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
20136 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
20137 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
20138 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
20139 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
20145 /* Define PCLMUL built-in function only if PCLMUL is enabled. */
20146 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
20149 /* Access to the vec_init patterns. */
20150 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
20151 integer_type_node, NULL_TREE);
20152 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
20154 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
20155 short_integer_type_node,
20156 short_integer_type_node,
20157 short_integer_type_node, NULL_TREE);
20158 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
20160 ftype = build_function_type_list (V8QI_type_node, char_type_node,
20161 char_type_node, char_type_node,
20162 char_type_node, char_type_node,
20163 char_type_node, char_type_node,
20164 char_type_node, NULL_TREE);
20165 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
20167 /* Access to the vec_extract patterns. */
20168 ftype = build_function_type_list (double_type_node, V2DF_type_node,
20169 integer_type_node, NULL_TREE);
20170 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
20172 ftype = build_function_type_list (long_long_integer_type_node,
20173 V2DI_type_node, integer_type_node,
20175 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
20177 ftype = build_function_type_list (float_type_node, V4SF_type_node,
20178 integer_type_node, NULL_TREE);
20179 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
20181 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
20182 integer_type_node, NULL_TREE);
20183 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
20185 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
20186 integer_type_node, NULL_TREE);
20187 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
20189 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
20190 integer_type_node, NULL_TREE);
20191 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
20193 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
20194 integer_type_node, NULL_TREE);
20195 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
20197 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
20198 integer_type_node, NULL_TREE);
20199 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
20201 /* Access to the vec_set patterns. */
20202 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
20204 integer_type_node, NULL_TREE);
20205 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
20207 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
20209 integer_type_node, NULL_TREE);
20210 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
20212 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
20214 integer_type_node, NULL_TREE);
20215 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
20217 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
20219 integer_type_node, NULL_TREE);
20220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
20222 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
20224 integer_type_node, NULL_TREE);
20225 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
20227 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
20229 integer_type_node, NULL_TREE);
20230 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
20232 /* Add SSE5 multi-arg argument instructions */
20233 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
20235 tree mtype = NULL_TREE;
20240 switch ((enum multi_arg_type)d->flag)
20242 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
20243 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
20244 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
20245 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
20246 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
20247 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
20248 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
20249 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
20250 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
20251 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
20252 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
20253 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
20254 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
20255 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
20256 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
20257 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
20258 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
20259 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
20260 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
20261 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
20262 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
20263 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
20264 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
20265 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
20266 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
20267 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
20268 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
20269 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
20270 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
20271 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
20272 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
20273 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
20274 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
20275 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
20276 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
20277 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
20278 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
20279 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
20280 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
20281 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
20282 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
20283 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
20284 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
20285 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
20286 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
20287 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
20288 case MULTI_ARG_UNKNOWN:
20290 gcc_unreachable ();
20294 def_builtin_const (d->mask, d->name, mtype, d->code);
20298 /* Internal method for ix86_init_builtins. */
20301 ix86_init_builtins_va_builtins_abi (void)
/* Register the __builtin_ms_va_* and __builtin_sysv_va_* builtins so that
   code can explicitly manipulate a va_list of either calling convention.
   Each builtin reuses the generic BUILT_IN_VA_START/END/COPY codes but is
   tagged with an "ms_abi" or "sysv_abi" attribute list.
   NOTE(review): several physical lines of this function are not visible in
   this excerpt; comments below describe only the visible statements.  */
20303   tree ms_va_ref, sysv_va_ref;
20304   tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
20305   tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
20306   tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
20307   tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists used to mark each builtin with the ABI it belongs to.  */
20311   fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
20312   fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* The va_list operands are passed by reference/pointer, not by value.  */
20313   ms_va_ref = build_reference_type (ms_va_list_type_node);
20315     build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: va_end is (void)(va_list&), va_start is the
   varargs variant of the same signature, va_copy takes two va_lists.  */
20318     build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
20319   fnvoid_va_start_ms =
20320     build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
20321   fnvoid_va_end_sysv =
20322     build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
20323   fnvoid_va_start_sysv =
20324     build_varargs_function_type_list (void_type_node, sysv_va_ref,
20326   fnvoid_va_copy_ms =
20327     build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
20329   fnvoid_va_copy_sysv =
20330     build_function_type_list (void_type_node, sysv_va_ref,
20331 			      sysv_va_ref, NULL_TREE);
/* Expose the six builtins.  The BUILT_IN_VA_* function codes let the
   middle end treat them like the generic va_* builtins, while the
   attribute list records which ABI's va_list layout applies.  */
20333   add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
20334   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
20335   add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
20336   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
20337   add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
20338 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
20339   add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
20340   			BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
20341   add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
20342   			BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
20343   add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
20344 			BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
20348 ix86_init_builtins (void)
/* Top-level builtin registration for the i386 backend: registers the
   extended float types (__float80, __float128), the TFmode helper
   builtins (__builtin_infq/fabsq/copysignq), then delegates to the
   MMX/SSE and va_list-ABI initializers.
   NOTE(review): some physical lines are elided in this excerpt;
   comments cover only the visible statements.  */
20350   tree float128_type_node = make_node (REAL_TYPE);
/* The __float80 type: when long double is already XFmode, just register
   long double under the __float80 name; otherwise build a distinct
   80-bit REAL_TYPE node and register that.  */
20353   /* The __float80 type.  */
20354   if (TYPE_MODE (long_double_type_node) == XFmode)
20355     (*lang_hooks.types.register_builtin_type) (long_double_type_node,
20359       /* The __float80 type.  */
20360       tree float80_type_node = make_node (REAL_TYPE);
20362       TYPE_PRECISION (float80_type_node) = 80;
20363       layout_type (float80_type_node);
20364       (*lang_hooks.types.register_builtin_type) (float80_type_node,
20368   /* The __float128 type.  */
20369   TYPE_PRECISION (float128_type_node) = 128;
20370   layout_type (float128_type_node);
20371   (*lang_hooks.types.register_builtin_type) (float128_type_node,
20374   /* TFmode support builtins.  */
/* __builtin_infq: () -> __float128.  */
20375   ftype = build_function_type (float128_type_node, void_list_node);
20376   decl = add_builtin_function ("__builtin_infq", ftype,
20377 			       IX86_BUILTIN_INFQ, BUILT_IN_MD,
20379   ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
20381   /* We will expand them to normal call if SSE2 isn't available since
20382      they are used by libgcc. */
/* __builtin_fabsq: (__float128) -> __float128, backed by libgcc's
   __fabstf2 when not expanded inline.  */
20383   ftype = build_function_type_list (float128_type_node,
20384 				    float128_type_node,
20386   decl = add_builtin_function ("__builtin_fabsq", ftype,
20387 			       IX86_BUILTIN_FABSQ, BUILT_IN_MD,
20388 			       "__fabstf2", NULL_TREE);
20389   ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Mark as pure/const-like so calls can be CSEd.  */
20390   TREE_READONLY (decl) = 1;
/* __builtin_copysignq: (__float128, __float128) -> __float128, backed
   by libgcc's __copysigntf3.  */
20392   ftype = build_function_type_list (float128_type_node,
20393 				    float128_type_node,
20394 				    float128_type_node,
20396   decl = add_builtin_function ("__builtin_copysignq", ftype,
20397 			       IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
20398 			       "__copysigntf3", NULL_TREE);
20399   ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
20400   TREE_READONLY (decl) = 1;
/* Register all MMX/SSE builtins, then (conditionally, per the elided
   guard) the ms_abi/sysv_abi va_list builtins.  */
20403   ix86_init_mmx_sse_builtins ();
20405     ix86_init_builtins_va_builtins_abi ();
20408 /* Errors in the source file can cause expand_expr to return const0_rtx
20409 where we expect a vector. To avoid crashing, use one of the vector
20410 clear instructions. */
20412 safe_vector_operand (rtx x, enum machine_mode mode)
/* If expand_expr produced const0_rtx (which happens after source-level
   errors), substitute the vector zero constant of MODE so later vector
   code does not crash on a scalar zero.  */
20414   if (x == const0_rtx)
20415     x = CONST0_RTX (mode);
20419 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
20422 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
/* Expand a two-operand builtin call EXP into the insn pattern ICODE,
   producing the result in TARGET (or a fresh register if TARGET is
   unsuitable).  NOTE(review): the tail of this function (pattern-null
   check / emit / return) is elided from this excerpt.  */
20425   tree arg0 = CALL_EXPR_ARG (exp, 0);
20426   tree arg1 = CALL_EXPR_ARG (exp, 1);
20427   rtx op0 = expand_normal (arg0);
20428   rtx op1 = expand_normal (arg1);
/* Operand modes demanded by the insn pattern: result, src1, src2.  */
20429   enum machine_mode tmode = insn_data[icode].operand[0].mode;
20430   enum machine_mode mode0 = insn_data[icode].operand[1].mode;
20431   enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx leaking in after front-end errors.  */
20433   if (VECTOR_MODE_P (mode0))
20434     op0 = safe_vector_operand (op0, mode0);
20435   if (VECTOR_MODE_P (mode1))
20436     op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo when optimizing or when TARGET has the wrong mode
   or fails the pattern's output predicate.  */
20438   if (optimize || !target
20439       || GET_MODE (target) != tmode
20440       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20441     target = gen_reg_rtx (tmode);
/* Special case: an SImode second operand feeding a TImode pattern is
   widened by loading it into a V4SI register and taking the TImode
   lowpart (used by the SSE2 shift-by-scalar patterns).  */
20443   if (GET_MODE (op1) == SImode && mode1 == TImode)
20445       rtx x = gen_reg_rtx (V4SImode);
20446       emit_insn (gen_sse2_loadd (x, op1));
20447       op1 = gen_lowpart (TImode, x);
/* Force operands into registers when they fail the pattern predicates.  */
20450   if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
20451     op0 = copy_to_mode_reg (mode0, op0);
20452   if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
20453     op1 = copy_to_mode_reg (mode1, op1);
20455   pat = GEN_FCN (icode) (target, op0, op1);
20464 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
20467 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
20468 				   enum multi_arg_type m_type,
20469 				   enum insn_code sub_code)
/* Expand an SSE5 multi-argument (1-4 operand) builtin call EXP into the
   insn pattern ICODE.  M_TYPE classifies the builtin's shape (argument
   count, immediate last argument, comparison, test form); SUB_CODE
   carries the comparison/sub-operation code when one is needed.
   NOTE(review): this function is heavily elided in this excerpt (arg
   array declaration, nargs assignments, several closing braces, and the
   final emit/return are not visible); comments describe only the
   visible statements.  */
20474   bool comparison_p = false;
20476   bool last_arg_constant = false;
20477   int num_memory = 0;
20480   enum machine_mode mode;
20483   enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: the case groups below set the (elided) argument
   count plus the comparison/immediate flags.  */
20487     case MULTI_ARG_3_SF:
20488     case MULTI_ARG_3_DF:
20489     case MULTI_ARG_3_DI:
20490     case MULTI_ARG_3_SI:
20491     case MULTI_ARG_3_SI_DI:
20492     case MULTI_ARG_3_HI:
20493     case MULTI_ARG_3_HI_SI:
20494     case MULTI_ARG_3_QI:
20495     case MULTI_ARG_3_PERMPS:
20496     case MULTI_ARG_3_PERMPD:
20500     case MULTI_ARG_2_SF:
20501     case MULTI_ARG_2_DF:
20502     case MULTI_ARG_2_DI:
20503     case MULTI_ARG_2_SI:
20504     case MULTI_ARG_2_HI:
20505     case MULTI_ARG_2_QI:
/* Two-operand forms whose last argument must be an immediate.  */
20509     case MULTI_ARG_2_DI_IMM:
20510     case MULTI_ARG_2_SI_IMM:
20511     case MULTI_ARG_2_HI_IMM:
20512     case MULTI_ARG_2_QI_IMM:
20514       last_arg_constant = true;
20517     case MULTI_ARG_1_SF:
20518     case MULTI_ARG_1_DF:
20519     case MULTI_ARG_1_DI:
20520     case MULTI_ARG_1_SI:
20521     case MULTI_ARG_1_HI:
20522     case MULTI_ARG_1_QI:
20523     case MULTI_ARG_1_SI_DI:
20524     case MULTI_ARG_1_HI_DI:
20525     case MULTI_ARG_1_HI_SI:
20526     case MULTI_ARG_1_QI_DI:
20527     case MULTI_ARG_1_QI_SI:
20528     case MULTI_ARG_1_QI_HI:
20529     case MULTI_ARG_1_PH2PS:
20530     case MULTI_ARG_1_PS2PH:
/* Comparison forms: SUB_CODE supplies the rtx comparison code and an
   extra (comparison) operand slot is inserted before the inputs.  */
20534     case MULTI_ARG_2_SF_CMP:
20535     case MULTI_ARG_2_DF_CMP:
20536     case MULTI_ARG_2_DI_CMP:
20537     case MULTI_ARG_2_SI_CMP:
20538     case MULTI_ARG_2_HI_CMP:
20539     case MULTI_ARG_2_QI_CMP:
20541       comparison_p = true;
20544     case MULTI_ARG_2_SF_TF:
20545     case MULTI_ARG_2_DF_TF:
20546     case MULTI_ARG_2_DI_TF:
20547     case MULTI_ARG_2_SI_TF:
20548     case MULTI_ARG_2_HI_TF:
20549     case MULTI_ARG_2_QI_TF:
20554     case MULTI_ARG_UNKNOWN:
20556       gcc_unreachable ();
/* Pick a fresh pseudo when TARGET is absent, wrongly moded, or fails
   the output predicate.  */
20559   if (optimize || !target
20560       || GET_MODE (target) != tmode
20561       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20562     target = gen_reg_rtx (tmode);
20564   gcc_assert (nargs <= 4);
/* Expand and legitimize each call argument.  For comparison forms the
   insn's operand numbering is shifted by one (ADJUST) because of the
   inserted comparison operand.  */
20566   for (i = 0; i < nargs; i++)
20568       tree arg = CALL_EXPR_ARG (exp, i);
20569       rtx op = expand_normal (arg);
20570       int adjust = (comparison_p) ? 1 : 0;
20571       enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
/* The last argument of an *_IMM form must be a compile-time constant;
   diagnose and bail out with a dummy register otherwise.  */
20573       if (last_arg_constant && i == nargs-1)
20575 	  if (GET_CODE (op) != CONST_INT)
20577 	      error ("last argument must be an immediate");
20578 	      return gen_reg_rtx (tmode);
20583 	  if (VECTOR_MODE_P (mode))
20584 	    op = safe_vector_operand (op, mode);
20586 	  /* If we aren't optimizing, only allow one memory operand to be
20588 	  if (memory_operand (op, mode))
20591 	  gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
20594 	      || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
20596 	    op = force_reg (mode, op);
20600       args[i].mode = mode;
/* Generate the insn; the exact GEN_FCN arity depends on nargs and on
   whether a comparison operand or sub_code immediate is required.  */
20606       pat = GEN_FCN (icode) (target, args[0].op);
20611 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
20612 			       GEN_INT ((int)sub_code));
20613       else if (! comparison_p)
20614 	pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the comparison rtx from SUB_CODE and pass it
   as the pattern's extra operand.  */
20617 	  rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
20621 	  pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
20626       pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
20630       gcc_unreachable ();
20640 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
20641 insns with vec_merge. */
/* NOTE(review): this chunk is a sampled extraction — interior lines (the
   return-type line, opening brace, the assignment that seeds OP1, and the
   emit/return epilogue) are missing between the numbered lines below.  */
20644 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
/* Expand the single call argument and fetch the insn's operand modes.  */
20648 tree arg0 = CALL_EXPR_ARG (exp, 0);
20649 rtx op1, op0 = expand_normal (arg0);
20650 enum machine_mode tmode = insn_data[icode].operand[0].mode;
20651 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Allocate a fresh result register unless the caller-supplied TARGET
   already has the right mode and satisfies operand 0's predicate.  */
20653 if (optimize || !target
20654 || GET_MODE (target) != tmode
20655 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
20656 target = gen_reg_rtx (tmode);
20658 if (VECTOR_MODE_P (mode0))
20659 op0 = safe_vector_operand (op0, mode0);
/* Force OP0 into a register if the predicate rejects it (or when
   optimizing and it is not already a register).  */
20661 if ((optimize && !register_operand (op0, mode0))
20662 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
20663 op0 = copy_to_mode_reg (mode0, op0);
/* OP1 is set on a line elided from this extraction (presumably from OP0
   for the vec_merge pass-through operand) — confirm against the full
   source before relying on it.  */
20666 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
20667 op1 = copy_to_mode_reg (mode0, op1);
20669 pat = GEN_FCN (icode) (target, op0, op1);
20676 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* NOTE(review): sampled extraction — declaration line, braces, the swap
   completion, and the emit/return epilogue are elided between lines.  */
20679 ix86_expand_sse_compare (const struct builtin_description *d,
20680 tree exp, rtx target, bool swap)
/* Expand both call arguments and fetch the insn's operand modes from
   the description D.  */
20683 tree arg0 = CALL_EXPR_ARG (exp, 0);
20684 tree arg1 = CALL_EXPR_ARG (exp, 1);
20685 rtx op0 = expand_normal (arg0);
20686 rtx op1 = expand_normal (arg1);
20688 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
20689 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
20690 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
20691 enum rtx_code comparison = d->comparison;
20693 if (VECTOR_MODE_P (mode0))
20694 op0 = safe_vector_operand (op0, mode0);
20695 if (VECTOR_MODE_P (mode1))
20696 op1 = safe_vector_operand (op1, mode1);
20698 /* Swap operands if we have a comparison that isn't available in
/* The swap path copies OP1 to a scratch register first; the lines
   completing the exchange are elided here.  */
20702 rtx tmp = gen_reg_rtx (mode1);
20703 emit_move_insn (tmp, op1);
/* Allocate a result register unless TARGET already fits operand 0.  */
20708 if (optimize || !target
20709 || GET_MODE (target) != tmode
20710 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
20711 target = gen_reg_rtx (tmode);
/* Legitimize both operands against the insn predicates.  */
20713 if ((optimize && !register_operand (op0, mode0))
20714 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
20715 op0 = copy_to_mode_reg (mode0, op0);
20716 if ((optimize && !register_operand (op1, mode1))
20717 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
20718 op1 = copy_to_mode_reg (mode1, op1);
/* Build the comparison rtx and hand it to the named pattern as the
   third operand.  */
20720 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
20721 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
20728 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* NOTE(review): sampled extraction — declaration tail, braces, the body of
   the operand-swap branch, and the FLAGS_REG reference inside the final
   comparison are elided between the numbered lines.  */
20731 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
20735 tree arg0 = CALL_EXPR_ARG (exp, 0);
20736 tree arg1 = CALL_EXPR_ARG (exp, 1);
20737 rtx op0 = expand_normal (arg0);
20738 rtx op1 = expand_normal (arg1);
20739 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20740 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20741 enum rtx_code comparison = d->comparison;
20743 if (VECTOR_MODE_P (mode0))
20744 op0 = safe_vector_operand (op0, mode0);
20745 if (VECTOR_MODE_P (mode1))
20746 op1 = safe_vector_operand (op1, mode1);
20748 /* Swap operands if we have a comparison that isn't available in
20750 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result is materialized as a zeroed SImode register whose low QImode
   part receives the flag value via STRICT_LOW_PART below.  */
20757 target = gen_reg_rtx (SImode);
20758 emit_move_insn (target, const0_rtx);
20759 target = gen_rtx_SUBREG (QImode, target, 0);
20761 if ((optimize && !register_operand (op0, mode0))
20762 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20763 op0 = copy_to_mode_reg (mode0, op0);
20764 if ((optimize && !register_operand (op1, mode1))
20765 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20766 op1 = copy_to_mode_reg (mode1, op1);
/* The comi pattern sets the flags; no explicit result operand.  */
20768 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte of TARGET from the comparison of the (elided)
   flags-register operands.  */
20772 emit_insn (gen_rtx_SET (VOIDmode,
20773 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20774 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode register underlying the QImode subreg.  */
20778 return SUBREG_REG (target);
20781 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* NOTE(review): sampled extraction — declaration tail, braces, and the
   flags-register operands of the final comparison are elided.  Structure
   parallels ix86_expand_sse_comi above.  */
20784 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
20788 tree arg0 = CALL_EXPR_ARG (exp, 0);
20789 tree arg1 = CALL_EXPR_ARG (exp, 1);
20790 rtx op0 = expand_normal (arg0);
20791 rtx op1 = expand_normal (arg1);
20792 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
20793 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
20794 enum rtx_code comparison = d->comparison;
20796 if (VECTOR_MODE_P (mode0))
20797 op0 = safe_vector_operand (op0, mode0);
20798 if (VECTOR_MODE_P (mode1))
20799 op1 = safe_vector_operand (op1, mode1);
/* Zero an SImode register and write only its low byte below.  */
20801 target = gen_reg_rtx (SImode);
20802 emit_move_insn (target, const0_rtx);
20803 target = gen_rtx_SUBREG (QImode, target, 0);
20805 if ((optimize && !register_operand (op0, mode0))
20806 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
20807 op0 = copy_to_mode_reg (mode0, op0);
20808 if ((optimize && !register_operand (op1, mode1))
20809 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
20810 op1 = copy_to_mode_reg (mode1, op1);
/* ptest sets the flags only; result comes from the comparison below.  */
20812 pat = GEN_FCN (d->icode) (op0, op1);
20816 emit_insn (gen_rtx_SET (VOIDmode,
20817 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20818 gen_rtx_fmt_ee (comparison, QImode,
20822 return SUBREG_REG (target);
20825 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* NOTE(review): sampled extraction — braces, `pat` emission, and several
   epilogue lines are elided between the numbered lines below.  */
20828 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
20829 tree exp, rtx target)
/* Five call arguments: two vectors with explicit lengths plus the
   8-bit immediate control word.  */
20832 tree arg0 = CALL_EXPR_ARG (exp, 0);
20833 tree arg1 = CALL_EXPR_ARG (exp, 1);
20834 tree arg2 = CALL_EXPR_ARG (exp, 2);
20835 tree arg3 = CALL_EXPR_ARG (exp, 3);
20836 tree arg4 = CALL_EXPR_ARG (exp, 4);
20837 rtx scratch0, scratch1;
20838 rtx op0 = expand_normal (arg0);
20839 rtx op1 = expand_normal (arg1);
20840 rtx op2 = expand_normal (arg2);
20841 rtx op3 = expand_normal (arg3);
20842 rtx op4 = expand_normal (arg4);
20843 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
/* Operand modes: 0/1 are the two outputs (index and mask), 2..5 the
   vector/length inputs, 6 the immediate.  */
20845 tmode0 = insn_data[d->icode].operand[0].mode;
20846 tmode1 = insn_data[d->icode].operand[1].mode;
20847 modev2 = insn_data[d->icode].operand[2].mode;
20848 modei3 = insn_data[d->icode].operand[3].mode;
20849 modev4 = insn_data[d->icode].operand[4].mode;
20850 modei5 = insn_data[d->icode].operand[5].mode;
20851 modeimm = insn_data[d->icode].operand[6].mode;
20853 if (VECTOR_MODE_P (modev2))
20854 op0 = safe_vector_operand (op0, modev2);
20855 if (VECTOR_MODE_P (modev4))
20856 op2 = safe_vector_operand (op2, modev4);
/* Legitimize each input against its operand predicate.  */
20858 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20859 op0 = copy_to_mode_reg (modev2, op0);
20860 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
20861 op1 = copy_to_mode_reg (modei3, op1);
20862 if ((optimize && !register_operand (op2, modev4))
20863 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
20864 op2 = copy_to_mode_reg (modev4, op2);
20865 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
20866 op3 = copy_to_mode_reg (modei5, op3);
20868 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
20870 error ("the fifth argument must be a 8-bit immediate")
;
/* PCMPESTRI wants the index output; the mask goes to a scratch.  */
20874 if (d->code == IX86_BUILTIN_PCMPESTRI128)
20876 if (optimize || !target
20877 || GET_MODE (target) != tmode0
20878 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20879 target = gen_reg_rtx (tmode0);
20881 scratch1 = gen_reg_rtx (tmode1);
20883 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* PCMPESTRM wants the mask output; the index goes to a scratch.  */
20885 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
20887 if (optimize || !target
20888 || GET_MODE (target) != tmode1
20889 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20890 target = gen_reg_rtx (tmode1);
20892 scratch0 = gen_reg_rtx (tmode0);
20894 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag-extracting variants: both outputs are scratches and the result
   is read from the flag register named by d->flag.  */
20898 gcc_assert (d->flag);
20900 scratch0 = gen_reg_rtx (tmode0);
20901 scratch1 = gen_reg_rtx (tmode1);
20903 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Materialize the flag as a 0/1 value in the low byte of an SImode
   register, as in ix86_expand_sse_comi.  */
20913 target = gen_reg_rtx (SImode);
20914 emit_move_insn (target, const0_rtx);
20915 target = gen_rtx_SUBREG (QImode, target, 0);
20918 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
20919 gen_rtx_fmt_ee (EQ, QImode,
20920 gen_rtx_REG ((enum machine_mode) d->flag,
20923 return SUBREG_REG (target);
20930 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* NOTE(review): sampled extraction — braces and epilogue lines are elided.
   Implicit-length sibling of ix86_expand_sse_pcmpestr above: three
   arguments (two vectors + immediate) instead of five.  */
20933 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
20934 tree exp, rtx target)
20937 tree arg0 = CALL_EXPR_ARG (exp, 0);
20938 tree arg1 = CALL_EXPR_ARG (exp, 1);
20939 tree arg2 = CALL_EXPR_ARG (exp, 2);
20940 rtx scratch0, scratch1;
20941 rtx op0 = expand_normal (arg0);
20942 rtx op1 = expand_normal (arg1);
20943 rtx op2 = expand_normal (arg2);
20944 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
/* Operands 0/1 are outputs (index and mask), 2/3 vectors, 4 immediate.  */
20946 tmode0 = insn_data[d->icode].operand[0].mode;
20947 tmode1 = insn_data[d->icode].operand[1].mode;
20948 modev2 = insn_data[d->icode].operand[2].mode;
20949 modev3 = insn_data[d->icode].operand[3].mode;
20950 modeimm = insn_data[d->icode].operand[4].mode;
20952 if (VECTOR_MODE_P (modev2))
20953 op0 = safe_vector_operand (op0, modev2);
20954 if (VECTOR_MODE_P (modev3))
20955 op1 = safe_vector_operand (op1, modev3);
20957 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
20958 op0 = copy_to_mode_reg (modev2, op0);
20959 if ((optimize && !register_operand (op1, modev3))
20960 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
20961 op1 = copy_to_mode_reg (modev3, op1);
20963 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
20965 error ("the third argument must be a 8-bit immediate")
;
/* PCMPISTRI: keep the index output, scratch the mask.  */
20969 if (d->code == IX86_BUILTIN_PCMPISTRI128)
20971 if (optimize || !target
20972 || GET_MODE (target) != tmode0
20973 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
20974 target = gen_reg_rtx (tmode0);
20976 scratch1 = gen_reg_rtx (tmode1);
20978 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* PCMPISTRM: keep the mask output, scratch the index.  */
20980 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
20982 if (optimize || !target
20983 || GET_MODE (target) != tmode1
20984 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
20985 target = gen_reg_rtx (tmode1);
20987 scratch0 = gen_reg_rtx (tmode0);
20989 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag-extracting variants: result read from flag register d->flag.  */
20993 gcc_assert (d->flag);
20995 scratch0 = gen_reg_rtx (tmode0);
20996 scratch1 = gen_reg_rtx (tmode1);
20998 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Materialize the flag as 0/1 in the low byte of an SImode register.  */
21008 target = gen_reg_rtx (SImode);
21009 emit_move_insn (target, const0_rtx);
21010 target = gen_rtx_SUBREG (QImode, target, 0);
21013 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
21014 gen_rtx_fmt_ee (EQ, QImode,
21015 gen_rtx_REG ((enum machine_mode) d->flag,
21018 return SUBREG_REG (target);
21024 /* Subroutine of ix86_expand_builtin to take care of insns with
21025 variable number of operands. */
/* NOTE(review): sampled extraction — braces, `break;`/`nargs = N;`
   statements inside the big switch, and the emit/return epilogue are
   elided between the numbered lines below.  */
21028 ix86_expand_args_builtin (const struct builtin_description *d,
21029 tree exp, rtx target)
21031 rtx pat, real_target;
21032 unsigned int i, nargs;
21033 unsigned int nargs_constant = 0;
21034 int num_memory = 0;
21038 enum machine_mode mode;
21040 bool last_arg_count = false;
21041 enum insn_code icode = d->icode;
21042 const struct insn_data *insn_p = &insn_data[icode];
21043 enum machine_mode tmode = insn_p->operand[0].mode;
21044 enum machine_mode rmode = VOIDmode;
21046 enum rtx_code comparison = d->comparison;
/* Dispatch on the builtin's function-type tag: it encodes the arity,
   operand classes, and any special handling required.  */
21048 switch ((enum ix86_builtin_type) d->flag)
21050 case INT_FTYPE_V2DI_V2DI_PTEST:
21051 return ix86_expand_sse_ptest (d, exp, target);
/* Unary cases (one vector/scalar input).  The `nargs = 1;` settings
   are on elided lines.  */
21052 case FLOAT128_FTYPE_FLOAT128:
21053 case FLOAT_FTYPE_FLOAT:
21054 case INT64_FTYPE_V4SF:
21055 case INT64_FTYPE_V2DF:
21056 case INT_FTYPE_V16QI:
21057 case INT_FTYPE_V8QI:
21058 case INT_FTYPE_V4SF:
21059 case INT_FTYPE_V2DF:
21060 case V16QI_FTYPE_V16QI:
21061 case V8HI_FTYPE_V8HI:
21062 case V8HI_FTYPE_V16QI:
21063 case V8QI_FTYPE_V8QI:
21064 case V4SI_FTYPE_V4SI:
21065 case V4SI_FTYPE_V16QI:
21066 case V4SI_FTYPE_V4SF:
21067 case V4SI_FTYPE_V8HI:
21068 case V4SI_FTYPE_V2DF:
21069 case V4HI_FTYPE_V4HI:
21070 case V4SF_FTYPE_V4SF:
21071 case V4SF_FTYPE_V4SI:
21072 case V4SF_FTYPE_V2DF:
21073 case V2DI_FTYPE_V2DI:
21074 case V2DI_FTYPE_V16QI:
21075 case V2DI_FTYPE_V8HI:
21076 case V2DI_FTYPE_V4SI:
21077 case V2DF_FTYPE_V2DF:
21078 case V2DF_FTYPE_V4SI:
21079 case V2DF_FTYPE_V4SF:
21080 case V2DF_FTYPE_V2SI:
21081 case V2SI_FTYPE_V2SI:
21082 case V2SI_FTYPE_V4SF:
21083 case V2SI_FTYPE_V2SF:
21084 case V2SI_FTYPE_V2DF:
21085 case V2SF_FTYPE_V2SF:
21086 case V2SF_FTYPE_V2SI:
/* Scalar unops implemented with vec_merge get their own expander.  */
21089 case V4SF_FTYPE_V4SF_VEC_MERGE:
21090 case V2DF_FTYPE_V2DF_VEC_MERGE:
21091 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
/* Binary cases; comparisons among them divert to the SSE-compare
   expander further below.  */
21092 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
21093 case V16QI_FTYPE_V16QI_V16QI:
21094 case V16QI_FTYPE_V8HI_V8HI:
21095 case V8QI_FTYPE_V8QI_V8QI:
21096 case V8QI_FTYPE_V4HI_V4HI:
21097 case V8HI_FTYPE_V8HI_V8HI:
21098 case V8HI_FTYPE_V16QI_V16QI:
21099 case V8HI_FTYPE_V4SI_V4SI:
21100 case V4SI_FTYPE_V4SI_V4SI:
21101 case V4SI_FTYPE_V8HI_V8HI:
21102 case V4SI_FTYPE_V4SF_V4SF:
21103 case V4SI_FTYPE_V2DF_V2DF:
21104 case V4HI_FTYPE_V4HI_V4HI:
21105 case V4HI_FTYPE_V8QI_V8QI:
21106 case V4HI_FTYPE_V2SI_V2SI:
21107 case V4SF_FTYPE_V4SF_V4SF:
21108 case V4SF_FTYPE_V4SF_V2SI:
21109 case V4SF_FTYPE_V4SF_V2DF:
21110 case V4SF_FTYPE_V4SF_DI:
21111 case V4SF_FTYPE_V4SF_SI:
21112 case V2DI_FTYPE_V2DI_V2DI:
21113 case V2DI_FTYPE_V16QI_V16QI:
21114 case V2DI_FTYPE_V4SI_V4SI:
21115 case V2DI_FTYPE_V2DI_V16QI:
21116 case V2DI_FTYPE_V2DF_V2DF:
21117 case V2SI_FTYPE_V2SI_V2SI:
21118 case V2SI_FTYPE_V4HI_V4HI:
21119 case V2SI_FTYPE_V2SF_V2SF:
21120 case V2DF_FTYPE_V2DF_V2DF:
21121 case V2DF_FTYPE_V2DF_V4SF:
21122 case V2DF_FTYPE_V2DF_DI:
21123 case V2DF_FTYPE_V2DF_SI:
21124 case V2SF_FTYPE_V2SF_V2SF:
21125 case V1DI_FTYPE_V1DI_V1DI:
21126 case V1DI_FTYPE_V8QI_V8QI:
21127 case V1DI_FTYPE_V2SI_V2SI:
21128 if (comparison == UNKNOWN)
21129 return ix86_expand_binop_builtin (icode, exp, target);
/* _SWAP variants are comparisons with operands exchanged.  */
21132 case V4SF_FTYPE_V4SF_V4SF_SWAP:
21133 case V2DF_FTYPE_V2DF_V2DF_SWAP:
21134 gcc_assert (comparison != UNKNOWN);
/* _COUNT variants: last argument is a shift count (reg or imm8).  */
21138 case V8HI_FTYPE_V8HI_V8HI_COUNT:
21139 case V8HI_FTYPE_V8HI_SI_COUNT:
21140 case V4SI_FTYPE_V4SI_V4SI_COUNT:
21141 case V4SI_FTYPE_V4SI_SI_COUNT:
21142 case V4HI_FTYPE_V4HI_V4HI_COUNT:
21143 case V4HI_FTYPE_V4HI_SI_COUNT:
21144 case V2DI_FTYPE_V2DI_V2DI_COUNT:
21145 case V2DI_FTYPE_V2DI_SI_COUNT:
21146 case V2SI_FTYPE_V2SI_V2SI_COUNT:
21147 case V2SI_FTYPE_V2SI_SI_COUNT:
21148 case V1DI_FTYPE_V1DI_V1DI_COUNT:
21149 case V1DI_FTYPE_V1DI_SI_COUNT:
21151 last_arg_count = true;
21153 case UINT64_FTYPE_UINT64_UINT64:
21154 case UINT_FTYPE_UINT_UINT:
21155 case UINT_FTYPE_UINT_USHORT:
21156 case UINT_FTYPE_UINT_UCHAR:
/* 2TI result types set RMODE (result wrapped in a subreg below).  */
21159 case V2DI2TI_FTYPE_V2DI_INT:
21162 nargs_constant = 1;
21164 case V8HI_FTYPE_V8HI_INT:
21165 case V4SI_FTYPE_V4SI_INT:
21166 case V4HI_FTYPE_V4HI_INT:
21167 case V4SF_FTYPE_V4SF_INT:
21168 case V2DI_FTYPE_V2DI_INT:
21169 case V2DF_FTYPE_V2DF_INT:
21171 nargs_constant = 1;
21173 case V16QI_FTYPE_V16QI_V16QI_V16QI:
21174 case V4SF_FTYPE_V4SF_V4SF_V4SF:
21175 case V2DF_FTYPE_V2DF_V2DF_V2DF:
21178 case V16QI_FTYPE_V16QI_V16QI_INT:
21179 case V8HI_FTYPE_V8HI_V8HI_INT:
21180 case V4SI_FTYPE_V4SI_V4SI_INT:
21181 case V4SF_FTYPE_V4SF_V4SF_INT:
21182 case V2DI_FTYPE_V2DI_V2DI_INT:
21183 case V2DF_FTYPE_V2DF_V2DF_INT:
21185 nargs_constant = 1;
21187 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
21190 nargs_constant = 1;
21192 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
21195 nargs_constant = 1;
21197 case V2DI_FTYPE_V2DI_UINT_UINT:
21199 nargs_constant = 2;
21201 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
21203 nargs_constant = 2;
21206 gcc_unreachable ();
21209 gcc_assert (nargs <= ARRAY_SIZE (args));
/* All comparison-typed builtins funnel into the SSE compare helper.  */
21211 if (comparison != UNKNOWN)
21213 gcc_assert (nargs == 2);
21214 return ix86_expand_sse_compare (d, exp, target, swap);
/* When RMODE differs from TMODE the pattern's result mode differs from
   the builtin's; expand into an RMODE reg and view it as TMODE.  */
21217 if (rmode == VOIDmode || rmode == tmode)
21221 || GET_MODE (target) != tmode
21222 || ! (*insn_p->operand[0].predicate) (target, tmode))
21223 target = gen_reg_rtx (tmode);
21224 real_target = target;
21228 target = gen_reg_rtx (rmode);
21229 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
/* Expand and legitimize each argument in turn.  */
21232 for (i = 0; i < nargs; i++)
21234 tree arg = CALL_EXPR_ARG (exp, i);
21235 rtx op = expand_normal (arg);
21236 enum machine_mode mode = insn_p->operand[i + 1].mode;
21237 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
21239 if (last_arg_count && (i + 1) == nargs)
21241 /* SIMD shift insns take either an 8-bit immediate or
21242 register as count. But builtin functions take int as
21243 count. If count doesn't match, we put it in register. */
21246 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
21247 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
21248 op = copy_to_reg (op);
/* Trailing arguments that must be compile-time constants: emit a
   per-pattern diagnostic when the predicate rejects them.  */
21251 else if ((nargs - i) <= nargs_constant)
21256 case CODE_FOR_sse4_1_roundpd:
21257 case CODE_FOR_sse4_1_roundps:
21258 case CODE_FOR_sse4_1_roundsd:
21259 case CODE_FOR_sse4_1_roundss:
21260 case CODE_FOR_sse4_1_blendps:
21261 error ("the last argument must be a 4-bit immediate")
;
21264 case CODE_FOR_sse4_1_blendpd:
21265 error ("the last argument must be a 2-bit immediate")
;
21269 switch (nargs_constant)
21272 if ((nargs - i) == nargs_constant)
21274 error ("the next to last argument must be an 8-bit immediate")
;
21278 error ("the last argument must be an 8-bit immediate")
;
21281 gcc_unreachable ();
21288 if (VECTOR_MODE_P (mode))
21289 op = safe_vector_operand (op, mode);
21291 /* If we aren't optimizing, only allow one memory operand to
/* Count memory operands so at most one stays in memory.  */
21293 if (memory_operand (op, mode))
21296 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
21298 if (optimize || !match || num_memory > 1)
21299 op = copy_to_mode_reg (mode, op);
/* Mode mismatch: copy to a reg and view it in the required mode.  */
21303 op = copy_to_reg (op);
21304 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
21309 args[i].mode = mode;
/* Generate the insn with the arity determined above (the surrounding
   `switch (nargs)` lines are elided).  */
21315 pat = GEN_FCN (icode) (real_target, args[0].op);
21318 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
21321 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21325 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
21326 args[2].op, args[3].op);
21329 gcc_unreachable ();
21339 /* Subroutine of ix86_expand_builtin to take care of special insns
21340 with variable number of operands. */
/* NOTE(review): sampled extraction — braces, `nargs`/`class` assignments
   inside the switch, and the emit epilogue are elided.  Handles builtins
   with a memory (load/store) operand.  */
21343 ix86_expand_special_args_builtin (const struct builtin_description *d,
21344 tree exp, rtx target)
21348 unsigned int i, nargs, arg_adjust, memory;
21352 enum machine_mode mode;
21354 enum insn_code icode = d->icode;
21355 bool last_arg_constant = false;
21356 const struct insn_data *insn_p = &insn_data[icode];
21357 enum machine_mode tmode = insn_p->operand[0].mode;
/* Whether the builtin is a load (produces TARGET) or a store (writes
   through its first pointer argument).  */
21358 enum { load, store } class;
21360 switch ((enum ix86_special_builtin_type) d->flag)
21362 case VOID_FTYPE_VOID:
21363 emit_insn (GEN_FCN (icode) (target));
/* Loads through a pointer argument.  */
21365 case V2DI_FTYPE_PV2DI:
21366 case V16QI_FTYPE_PCCHAR:
21367 case V4SF_FTYPE_PCFLOAT:
21368 case V2DF_FTYPE_PCDOUBLE:
/* Stores: pointer destination is argument 0, value follows.  */
21373 case VOID_FTYPE_PV2SF_V4SF:
21374 case VOID_FTYPE_PV2DI_V2DI:
21375 case VOID_FTYPE_PCHAR_V16QI:
21376 case VOID_FTYPE_PFLOAT_V4SF:
21377 case VOID_FTYPE_PDOUBLE_V2DF:
21378 case VOID_FTYPE_PDI_DI:
21379 case VOID_FTYPE_PINT_INT:
21382 /* Reserve memory operand for target. */
21383 memory = ARRAY_SIZE (args);
21385 case V4SF_FTYPE_V4SF_PCV2SF:
21386 case V2DF_FTYPE_V2DF_PCDOUBLE:
21392 gcc_unreachable ();
21395 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores the "target" is a MEM built from argument 0.  */
21397 if (class == store)
21399 arg = CALL_EXPR_ARG (exp, 0);
21400 op = expand_normal (arg);
21401 gcc_assert (target == 0);
21402 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
/* Load path: reuse or allocate the result register.  */
21410 || GET_MODE (target) != tmode
21411 || ! (*insn_p->operand[0].predicate) (target, tmode))
21412 target = gen_reg_rtx (tmode);
/* Expand the remaining arguments; index i is shifted by ARG_ADJUST for
   stores (argument 0 already consumed above).  */
21415 for (i = 0; i < nargs; i++)
21417 enum machine_mode mode = insn_p->operand[i + 1].mode;
21420 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
21421 op = expand_normal (arg);
21422 match = (*insn_p->operand[i + 1].predicate) (op, mode);
21424 if (last_arg_constant && (i + 1) == nargs)
21430 error ("the last argument must be an 8-bit immediate")
;
21438 /* This must be the memory operand. */
21439 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
21440 gcc_assert (GET_MODE (op) == mode
21441 || GET_MODE (op) == VOIDmode);
21445 /* This must be register. */
21446 if (VECTOR_MODE_P (mode))
21447 op = safe_vector_operand (op, mode);
21449 gcc_assert (GET_MODE (op) == mode
21450 || GET_MODE (op) == VOIDmode);
21451 op = copy_to_mode_reg (mode, op);
21456 args[i].mode = mode;
/* Emit with the arity collected above (switch lines elided).  */
21462 pat = GEN_FCN (icode) (target, args[0].op);
21465 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
21468 gcc_unreachable ();
/* Stores have no value result.  */
21474 return class == store ? 0 : target;
21477 /* Return the integer constant in ARG. Constrain it to be in the range
21478 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the declaration line, braces, and the return statements
   are elided in this extraction.  */
21481 get_element_number (tree vec_type, tree arg)
21483 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject non-constant or out-of-range selectors with a diagnostic.  */
21485 if (!host_integerp (arg, 1)
21486 || (elt = tree_low_cst (arg, 1), elt > max))
21488 error ("selector must be an integer constant in the range 0..%wi", max)
;
21495 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21496 ix86_expand_vector_init. We DO have language-level syntax for this, in
21497 the form of (type){ init-list }. Except that since we can't place emms
21498 instructions from inside the compiler, we can't allow the use of MMX
21499 registers unless the user explicitly asks for it. So we do *not* define
21500 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
21501 we have builtins invoked by mmintrin.h that gives us license to emit
21502 these sorts of instructions. */
/* NOTE(review): declaration line, braces, and return are elided here.  */
21505 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
21507 enum machine_mode tmode = TYPE_MODE (type);
21508 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
21509 int i, n_elt = GET_MODE_NUNITS (tmode);
21510 rtvec v = rtvec_alloc (n_elt);
/* The call must supply exactly one scalar per vector element.  */
21512 gcc_assert (VECTOR_MODE_P (tmode));
21513 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand each element and collect it (as the inner mode) into V.  */
21515 for (i = 0; i < n_elt; ++i)
21517 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
21518 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
21521 if (!target || !register_operand (target, tmode))
21522 target = gen_reg_rtx (tmode);
/* Let the generic vector-init expander build the value.  */
21524 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
21528 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21529 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
21530 had a language-level syntax for referencing vector elements. */
/* NOTE(review): declaration line, some local declarations, braces, and the
   return are elided in this extraction.  */
21533 ix86_expand_vec_ext_builtin (tree exp, rtx target)
21535 enum machine_mode tmode, mode0;
21540 arg0 = CALL_EXPR_ARG (exp, 0);
21541 arg1 = CALL_EXPR_ARG (exp, 1);
21543 op0 = expand_normal (arg0);
/* Validate the element selector against the vector type of ARG0.  */
21544 elt = get_element_number (TREE_TYPE (arg0), arg1);
21546 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21547 mode0 = TYPE_MODE (TREE_TYPE (arg0));
21548 gcc_assert (VECTOR_MODE_P (mode0));
21550 op0 = force_reg (mode0, op0);
21552 if (optimize || !target || !register_operand (target, tmode))
21553 target = gen_reg_rtx (tmode);
/* Delegate the actual extraction to the generic expander.  */
21555 ix86_expand_vector_extract (true, target, op0, elt);
21560 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
21561 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
21562 a language-level syntax for referencing vector elements. */
/* NOTE(review): declaration line, braces, and the final return of TARGET
   are elided in this extraction.  */
21565 ix86_expand_vec_set_builtin (tree exp)
21567 enum machine_mode tmode, mode1;
21568 tree arg0, arg1, arg2;
21570 rtx op0, op1, target;
21572 arg0 = CALL_EXPR_ARG (exp, 0);
21573 arg1 = CALL_EXPR_ARG (exp, 1);
21574 arg2 = CALL_EXPR_ARG (exp, 2);
21576 tmode = TYPE_MODE (TREE_TYPE (arg0));
21577 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
21578 gcc_assert (VECTOR_MODE_P (tmode));
21580 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
21581 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* Validate the element selector against the vector type of ARG0.  */
21582 elt = get_element_number (TREE_TYPE (arg0), arg2);
21584 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
21585 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
21587 op0 = force_reg (tmode, op0);
21588 op1 = force_reg (mode1, op1);
21590 /* OP0 is the source of these builtin functions and shouldn't be
21591 modified. Create a copy, use it and return it as target. */
21592 target = gen_reg_rtx (tmode);
21593 emit_move_insn (target, op0);
21594 ix86_expand_vector_set (true, target, op1, elt);
21599 /* Expand an expression EXP that calls a built-in function,
21600 with result going to TARGET if that's convenient
21601 (and in mode MODE if that's convenient).
21602 SUBTARGET may be used as the target for computing one of EXP's operands.
21603 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): sampled extraction — the declaration's return-type line,
   braces, `switch (fcode)` opener, several `return` statements, and loop
   increments are elided between the numbered lines.  */
21606 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
21607 enum machine_mode mode ATTRIBUTE_UNUSED,
21608 int ignore ATTRIBUTE_UNUSED)
21610 const struct builtin_description *d;
21612 enum insn_code icode;
21613 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
21614 tree arg0, arg1, arg2;
21615 rtx op0, op1, op2, pat;
21616 enum machine_mode mode0, mode1, mode2;
21617 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
/* Hand-expanded special cases first; table-driven dispatch follows.  */
21621 case IX86_BUILTIN_MASKMOVQ:
21622 case IX86_BUILTIN_MASKMOVDQU:
21623 icode = (fcode == IX86_BUILTIN_MASKMOVQ
21624 ? CODE_FOR_mmx_maskmovq
21625 : CODE_FOR_sse2_maskmovdqu);
21626 /* Note the arg order is different from the operand order. */
21627 arg1 = CALL_EXPR_ARG (exp, 0);
21628 arg2 = CALL_EXPR_ARG (exp, 1);
21629 arg0 = CALL_EXPR_ARG (exp, 2);
21630 op0 = expand_normal (arg0);
21631 op1 = expand_normal (arg1);
21632 op2 = expand_normal (arg2);
21633 mode0 = insn_data[icode].operand[0].mode;
21634 mode1 = insn_data[icode].operand[1].mode;
21635 mode2 = insn_data[icode].operand[2].mode;
/* Destination is a MEM addressed by the third call argument.  */
21637 op0 = force_reg (Pmode, op0);
21638 op0 = gen_rtx_MEM (mode1, op0);
21640 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
21641 op0 = copy_to_mode_reg (mode0, op0);
21642 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
21643 op1 = copy_to_mode_reg (mode1, op1);
21644 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
21645 op2 = copy_to_mode_reg (mode2, op2);
21646 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a dedicated stack slot.  */
21652 case IX86_BUILTIN_LDMXCSR:
21653 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
21654 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21655 emit_move_insn (target, op0);
21656 emit_insn (gen_sse_ldmxcsr (target));
21659 case IX86_BUILTIN_STMXCSR:
21660 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
21661 emit_insn (gen_sse_stmxcsr (target));
21662 return copy_to_mode_reg (SImode, target);
21664 case IX86_BUILTIN_CLFLUSH:
21665 arg0 = CALL_EXPR_ARG (exp, 0);
21666 op0 = expand_normal (arg0);
21667 icode = CODE_FOR_sse2_clflush;
21668 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
21669 op0 = copy_to_mode_reg (Pmode, op0);
21671 emit_insn (gen_sse2_clflush (op0));
21674 case IX86_BUILTIN_MONITOR:
21675 arg0 = CALL_EXPR_ARG (exp, 0);
21676 arg1 = CALL_EXPR_ARG (exp, 1);
21677 arg2 = CALL_EXPR_ARG (exp, 2);
21678 op0 = expand_normal (arg0);
21679 op1 = expand_normal (arg1);
21680 op2 = expand_normal (arg2);
21682 op0 = copy_to_mode_reg (Pmode, op0);
21684 op1 = copy_to_mode_reg (SImode, op1);
21686 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor selects the 32/64-bit monitor pattern.  */
21687 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
21690 case IX86_BUILTIN_MWAIT:
21691 arg0 = CALL_EXPR_ARG (exp, 0);
21692 arg1 = CALL_EXPR_ARG (exp, 1);
21693 op0 = expand_normal (arg0);
21694 op1 = expand_normal (arg1);
21696 op0 = copy_to_mode_reg (SImode, op0);
21698 op1 = copy_to_mode_reg (SImode, op1);
21699 emit_insn (gen_sse3_mwait (op0, op1));
/* Vector init/extract/set wrappers.  */
21702 case IX86_BUILTIN_VEC_INIT_V2SI:
21703 case IX86_BUILTIN_VEC_INIT_V4HI:
21704 case IX86_BUILTIN_VEC_INIT_V8QI:
21705 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
21707 case IX86_BUILTIN_VEC_EXT_V2DF:
21708 case IX86_BUILTIN_VEC_EXT_V2DI:
21709 case IX86_BUILTIN_VEC_EXT_V4SF:
21710 case IX86_BUILTIN_VEC_EXT_V4SI:
21711 case IX86_BUILTIN_VEC_EXT_V8HI:
21712 case IX86_BUILTIN_VEC_EXT_V2SI:
21713 case IX86_BUILTIN_VEC_EXT_V4HI:
21714 case IX86_BUILTIN_VEC_EXT_V16QI:
21715 return ix86_expand_vec_ext_builtin (exp, target);
21717 case IX86_BUILTIN_VEC_SET_V2DI:
21718 case IX86_BUILTIN_VEC_SET_V4SF:
21719 case IX86_BUILTIN_VEC_SET_V4SI:
21720 case IX86_BUILTIN_VEC_SET_V8HI:
21721 case IX86_BUILTIN_VEC_SET_V4HI:
21722 case IX86_BUILTIN_VEC_SET_V16QI:
21723 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: load from the constant pool.  */
21725 case IX86_BUILTIN_INFQ:
21727 REAL_VALUE_TYPE inf;
21731 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
21733 tmp = validize_mem (force_const_mem (mode, tmp));
21736 target = gen_reg_rtx (mode);
21738 emit_move_insn (target, tmp);
/* Table-driven dispatch: scan each builtin-description table for FCODE
   and hand off to the matching expander.  */
21746 for (i = 0, d = bdesc_special_args;
21747 i < ARRAY_SIZE (bdesc_special_args);
21749 if (d->code == fcode)
21750 return ix86_expand_special_args_builtin (d, exp, target);
21752 for (i = 0, d = bdesc_args;
21753 i < ARRAY_SIZE (bdesc_args);
21755 if (d->code == fcode)
21758 case IX86_BUILTIN_FABSQ:
21759 case IX86_BUILTIN_COPYSIGNQ:
21761 /* Emit a normal call if SSE2 isn't available. */
21762 return expand_call (exp, target, ignore);
21764 return ix86_expand_args_builtin (d, exp, target);
21767 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
21768 if (d->code == fcode)
21769 return ix86_expand_sse_comi (d, exp, target);
21771 for (i = 0, d = bdesc_pcmpestr;
21772 i < ARRAY_SIZE (bdesc_pcmpestr);
21774 if (d->code == fcode)
21775 return ix86_expand_sse_pcmpestr (d, exp, target);
21777 for (i = 0, d = bdesc_pcmpistr;
21778 i < ARRAY_SIZE (bdesc_pcmpistr);
21780 if (d->code == fcode)
21781 return ix86_expand_sse_pcmpistr (d, exp, target);
21783 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
21784 if (d->code == fcode)
21785 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
21786 (enum multi_arg_type)d->flag,
/* Every valid FCODE must have matched one of the tables above.  */
21789 gcc_unreachable ();
21792 /* Returns a function decl for a vectorized version of the builtin function
21793 with builtin function code FN and the result vector type TYPE, or NULL_TREE
21794 if it is not available. */
/* NOTE(review): sampled extraction — declaration tail, braces, the
   `switch (fn)` opener, and NULL_TREE fallthroughs are elided.  */
21797 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
21800 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported.  */
21803 if (TREE_CODE (type_out) != VECTOR_TYPE
21804 || TREE_CODE (type_in) != VECTOR_TYPE)
21807 out_mode = TYPE_MODE (TREE_TYPE (type_out));
21808 out_n = TYPE_VECTOR_SUBPARTS (type_out);
21809 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21810 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Map scalar builtins onto their SSE vector counterparts when the
   element modes and lane counts line up.  */
21814 case BUILT_IN_SQRT:
21815 if (out_mode == DFmode && out_n == 2
21816 && in_mode == DFmode && in_n == 2)
21817 return ix86_builtins[IX86_BUILTIN_SQRTPD];
21820 case BUILT_IN_SQRTF:
21821 if (out_mode == SFmode && out_n == 4
21822 && in_mode == SFmode && in_n == 4)
21823 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
21826 case BUILT_IN_LRINT:
21827 if (out_mode == SImode && out_n == 4
21828 && in_mode == DFmode && in_n == 2)
21829 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
21832 case BUILT_IN_LRINTF:
21833 if (out_mode == SImode && out_n == 4
21834 && in_mode == SFmode && in_n == 4)
21835 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
21842 /* Dispatch to a handler for a vectorization library. */
21843 if (ix86_veclib_handler)
21844 return (*ix86_veclib_handler)(fn, type_out, type_in);
21849 /* Handler for an SVML-style interface to
21850 a library with vectorized intrinsics. */
/* NOTE(review): sampled extraction — braces, NULL_TREE returns, the
   name-uppercasing loop body, and the final return of NEW_FNDECL are
   elided between the numbered lines.  */
21853 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
21856 tree fntype, new_fndecl, args;
21859 enum machine_mode el_mode, in_mode;
21862 /* The SVML is suitable for unsafe math only. */
21863 if (!flag_unsafe_math_optimizations)
21866 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21867 n = TYPE_VECTOR_SUBPARTS (type_out);
21868 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21869 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/lane count must match.  */
21870 if (el_mode != in_mode
/* Double-precision variants: require V2DF (2 x DFmode).  */
21878 case BUILT_IN_LOG10:
21880 case BUILT_IN_TANH:
21882 case BUILT_IN_ATAN:
21883 case BUILT_IN_ATAN2:
21884 case BUILT_IN_ATANH:
21885 case BUILT_IN_CBRT:
21886 case BUILT_IN_SINH:
21888 case BUILT_IN_ASINH:
21889 case BUILT_IN_ASIN:
21890 case BUILT_IN_COSH:
21892 case BUILT_IN_ACOSH:
21893 case BUILT_IN_ACOS:
21894 if (el_mode != DFmode || n != 2)
/* Single-precision variants: require V4SF (4 x SFmode).  */
21898 case BUILT_IN_EXPF:
21899 case BUILT_IN_LOGF:
21900 case BUILT_IN_LOG10F:
21901 case BUILT_IN_POWF:
21902 case BUILT_IN_TANHF:
21903 case BUILT_IN_TANF:
21904 case BUILT_IN_ATANF:
21905 case BUILT_IN_ATAN2F:
21906 case BUILT_IN_ATANHF:
21907 case BUILT_IN_CBRTF:
21908 case BUILT_IN_SINHF:
21909 case BUILT_IN_SINF:
21910 case BUILT_IN_ASINHF:
21911 case BUILT_IN_ASINF:
21912 case BUILT_IN_COSHF:
21913 case BUILT_IN_COSF:
21914 case BUILT_IN_ACOSHF:
21915 case BUILT_IN_ACOSF:
21916 if (el_mode != SFmode || n != 4)
/* Build the SVML entry-point name from the scalar builtin's name:
   "vmls<name>4" for float, "vmld<name>2" for double, with the log
   functions spelled "Ln" as a special case.  */
21924 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
21926 if (fn == BUILT_IN_LOGF)
21927 strcpy (name, "vmlsLn4");
21928 else if (fn == BUILT_IN_LOG)
21929 strcpy (name, "vmldLn2");
21932 sprintf (name, "vmls%s", bname+10);
21933 name[strlen (name)-1] = '4';
21936 sprintf (name, "vmld%s2", bname+10);
21938 /* Convert to uppercase. */
/* Count the scalar builtin's arguments to pick a 1- or 2-argument
   vector function type.  */
21942 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
21943 args = TREE_CHAIN (args))
21947 fntype = build_function_type_list (type_out, type_in, NULL);
21949 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
21951 /* Build a function declaration for the vectorized function. */
21952 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
21953 TREE_PUBLIC (new_fndecl) = 1;
21954 DECL_EXTERNAL (new_fndecl) = 1;
/* NOVOPS + READONLY: the library routines are pure math calls.  */
21955 DECL_IS_NOVOPS (new_fndecl) = 1;
21956 TREE_READONLY (new_fndecl) = 1;
21961 /* Handler for an ACML-style interface to
21962 a library with vectorized intrinsics. */
/* Builds a FUNCTION_DECL for the ACML "__vrd2_"/"__vrs4_" style routine
   for scalar builtin FN on vectors TYPE_IN -> TYPE_OUT.
   NOTE(review): some interior lines are missing from this extraction.  */
21965 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* Template name; the ".." at offset 4..5 is patched to "d2"/"s4" elsewhere
   (not visible here) and the suffix is appended at name+7 below.  */
21967 char name[20] = "__vr.._";
21968 tree fntype, new_fndecl, args;
21971 enum machine_mode el_mode, in_mode;
21974 /* The ACML is 64bits only and suitable for unsafe math only as
21975 it does not correctly support parts of IEEE with the required
21976 precision such as denormals. */
21978 || !flag_unsafe_math_optimizations)
21981 el_mode = TYPE_MODE (TREE_TYPE (type_out));
21982 n = TYPE_VECTOR_SUBPARTS (type_out);
21983 in_mode = TYPE_MODE (TREE_TYPE (type_in));
21984 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element mode/width must agree.  */
21985 if (el_mode != in_mode
/* Double variants require 2x DFmode, float variants 4x SFmode.  */
21995 case BUILT_IN_LOG2:
21996 case BUILT_IN_LOG10:
21999 if (el_mode != DFmode
22004 case BUILT_IN_SINF:
22005 case BUILT_IN_COSF:
22006 case BUILT_IN_EXPF:
22007 case BUILT_IN_POWF:
22008 case BUILT_IN_LOGF:
22009 case BUILT_IN_LOG2F:
22010 case BUILT_IN_LOG10F:
22013 if (el_mode != SFmode
/* Append the scalar name (minus its fixed prefix -- presumably
   "__builtin_"; verify) after the "__vrXN_" template prefix.  */
22022 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
22023 sprintf (name + 7, "%s", bname+10);
/* Count arguments to choose a 1- or 2-argument vector signature.  */
22026 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
22027 args = TREE_CHAIN (args))
22031 fntype = build_function_type_list (type_out, type_in, NULL);
22033 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
22035 /* Build a function declaration for the vectorized function. */
22036 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
22037 TREE_PUBLIC (new_fndecl) = 1;
22038 DECL_EXTERNAL (new_fndecl) = 1;
/* No virtual operands + read-only lets the optimizers CSE calls.  */
22039 DECL_IS_NOVOPS (new_fndecl) = 1;
22040 TREE_READONLY (new_fndecl) = 1;
22046 /* Returns a decl of a function that implements conversion of the
22047 input vector of type TYPE, or NULL_TREE if it is not available. */
/* CODE is the tree conversion code; visible mappings are int->float
   (CVTDQ2PS) and float->int truncation (CVTTPS2DQ).
   NOTE(review): interior lines are missing from this extraction.  */
22050 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
22052 if (TREE_CODE (type) != VECTOR_TYPE)
22058 switch (TYPE_MODE (type))
22061 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
22066 case FIX_TRUNC_EXPR:
22067 switch (TYPE_MODE (type))
22070 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
22080 /* Returns a code for a target-specific builtin that implements
22081 reciprocal of the function, or NULL_TREE if not available. */
/* MD_FN distinguishes machine-dependent builtins from normal ones.
   Reciprocal approximations are only legal under -mrecip with SSE math
   and aggressively unsafe FP flags, and never when optimizing for size.  */
22084 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
22085 bool sqrt ATTRIBUTE_UNUSED)
22087 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
22088 && flag_finite_math_only && !flag_trapping_math
22089 && flag_unsafe_math_optimizations))
22093 /* Machine dependent builtins. */
22096 /* Vectorized version of sqrt to rsqrt conversion. */
22097 case IX86_BUILTIN_SQRTPS_NR:
22098 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
22104 /* Normal builtins. */
22107 /* Sqrt to rsqrt conversion. */
22108 case BUILT_IN_SQRTF:
22109 return ix86_builtins[IX86_BUILTIN_RSQRTF];
22116 /* Store OPERAND to the memory after reload is completed. This means
22117 that we can't easily use assign_stack_local. */
/* Returns a MEM rtx addressing the stored value.  Three strategies are
   visible: store into the red zone below the stack pointer, push with
   PRE_DEC on 64-bit, or push SImode word(s) on 32-bit.
   NOTE(review): interior lines are missing from this extraction.  */
22119 ix86_force_to_memory (enum machine_mode mode, rtx operand)
22123 gcc_assert (reload_completed);
/* With a red zone we may store below the stack pointer without
   adjusting it (not valid under the MS 64-bit ABI).  */
22124 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
22126 result = gen_rtx_MEM (mode,
22127 gen_rtx_PLUS (Pmode,
22129 GEN_INT (-RED_ZONE_SIZE)));
22130 emit_move_insn (result, operand);
/* No red zone on 64-bit: emit an actual push (PRE_DEC of %rsp).  */
22132 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
22138 operand = gen_lowpart (DImode, operand);
22142 gen_rtx_SET (VOIDmode,
22143 gen_rtx_MEM (DImode,
22144 gen_rtx_PRE_DEC (DImode,
22145 stack_pointer_rtx)),
22149 gcc_unreachable ();
22151 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: split a DImode value into two SImode pushes.  */
22160 split_di (&operand, 1, operands, operands + 1);
22162 gen_rtx_SET (VOIDmode,
22163 gen_rtx_MEM (SImode,
22164 gen_rtx_PRE_DEC (Pmode,
22165 stack_pointer_rtx)),
22168 gen_rtx_SET (VOIDmode,
22169 gen_rtx_MEM (SImode,
22170 gen_rtx_PRE_DEC (Pmode,
22171 stack_pointer_rtx)),
22176 /* Store HImodes as SImodes. */
22177 operand = gen_lowpart (SImode, operand);
22181 gen_rtx_SET (VOIDmode,
22182 gen_rtx_MEM (GET_MODE (operand),
22183 gen_rtx_PRE_DEC (SImode,
22184 stack_pointer_rtx)),
22188 gcc_unreachable ();
22190 result = gen_rtx_MEM (mode, stack_pointer_rtx);
22195 /* Free operand from the memory. */
/* Undo ix86_force_to_memory: when the value was pushed (no red zone,
   or MS 64-bit ABI), pop the stack space back.  Red-zone stores need
   no cleanup.  NOTE(review): interior lines are missing here.  */
22197 ix86_free_from_memory (enum machine_mode mode)
22199 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
22203 if (mode == DImode || TARGET_64BIT)
22207 /* Use LEA to deallocate stack space. In peephole2 it will be converted
22208 to pop or add instruction if registers are available. */
22209 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
22210 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
22215 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
22216 QImode must go into class Q_REGS.
22217 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
22218 movdf to do mem-to-mem moves through integer regs. */
/* Implements PREFERRED_RELOAD_CLASS: given value X to be reloaded into
   REGCLASS, return the subclass reload should actually prefer
   (NO_REGS to force X into memory instead).  */
22220 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
22222 enum machine_mode mode = GET_MODE (x);
22224 /* We're only allowed to return a subclass of CLASS. Many of the
22225 following checks fail for NO_REGS, so eliminate that early. */
22226 if (regclass == NO_REGS)
22229 /* All classes can load zeros. */
22230 if (x == CONST0_RTX (mode))
22233 /* Force constants into memory if we are loading a (nonzero) constant into
22234 an MMX or SSE register. This is because there are no MMX/SSE instructions
22235 to load from a constant. */
22237 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
22240 /* Prefer SSE regs only, if we can use them for math. */
22241 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
22242 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
22244 /* Floating-point constants need more complex checks. */
22245 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
22247 /* General regs can load everything. */
22248 if (reg_class_subset_p (regclass, GENERAL_REGS))
22251 /* Floats can load 0 and 1 plus some others. Note that we eliminated
22252 zero above. We only want to wind up preferring 80387 registers if
22253 we plan on doing computation with them. */
22255 && standard_80387_constant_p (x))
22257 /* Limit class to non-sse. */
/* For a constant the x87 can load directly, strip the SSE part out of
   mixed classes so the 80387 form is used.  */
22258 if (regclass == FLOAT_SSE_REGS)
22260 if (regclass == FP_TOP_SSE_REGS)
22262 if (regclass == FP_SECOND_SSE_REGS)
22263 return FP_SECOND_REG;
22264 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
22271 /* Generally when we see PLUS here, it's the function invariant
22272 (plus soft-fp const_int). Which can only be computed into general
22274 if (GET_CODE (x) == PLUS)
22275 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
22277 /* QImode constants are easy to load, but non-constant QImode data
22278 must go into Q_REGS. */
22279 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
22281 if (reg_class_subset_p (regclass, Q_REGS))
22283 if (reg_class_subset_p (Q_REGS, regclass))
22291 /* Discourage putting floating-point values in SSE registers unless
22292 SSE math is being used, and likewise for the 387 registers. */
/* Implements PREFERRED_OUTPUT_RELOAD_CLASS: restrict REGCLASS for an
   output reload of X, returning NO_REGS to reject the alternative.  */
22294 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
22296 enum machine_mode mode = GET_MODE (x);
22298 /* Restrict the output reload class to the register bank that we are doing
22299 math on. If we would like not to return a subset of CLASS, reject this
22300 alternative: if reload cannot do this, it will still use its choice. */
22301 mode = GET_MODE (x);
22302 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
22303 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
22305 if (X87_FLOAT_MODE_P (mode))
/* Narrow mixed x87+SSE classes to the pure x87 part.  */
22307 if (regclass == FP_TOP_SSE_REGS)
22309 else if (regclass == FP_SECOND_SSE_REGS)
22310 return FP_SECOND_REG;
22312 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implements TARGET_SECONDARY_RELOAD.  Returns the class of an
   intermediate register needed to move X (an IN_P input or output
   reload of MODE) for CLASS, or presumably NO_REGS when none is
   needed -- the fall-through is not visible in this extraction.  */
22318 static enum reg_class
22319 ix86_secondary_reload (bool in_p, rtx x, enum reg_class class,
22320 enum machine_mode mode,
22321 secondary_reload_info *sri ATTRIBUTE_UNUSED)
22323 /* QImode spills from non-QI registers require
22324 intermediate register on 32bit targets. */
22325 if (!in_p && mode == QImode && !TARGET_64BIT
22326 && (class == GENERAL_REGS
22327 || class == LEGACY_REGS
22328 || class == INDEX_REGS))
/* Look through SUBREGs/pseudos to the underlying hard register.  */
22337 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
22338 regno = true_regnum (x);
22340 /* Return Q_REGS if the operand is in memory. */
22348 /* If we are copying between general and FP registers, we need a memory
22349 location. The same is true for SSE and MMX registers.
22351 To optimize register_move_cost performance, allow inline variant.
22353 The macro can't work reliably when one of the CLASSES is class containing
22354 registers from multiple units (SSE, MMX, integer). We avoid this by never
22355 combining those units in single alternative in the machine description.
22356 Ensure that this constraint holds to avoid unexpected surprises.
22358 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
22359 enforce these sanity checks. */
22362 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22363 enum machine_mode mode, int strict)
/* Mixed-unit classes are only tolerated in the non-strict (cost
   estimation) path; under STRICT they are a backend bug.  */
22365 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
22366 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
22367 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
22368 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
22369 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
22370 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
22372 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
22376 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
22379 /* ??? This is a lie. We do have moves between mmx/general, and for
22380 mmx/sse2. But by saying we need secondary memory we discourage the
22381 register allocator from using the mmx registers unless needed. */
22382 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
22385 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22387 /* SSE1 doesn't have any direct moves from other classes. */
22391 /* If the target says that inter-unit moves are more expensive
22392 than moving through memory, then don't generate them. */
22393 if (!TARGET_INTER_UNIT_MOVES)
22396 /* Between SSE and general, we have moves no larger than word size. */
22397 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper for inline_secondary_memory_needed; used by the
   SECONDARY_MEMORY_NEEDED target macro.  */
22405 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
22406 enum machine_mode mode, int strict)
22408 return inline_secondary_memory_needed (class1, class2, mode, strict);
22411 /* Return true if the registers in CLASS cannot represent the change from
22412 modes FROM to TO. */
22415 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
22416 enum reg_class regclass)
22421 /* x87 registers can't do subreg at all, as all values are reformatted
22422 to extended precision. */
22423 if (MAYBE_FLOAT_CLASS_P (regclass))
22426 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
22428 /* Vector registers do not support QI or HImode loads. If we don't
22429 disallow a change to these modes, reload will assume it's ok to
22430 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
22431 the vec_dupv4hi pattern. */
22432 if (GET_MODE_SIZE (from) < 4)
22435 /* Vector registers do not support subreg with nonzero offsets, which
22436 are otherwise valid for integer registers. Since we can't see
22437 whether we have a nonzero offset from here, prohibit all
22438 nonparadoxical subregs changing size. */
22439 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
22446 /* Return the cost of moving data of mode M between a
22447 register and memory. A value of 2 is the default; this cost is
22448 relative to those in `REGISTER_MOVE_COST'.
22450 This function is used extensively by register_move_cost that is used to
22451 build tables at startup. Make it inline in this case.
22452 When IN is 2, return maximum of in and out move cost.
22454 If moving between registers and memory is more expensive than
22455 between two registers, you should define this macro to express the
22458 Model also increased moving costs of QImode registers in non
22462 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* x87 loads/stores: costs indexed by a size-derived table index
   (index computation not visible in this extraction).  */
22466 if (FLOAT_CLASS_P (regclass))
22484 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
22485 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
/* SSE loads/stores, indexed by mode size.  */
22487 if (SSE_CLASS_P (regclass))
22490 switch (GET_MODE_SIZE (mode))
22505 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
22506 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
/* MMX loads/stores, indexed by mode size.  */
22508 if (MMX_CLASS_P (regclass))
22511 switch (GET_MODE_SIZE (mode))
22523 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
22524 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers, by mode size; QImode in non-Q registers needs
   movzbl loads to avoid partial-register stalls.  */
22526 switch (GET_MODE_SIZE (mode))
22529 if (Q_CLASS_P (regclass) || TARGET_64BIT)
22532 return ix86_cost->int_store[0];
22533 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
22534 cost = ix86_cost->movzbl_load;
22536 cost = ix86_cost->int_load[0];
22538 return MAX (cost, ix86_cost->int_store[0]);
/* Non-Q-class byte access: stores are penalized (+4) since only part
   of the register can be addressed.  */
22544 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
22546 return ix86_cost->movzbl_load;
22548 return ix86_cost->int_store[0] + 4;
22553 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
22554 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
22556 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
22557 if (mode == TFmode)
22560 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
22562 cost = ix86_cost->int_load[2];
22564 cost = ix86_cost->int_store[2];
/* Scale the per-word cost by the number of words moved.  */
22565 return (cost * (((int) GET_MODE_SIZE (mode)
22566 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for inline_memory_move_cost; used by the
   MEMORY_MOVE_COST target macro.  */
22571 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
22573 return inline_memory_move_cost (mode, regclass, in);
22577 /* Return the cost of moving data from a register in class CLASS1 to
22578 one in class CLASS2.
22580 It is not required that the cost always equal 2 when FROM is the same as TO;
22581 on some machines it is expensive to move between registers if they are not
22582 general registers. */
22585 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
22586 enum reg_class class2)
22588 /* In case we require secondary memory, compute cost of the store followed
22589 by load. In order to avoid bad register allocation choices, we need
22590 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
22592 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN argument of 2 requests max(load, store) from each side.  */
22596 cost += inline_memory_move_cost (mode, class1, 2);
22597 cost += inline_memory_move_cost (mode, class2, 2);
22599 /* In case of copying from general_purpose_register we may emit multiple
22600 stores followed by single load causing memory size mismatch stall.
22601 Count this as arbitrarily high cost of 20. */
22602 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
22605 /* In the case of FP/MMX moves, the registers actually overlap, and we
22606 have to switch modes in order to treat them differently. */
22607 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
22608 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
22614 /* Moves between SSE/MMX and integer unit are expensive. */
22615 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
22616 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
22618 /* ??? By keeping returned value relatively high, we limit the number
22619 of moves between integer and MMX/SSE registers for all targets.
22620 Additionally, high value prevents problem with x86_modes_tieable_p(),
22621 where integer modes in MMX/SSE registers are not tieable
22622 because of missing QImode and HImode moves to, from or between
22623 MMX/SSE registers. */
22624 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Same-unit moves: use the per-unit cost from the processor table.  */
22626 if (MAYBE_FLOAT_CLASS_P (class1))
22627 return ix86_cost->fp_move;
22628 if (MAYBE_SSE_CLASS_P (class1))
22629 return ix86_cost->sse_move;
22630 if (MAYBE_MMX_CLASS_P (class1))
22631 return ix86_cost->mmx_move;
22635 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
22638 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
22640 /* Flags and only flags can only hold CCmode values. */
22641 if (CC_REGNO_P (regno))
22642 return GET_MODE_CLASS (mode) == MODE_CC;
/* CC/RANDOM/PARTIAL_INT modes are rejected for every other register.  */
22643 if (GET_MODE_CLASS (mode) == MODE_CC
22644 || GET_MODE_CLASS (mode) == MODE_RANDOM
22645 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
22647 if (FP_REGNO_P (regno))
22648 return VALID_FP_MODE_P (mode);
22649 if (SSE_REGNO_P (regno))
22651 /* We implement the move patterns for all vector modes into and
22652 out of SSE registers, even when no operation instructions
22654 return (VALID_SSE_REG_MODE (mode)
22655 || VALID_SSE2_REG_MODE (mode)
22656 || VALID_MMX_REG_MODE (mode)
22657 || VALID_MMX_REG_MODE_3DNOW (mode));
22659 if (MMX_REGNO_P (regno))
22661 /* We implement the move patterns for 3DNOW modes even in MMX mode,
22662 so if the register is available at all, then we can move data of
22663 the given mode into or out of it. */
22664 return (VALID_MMX_REG_MODE (mode)
22665 || VALID_MMX_REG_MODE_3DNOW (mode))...
22668 if (mode == QImode)
22670 /* Take care for QImode values - they can be in non-QI regs,
22671 but then they do cause partial register stalls. */
22672 if (regno < 4 || TARGET_64BIT)
22674 if (!TARGET_PARTIAL_REG_STALL)
/* During reload QImode in non-Q regs is tolerated even with the
   partial-register-stall tuning.  */
22676 return reload_in_progress || reload_completed;
22678 /* We handle both integer and floats in the general purpose registers. */
22679 else if (VALID_INT_MODE_P (mode))
22681 else if (VALID_FP_MODE_P (mode))
22683 else if (VALID_DFP_MODE_P (mode))
22685 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
22686 on to use that value in smaller contexts, this can easily force a
22687 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
22688 supporting DImode, allow it. */
22689 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
22695 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
22696 tieable integer mode. */
/* NOTE(review): the switch statement and its case labels are not
   visible in this extraction; only two return arms survive --
   presumably the HImode and SImode/DImode cases respectively.  */
22699 ix86_tieable_integer_mode_p (enum machine_mode mode)
22708 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
22711 return TARGET_64BIT;
22718 /* Return true if MODE1 is accessible in a register that can hold MODE2
22719 without copying. That is, all register classes that can hold MODE2
22720 can also hold MODE1. */
22723 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
22725 if (mode1 == mode2)
22728 if (ix86_tieable_integer_mode_p (mode1)
22729 && ix86_tieable_integer_mode_p (mode2))
22732 /* MODE2 being XFmode implies fp stack or general regs, which means we
22733 can tie any smaller floating point modes to it. Note that we do not
22734 tie this with TFmode. */
22735 if (mode2 == XFmode)
22736 return mode1 == SFmode || mode1 == DFmode;
22738 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
22739 that we can tie it with SFmode. */
22740 if (mode2 == DFmode)
22741 return mode1 == SFmode;
22743 /* If MODE2 is only appropriate for an SSE register, then tie with
22744 any other mode acceptable to SSE registers. */
22745 if (GET_MODE_SIZE (mode2) == 16
22746 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
22747 return (GET_MODE_SIZE (mode1) == 16
22748 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
22750 /* If MODE2 is appropriate for an MMX register, then tie
22751 with any other mode acceptable to MMX registers. */
22752 if (GET_MODE_SIZE (mode2) == 8
22753 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
22754 return (GET_MODE_SIZE (mode1) == 8
22755 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
22760 /* Compute a (partial) cost for rtx X. Return true if the complete
22761 cost has been computed, and false if subexpressions should be
22762 scanned. In either case, *TOTAL contains the cost result. */
/* Implements TARGET_RTX_COSTS.  The big switch on CODE is not fully
   visible in this extraction -- case labels for many arms (constants,
   ZERO_EXTEND, MULT, DIV, PLUS, NEG, NOT, COMPARE, ABS, SQRT, UNSPEC)
   are missing; comments below follow the visible fragments only.  */
22765 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
22767 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
22768 enum machine_mode mode = GET_MODE (x);
/* Constant costs: 64-bit immediates that need movabs or are not
   zero-extendable are more expensive, as are PIC symbolic refs.  */
22776 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
22778 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
22780 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): "!GET_CODE (x) != LABEL_REF" compares a negated value
   against an enum -- looks like a latent bug ("!=" vs "==" or a stray
   "!"); confirm against the upstream source before changing.  */
22782 || (!GET_CODE (x) != LABEL_REF
22783 && (GET_CODE (x) != SYMBOL_REF
22784 || !SYMBOL_REF_LOCAL_P (x)))))
22791 if (mode == VOIDmode)
/* FP constants the x87 can materialize (fldz/fld1 etc.) are cheap.  */
22794 switch (standard_80387_constant_p (x))
22799 default: /* Other constants */
22804 /* Start with (MEM (SYMBOL_REF)), since that's where
22805 it'll probably end up. Add a penalty for size. */
22806 *total = (COSTS_N_INSNS (1)
22807 + (flag_pic != 0 && !TARGET_64BIT)
22808 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
22814 /* The zero extensions is often completely free on x86_64, so make
22815 it as cheap as possible. */
22816 if (TARGET_64BIT && mode == DImode
22817 && GET_MODE (XEXP (x, 0)) == SImode)
22819 else if (TARGET_ZERO_EXTEND_WITH_AND)
22820 *total = ix86_cost->add;
22822 *total = ix86_cost->movzx;
22826 *total = ix86_cost->movsx;
/* Shift costs: small constant left shifts may be done via LEA.  */
22830 if (CONST_INT_P (XEXP (x, 1))
22831 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
22833 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22836 *total = ix86_cost->add;
22839 if ((value == 2 || value == 3)
22840 && ix86_cost->lea <= ix86_cost->shift_const)
22842 *total = ix86_cost->lea;
/* DImode shifts on 32-bit targets need multiple instructions.  */
22852 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
22854 if (CONST_INT_P (XEXP (x, 1)))
22856 if (INTVAL (XEXP (x, 1)) > 32)
22857 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
22859 *total = ix86_cost->shift_const * 2;
22863 if (GET_CODE (XEXP (x, 1)) == AND)
22864 *total = ix86_cost->shift_var * 2;
22866 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
22871 if (CONST_INT_P (XEXP (x, 1)))
22872 *total = ix86_cost->shift_const;
22874 *total = ix86_cost->shift_var;
/* Multiplication: FP cases first, then integer with a per-set-bit
   term and widening-multiply recognition.  */
22879 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22881 /* ??? SSE scalar cost should be used here. */
22882 *total = ix86_cost->fmul;
22885 else if (X87_FLOAT_MODE_P (mode))
22887 *total = ix86_cost->fmul;
22890 else if (FLOAT_MODE_P (mode))
22892 /* ??? SSE vector cost should be used here. */
22893 *total = ix86_cost->fmul;
22898 rtx op0 = XEXP (x, 0);
22899 rtx op1 = XEXP (x, 1);
22901 if (CONST_INT_P (XEXP (x, 1)))
/* Kernighan population count of the constant multiplier.  */
22903 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
22904 for (nbits = 0; value != 0; value &= value - 1)
22908 /* This is arbitrary. */
22911 /* Compute costs correctly for widening multiplication. */
22912 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
22913 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
22914 == GET_MODE_SIZE (mode))
22916 int is_mulwiden = 0;
22917 enum machine_mode inner_mode = GET_MODE (op0);
22919 if (GET_CODE (op0) == GET_CODE (op1))
22920 is_mulwiden = 1, op1 = XEXP (op1, 0);
22921 else if (CONST_INT_P (op1))
/* A constant operand widens iff it fits the narrow mode
   under the extension's signedness.  */
22923 if (GET_CODE (op0) == SIGN_EXTEND)
22924 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
22927 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
22931 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
22934 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
22935 + nbits * ix86_cost->mult_bit
22936 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
/* Division/modulus costs.  */
22945 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
22946 /* ??? SSE cost should be used here. */
22947 *total = ix86_cost->fdiv;
22948 else if (X87_FLOAT_MODE_P (mode))
22949 *total = ix86_cost->fdiv;
22950 else if (FLOAT_MODE_P (mode))
22951 /* ??? SSE vector cost should be used here. */
22952 *total = ix86_cost->fdiv;
22954 *total = ix86_cost->divide[MODE_INDEX (mode)];
/* PLUS: recognize LEA-shaped address arithmetic
   (base + index*scale + disp) and charge a single LEA.  */
22958 if (GET_MODE_CLASS (mode) == MODE_INT
22959 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
22961 if (GET_CODE (XEXP (x, 0)) == PLUS
22962 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
22963 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
22964 && CONSTANT_P (XEXP (x, 1)))
22966 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
22967 if (val == 2 || val == 4 || val == 8)
22969 *total = ix86_cost->lea;
22970 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22971 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
22973 *total += rtx_cost (XEXP (x, 1), outer_code);
22977 else if (GET_CODE (XEXP (x, 0)) == MULT
22978 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
22980 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
22981 if (val == 2 || val == 4 || val == 8)
22983 *total = ix86_cost->lea;
22984 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22985 *total += rtx_cost (XEXP (x, 1), outer_code);
22989 else if (GET_CODE (XEXP (x, 0)) == PLUS)
22991 *total = ix86_cost->lea;
22992 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
22993 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
22994 *total += rtx_cost (XEXP (x, 1), outer_code);
/* FP addition/subtraction.  */
23001 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23003 /* ??? SSE cost should be used here. */
23004 *total = ix86_cost->fadd;
23007 else if (X87_FLOAT_MODE_P (mode))
23009 *total = ix86_cost->fadd;
23012 else if (FLOAT_MODE_P (mode))
23014 /* ??? SSE vector cost should be used here. */
23015 *total = ix86_cost->fadd;
/* DImode logical ops on 32-bit targets cost two word operations;
   operands narrower than DImode are doubled in cost (the shift).  */
23023 if (!TARGET_64BIT && mode == DImode)
23025 *total = (ix86_cost->add * 2
23026 + (rtx_cost (XEXP (x, 0), outer_code)
23027 << (GET_MODE (XEXP (x, 0)) != DImode))
23028 + (rtx_cost (XEXP (x, 1), outer_code)
23029 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* FP negation.  */
23035 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23037 /* ??? SSE cost should be used here. */
23038 *total = ix86_cost->fchs;
23041 else if (X87_FLOAT_MODE_P (mode))
23043 *total = ix86_cost->fchs;
23046 else if (FLOAT_MODE_P (mode))
23048 /* ??? SSE vector cost should be used here. */
23049 *total = ix86_cost->fchs;
/* NOT: two adds' worth on split 32-bit DImode, one otherwise.  */
23055 if (!TARGET_64BIT && mode == DImode)
23056 *total = ix86_cost->add * 2;
23058 *total = ix86_cost->add;
/* COMPARE of a single extracted bit against zero is a test insn.  */
23062 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
23063 && XEXP (XEXP (x, 0), 1) == const1_rtx
23064 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
23065 && XEXP (x, 1) == const0_rtx)
23067 /* This kind of construct is implemented using test[bwl].
23068 Treat it as if we had an AND. */
23069 *total = (ix86_cost->add
23070 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
23071 + rtx_cost (const1_rtx, outer_code));
23077 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS and SQRT cost tables, by FP unit.  */
23082 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23083 /* ??? SSE cost should be used here. */
23084 *total = ix86_cost->fabs;
23085 else if (X87_FLOAT_MODE_P (mode))
23086 *total = ix86_cost->fabs;
23087 else if (FLOAT_MODE_P (mode))
23088 /* ??? SSE vector cost should be used here. */
23089 *total = ix86_cost->fabs;
23093 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
23094 /* ??? SSE cost should be used here. */
23095 *total = ix86_cost->fsqrt;
23096 else if (X87_FLOAT_MODE_P (mode))
23097 *total = ix86_cost->fsqrt;
23098 else if (FLOAT_MODE_P (mode))
23099 /* ??? SSE vector cost should be used here. */
23100 *total = ix86_cost->fsqrt;
/* UNSPEC_TP (thread pointer access) gets a special cost.  */
23104 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels (LPC$n, L<n>$lz) for
   Mach-O lazy-binding stubs.  */
23115 static int current_machopic_label_num;
23117 /* Given a symbol name and its associated stub, write out the
23118 definition of the stub. */
23121 machopic_output_stub (FILE *file, const char *symb, const char *stub)
23123 unsigned int length;
23124 char *binder_name, *symbol_name, lazy_ptr_name[32];
23125 int label = ++current_machopic_label_num;
23127 /* For 64-bit we shouldn't get here. */
23128 gcc_assert (!TARGET_64BIT);
23130 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
23131 symb = (*targetm.strip_name_encoding) (symb);
23133 length = strlen (stub);
23134 binder_name = XALLOCAVEC (char, length + 32);
23135 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
23137 length = strlen (symb);
23138 symbol_name = XALLOCAVEC (char, length + 32);
23139 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
23141 sprintf (lazy_ptr_name, "L%d$lz", label);
/* PIC stubs go in a different Mach-O section than non-PIC ones;
   the selecting condition is not visible in this extraction.  */
23144 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
23146 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
23148 fprintf (file, "%s:\n", stub);
23149 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC form: materialize the PC in %eax via call/pop, then jump
   through the lazy pointer relative to it.  */
23153 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
23154 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
23155 fprintf (file, "\tjmp\t*%%edx\n");
/* Non-PIC form: jump indirectly through the lazy pointer.  */
23158 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
/* Binder trampoline: push the lazy pointer's address and enter
   dyld's lazy binding helper.  */
23160 fprintf (file, "%s:\n", binder_name);
23164 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
23165 fprintf (file, "\tpushl\t%%eax\n");
23168 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
23170 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
/* The lazy pointer itself, initially pointing at the binder.  */
23172 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
23173 fprintf (file, "%s:\n", lazy_ptr_name);
23174 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
23175 fprintf (file, "\t.long %s\n", binder_name);
/* Darwin end-of-file hook; delegates to the generic Darwin handler
   (any x86-specific epilogue is not visible in this extraction).  */
23179 darwin_x86_file_end (void)
23181 darwin_file_end ();
23184 #endif /* TARGET_MACHO */
23186 /* Order the registers for register allocator. */
/* Fills reg_alloc_order: caller-saved GPRs first, then callee-saved
   GPRs, then the FP unit actually used for math (x87 or SSE) ahead of
   the other, then MMX; remaining slots are zero-filled.  */
23189 x86_order_regs_for_local_alloc (void)
23194 /* First allocate the local general purpose registers. */
23195 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23196 if (GENERAL_REGNO_P (i) && call_used_regs[i])
23197 reg_alloc_order [pos++] = i;
23199 /* Global general purpose registers. */
23200 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
23201 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
23202 reg_alloc_order [pos++] = i;
23204 /* x87 registers come first in case we are doing FP math
23206 if (!TARGET_SSE_MATH)
23207 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23208 reg_alloc_order [pos++] = i;
23210 /* SSE registers. */
23211 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
23212 reg_alloc_order [pos++] = i;
23213 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
23214 reg_alloc_order [pos++] = i;
23216 /* x87 registers. */
23217 if (TARGET_SSE_MATH)
23218 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
23219 reg_alloc_order [pos++] = i;
23221 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
23222 reg_alloc_order [pos++] = i;
23224 /* Initialize the rest of array as we do not allocate some registers
23226 while (pos < FIRST_PSEUDO_REGISTER)
23227 reg_alloc_order [pos++] = 0;
23230 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
23231 struct attribute_spec.handler. */
/* Attribute handler for "ms_abi"/"sysv_abi": warn and drop the attribute
   on non-function nodes, and report an error when the two mutually
   exclusive ABI attributes appear on the same type.  Sets *NO_ADD_ATTRS
   when the attribute must not be attached.  NOTE(review): the 64-bit
   availability check's condition and the return statements are in lines
   missing from this listing.  */
23233 ix86_handle_abi_attribute (tree *node, tree name,
23234 tree args ATTRIBUTE_UNUSED,
23235 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
23237 if (TREE_CODE (*node) != FUNCTION_TYPE
23238 && TREE_CODE (*node) != METHOD_TYPE
23239 && TREE_CODE (*node) != FIELD_DECL
23240 && TREE_CODE (*node) != TYPE_DECL)
23242 warning (OPT_Wattributes, "%qs attribute only applies to functions",
23243 IDENTIFIER_POINTER (name));
23244 *no_add_attrs = true;
23249 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
23250 IDENTIFIER_POINTER (name));
23251 *no_add_attrs = true;
23255 /* Can combine regparm with all attributes but fastcall. */
23256 if (is_attribute_p ("ms_abi", name))
23258 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
23260 error ("ms_abi and sysv_abi attributes are not compatible");
23265 else if (is_attribute_p ("sysv_abi", name))
23267 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
23269 error ("ms_abi and sysv_abi attributes are not compatible");
23278 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
23279 struct attribute_spec.handler. */
/* Attribute handler for "ms_struct"/"gcc_struct": only RECORD_TYPE or
   UNION_TYPE targets are accepted, and the two attributes are mutually
   exclusive on one type — otherwise warn and set *NO_ADD_ATTRS so the
   attribute is dropped.  NOTE(review): the declaration of `type' and
   some braces are in lines missing from this listing.  */
23281 ix86_handle_struct_attribute (tree *node, tree name,
23282 tree args ATTRIBUTE_UNUSED,
23283 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
23286 if (DECL_P (*node))
23288 if (TREE_CODE (*node) == TYPE_DECL)
23289 type = &TREE_TYPE (*node);
23294 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
23295 || TREE_CODE (*type) == UNION_TYPE)))
23297 warning (OPT_Wattributes, "%qs attribute ignored",
23298 IDENTIFIER_POINTER (name));
23299 *no_add_attrs = true;
23302 else if ((is_attribute_p ("ms_struct", name)
23303 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
23304 || ((is_attribute_p ("gcc_struct", name)
23305 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
23307 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
23308 IDENTIFIER_POINTER (name));
23309 *no_add_attrs = true;
/* Return whether RECORD_TYPE should use MS bitfield layout: either the
   target default (TARGET_MS_BITFIELD_LAYOUT) without an overriding
   "gcc_struct" attribute, or an explicit "ms_struct" attribute.  */
23316 ix86_ms_bitfield_layout_p (const_tree record_type)
23318 return (TARGET_MS_BITFIELD_LAYOUT &&
23319 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
23320 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
23323 /* Returns an expression indicating where the this parameter is
23324 located on entry to the FUNCTION. */
/* Return an rtx for the location of the `this' pointer on entry to
   FUNCTION: a DImode register chosen from the ABI-specific parameter
   register set on 64-bit, a register for regparm/fastcall 32-bit
   functions, or a stack slot otherwise.  AGGR is nonzero when a hidden
   aggregate-return pointer occupies the first slot, shifting `this' to
   the next one.  NOTE(review): the TARGET_64BIT test, `nregs'/`regno'
   declarations and several branches are in lines missing from this
   listing.  */
23327 x86_this_parameter (tree function)
23329 tree type = TREE_TYPE (function);
23330 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
23335 const int *parm_regs;
23337 if (ix86_function_type_abi (type) == MS_ABI)
23338 parm_regs = x86_64_ms_abi_int_parameter_registers;
23340 parm_regs = x86_64_int_parameter_registers;
23341 return gen_rtx_REG (DImode, parm_regs[aggr]);
23344 nregs = ix86_function_regparm (type, function);
23346 if (nregs > 0 && !stdarg_p (type))
23350 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
23351 regno = aggr ? DX_REG : CX_REG;
23359 return gen_rtx_MEM (SImode,
23360 plus_constant (stack_pointer_rtx, 4));
23363 return gen_rtx_REG (SImode, regno);
23366 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
23369 /* Determine whether x86_output_mi_thunk can succeed. */
/* TARGET_ASM_CAN_OUTPUT_MI_THUNK: decide whether x86_output_mi_thunk can
   emit the thunk.  32-bit needs a scratch register free of incoming
   regparm arguments, plus one for VCALL_OFFSET and one for PIC/GOT
   references to non-local targets.  NOTE(review): the return statements
   for each branch are in lines missing from this listing.  */
23372 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
23373 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
23374 HOST_WIDE_INT vcall_offset, const_tree function)
23376 /* 64-bit can handle anything. */
23380 /* For 32-bit, everything's fine if we have one free register. */
23381 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
23384 /* Need a free register for vcall_offset. */
23388 /* Need a free register for GOT references. */
23389 if (flag_pic && !(*targetm.binds_local_p) (function))
23392 /* Otherwise ok. */
23396 /* Output the assembler code for a thunk function. THUNK_DECL is the
23397 declaration for the thunk function itself, FUNCTION is the decl for
23398 the target function. DELTA is an immediate constant offset to be
23399 added to THIS. If VCALL_OFFSET is nonzero, the word at
23400 *(*this + vcall_offset) should be added to THIS. */
/* TARGET_ASM_OUTPUT_MI_THUNK: emit the assembly for a vcall thunk.
   Adds DELTA (and, if nonzero, the word at *(*this + VCALL_OFFSET)) to
   the incoming `this' pointer, then tail-jumps to FUNCTION, using
   direct, GOTPCREL, GOT or Mach-O-stub jump forms depending on PIC mode
   and locality of the target.  Scratch registers: %eax/%r10/%r11 plus
   %ecx (or %eax for fastcall).  NOTE(review): many conditions, braces
   and declarations (xops, tmp, TARGET_64BIT tests) are in lines missing
   from this listing; comments below describe only the visible code.  */
23403 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
23404 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
23405 HOST_WIDE_INT vcall_offset, tree function)
23408 rtx this_param = x86_this_parameter (function);
23411 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
23412 pull it in now and let DELTA benefit. */
23413 if (REG_P (this_param))
23414 this_reg = this_param;
23415 else if (vcall_offset)
23417 /* Put the this parameter into %eax. */
23418 xops[0] = this_param;
23419 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
23420 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23423 this_reg = NULL_RTX;
23425 /* Adjust the this parameter by a fixed constant. */
23428 xops[0] = GEN_INT (delta);
23429 xops[1] = this_reg ? this_reg : this_param;
23432 if (!x86_64_general_operand (xops[0], DImode))
23434 tmp = gen_rtx_REG (DImode, R10_REG);
23436 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
23438 xops[1] = this_param;
23440 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
23443 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
23446 /* Adjust the this parameter by a value stored in the vtable. */
23450 tmp = gen_rtx_REG (DImode, R10_REG);
23453 int tmp_regno = CX_REG;
23454 if (lookup_attribute ("fastcall",
23455 TYPE_ATTRIBUTES (TREE_TYPE (function))))
23456 tmp_regno = AX_REG;
23457 tmp = gen_rtx_REG (SImode, tmp_regno);
23460 xops[0] = gen_rtx_MEM (Pmode, this_reg);
23462 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23464 /* Adjust the this parameter. */
23465 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
23466 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
23468 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
23469 xops[0] = GEN_INT (vcall_offset);
23471 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
23472 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
23474 xops[1] = this_reg;
23475 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
23478 /* If necessary, drop THIS back to its stack slot. */
23479 if (this_reg && this_reg != this_param)
23481 xops[0] = this_reg;
23482 xops[1] = this_param;
23483 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
23486 xops[0] = XEXP (DECL_RTL (function), 0);
23489 if (!flag_pic || (*targetm.binds_local_p) (function))
23490 output_asm_insn ("jmp\t%P0", xops);
23491 /* All thunks should be in the same object as their target,
23492 and thus binds_local_p should be true. */
23493 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
23494 gcc_unreachable ();
23497 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL)
23498 tmp = gen_rtx_CONST (Pmode, tmp);
23499 tmp = gen_rtx_MEM (QImode, tmp);
23501 output_asm_insn ("jmp\t%A0", xops);
23506 if (!flag_pic || (*targetm.binds_local_p) (function))
23507 output_asm_insn ("jmp\t%P0", xops);
23512 rtx sym_ref = XEXP (DECL_RTL (function), 0);
23513 tmp = (gen_rtx_SYMBOL_REF
23515 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
23516 tmp = gen_rtx_MEM (QImode, tmp);
23518 output_asm_insn ("jmp\t%0", xops);
23521 #endif /* TARGET_MACHO */
23523 tmp = gen_rtx_REG (SImode, CX_REG);
23524 output_set_got (tmp, NULL_RTX);
23527 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
23528 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START: emit the standard file prologue, then
   target-specific directives (.version, __fltused global, Intel syntax
   selection).  NOTE(review): the conditional guarding darwin_file_start
   and the closing brace are in lines missing from this listing.  */
23534 x86_file_start (void)
23536 default_file_start ();
23538 darwin_file_start ();
23540 if (X86_FILE_START_VERSION_DIRECTIVE)
23541 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
23542 if (X86_FILE_START_FLTUSED)
23543 fputs ("\t.global\t__fltused\n", asm_out_file);
23544 if (ix86_asm_dialect == ASM_INTEL)
23545 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN worker: cap the alignment of DFmode/DCmode and
   integer-class fields at 32 bits for 32-bit targets without
   -malign-double; otherwise the COMPUTED alignment stands.
   NOTE(review): the early-return for the 64-bit/align-double case and
   the final return are in lines missing from this listing.  */
23549 x86_field_alignment (tree field, int computed)
23551 enum machine_mode mode;
23552 tree type = TREE_TYPE (field);
23554 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
23556 mode = TYPE_MODE (strip_array_types (type));
23557 if (mode == DFmode || mode == DCmode
23558 || GET_MODE_CLASS (mode) == MODE_INT
23559 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
23560 return MIN (32, computed);
23564 /* Output assembler code to FILE to increment profiler label # LABELNO
23565 for profiling a function entry. */
/* FUNCTION_PROFILER: emit the mcount call sequence for -pg, choosing
   RIP-relative/GOTPCREL forms on 64-bit SysV PIC, @GOTOFF/@GOT forms on
   32-bit PIC, and a plain call otherwise; the counter-label load is
   skipped when NO_PROFILE_COUNTERS.  NOTE(review): the TARGET_64BIT /
   flag_pic branch structure and #else/#endif lines are missing from
   this listing.  */
23567 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
23571 #ifndef NO_PROFILE_COUNTERS
23572 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
23575 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
23576 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
23578 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23582 #ifndef NO_PROFILE_COUNTERS
23583 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
23584 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
23586 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
23590 #ifndef NO_PROFILE_COUNTERS
23591 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
23592 PROFILE_COUNT_REGISTER);
23594 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
23598 /* We don't have exact information about the insn sizes, but we may assume
23599 quite safely that we are informed about all 1 byte insns and memory
23600 address sizes. This is enough to eliminate unnecessary padding in
/* Return a conservative lower bound, in bytes, on the encoded size of
   INSN: 0 for inactive insns and alignment unspecs, a fixed size for
   symbolic calls, and a value derived from attribute lengths otherwise
   (symbolic memory references force at least 4 bytes of encoding).
   NOTE(review): several return statements and the declaration of `l'
   are in lines missing from this listing.  */
23604 min_insn_size (rtx insn)
23608 if (!INSN_P (insn) || !active_insn_p (insn))
23611 /* Discard alignments we've emitted, and jump instructions. */
23612 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
23613 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
23616 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
23617 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
23620 /* Important case - calls are always 5 bytes.
23621 It is common to have many calls in the row. */
23623 && symbolic_reference_mentioned_p (PATTERN (insn))
23624 && !SIBLING_CALL_P (insn))
23626 if (get_attr_length (insn) <= 1)
23629 /* For normal instructions we may rely on the sizes of addresses
23630 and the presence of symbol to require 4 bytes of encoding.
23631 This is not the case for jumps where references are PC relative. */
23632 if (!JUMP_P (insn))
23634 l = get_attr_length_address (insn);
23635 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
23644 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Workaround for the K8 branch predictor, which mispredicts when more
   than 3 jumps share one 16-byte window: scan the insn stream with a
   sliding window and emit an alignment insn before the 4th jump when
   the window would otherwise fit in under 16 bytes.  NOTE(review): the
   jump-counting branch, `isjump' declaration and loop braces are in
   lines missing from this listing.  */
23648 ix86_avoid_jump_misspredicts (void)
23650 rtx insn, start = get_insns ();
23651 int nbytes = 0, njumps = 0;
23654 /* Look for all minimal intervals of instructions containing 4 jumps.
23655 The intervals are bounded by START and INSN. NBYTES is the total
23656 size of instructions in the interval including INSN and not including
23657 START. When the NBYTES is smaller than 16 bytes, it is possible
23658 that the end of START and INSN ends up in the same 16byte page.
23660 The smallest offset in the page INSN can start is the case where START
23661 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
23662 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
23664 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23667 nbytes += min_insn_size (insn);
23669 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
23670 INSN_UID (insn), min_insn_size (insn));
23672 && GET_CODE (PATTERN (insn)) != ADDR_VEC
23673 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
23681 start = NEXT_INSN (start);
23682 if ((JUMP_P (start)
23683 && GET_CODE (PATTERN (start)) != ADDR_VEC
23684 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
23686 njumps--, isjump = 1;
23689 nbytes -= min_insn_size (start);
23691 gcc_assert (njumps >= 0);
23693 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
23694 INSN_UID (start), INSN_UID (insn), nbytes);
23696 if (njumps == 3 && isjump && nbytes < 16)
23698 int padsize = 15 - nbytes + min_insn_size (insn);
23701 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
23702 INSN_UID (insn), padsize);
23703 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
23708 /* AMD Athlon works faster
23709 when RET is not destination of conditional jump or directly preceded
23710 by other jump instruction. We avoid the penalty by inserting NOP just
23711 before the RET instructions in such cases. */
/* Avoid the AMD Athlon branch-prediction penalty for a RET that is the
   target of a conditional jump or immediately follows another jump:
   replace such hot returns with the "long" return form.  NOTE(review):
   the `replace = true' assignments and the closing control flow are in
   lines missing from this listing.  */
23713 ix86_pad_returns (void)
23718 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
23720 basic_block bb = e->src;
23721 rtx ret = BB_END (bb);
23723 bool replace = false;
23725 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
23726 || !maybe_hot_bb_p (bb))
23728 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
23729 if (active_insn_p (prev) || LABEL_P (prev))
23731 if (prev && LABEL_P (prev))
23736 FOR_EACH_EDGE (e, ei, bb->preds)
23737 if (EDGE_FREQUENCY (e) && e->src->index >= 0
23738 && !(e->flags & EDGE_FALLTHRU))
23743 prev = prev_active_insn (ret);
23745 && ((JUMP_P (prev) && any_condjump_p (prev))
23748 /* Empty functions get branch mispredict even when the jump destination
23749 is not visible to us. */
23750 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
23755 emit_insn_before (gen_return_internal_long (), ret);
23761 /* Implement machine specific optimizations. We implement padding of returns
23762 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
/* Body of the machine-dependent reorg pass (presumably ix86_reorg — the
   defining line is missing from this listing): run the two K8-oriented
   fixups above when optimizing for speed.  */
23766 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
23767 ix86_pad_returns ();
23768 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
23769 ix86_avoid_jump_misspredicts ();
23772 /* Return nonzero when QImode register that must be represented via REX prefix
/* Return nonzero when INSN uses a QImode register with number >= 4,
   i.e. one whose byte form requires a REX prefix.  NOTE(review): the
   QImode check on the operand and the return statements are in lines
   missing from this listing.  */
23775 x86_extended_QIreg_mentioned_p (rtx insn)
23778 extract_insn_cached (insn);
23779 for (i = 0; i < recog_data.n_operands; i++)
23780 if (REG_P (recog_data.operand[i])
23781 && REGNO (recog_data.operand[i]) >= 4)
23786 /* Return nonzero when P points to register encoded via REX prefix.
23787 Called via for_each_rtx. */
/* for_each_rtx callback: nonzero when *P is a REX-encoded integer or
   SSE register.  NOTE(review): the REG_P guard is in a line missing
   from this listing.  */
23789 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
23791 unsigned int regno;
23794 regno = REGNO (*p);
23795 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
23798 /* Return true when INSN mentions register that must be encoded using REX
/* Return true when INSN's pattern mentions any register that needs a
   REX prefix (walks the pattern with extended_reg_mentioned_1).  */
23801 x86_extended_reg_mentioned_p (rtx insn)
23803 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
23806 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
23807 optabs would emit if we didn't have TFmode patterns. */
/* Expand an unsigned-integer-to-float conversion: convert directly when
   the input is non-negative, otherwise halve (with the dropped low bit
   OR-ed back in to preserve rounding), convert, and double the result.
   NOTE(review): the assignment of `out' and a couple of call arguments
   are in lines missing from this listing.  */
23810 x86_emit_floatuns (rtx operands[2])
23812 rtx neglab, donelab, i0, i1, f0, in, out;
23813 enum machine_mode mode, inmode;
23815 inmode = GET_MODE (operands[1]);
23816 gcc_assert (inmode == SImode || inmode == DImode);
23819 in = force_reg (inmode, operands[1]);
23820 mode = GET_MODE (out);
23821 neglab = gen_label_rtx ();
23822 donelab = gen_label_rtx ();
23823 f0 = gen_reg_rtx (mode);
23825 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
23827 expand_float (out, in, 0);
23829 emit_jump_insn (gen_jump (donelab));
23832 emit_label (neglab);
23834 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
23836 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
23838 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
23840 expand_float (f0, i0, 0);
23842 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
23844 emit_label (donelab);
23847 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23848 with all elements equal to VAR. Return true if successful. */
/* Store into TARGET a MODE vector with every element equal to VAL.
   Uses VEC_DUPLICATE where the target supports it, SSE2
   punpckl+pshufd sequences for V8HI/V16QI, and otherwise widens VAL by
   self-concatenation (shift+ior) and recurses in the wider vector mode.
   Return true on success.  NOTE(review): the switch over MODE, several
   case labels/braces and the mode-pair setup (smode/wsmode/wvmode) are
   in lines missing from this listing.  */
23851 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
23852 rtx target, rtx val)
23854 enum machine_mode smode, wsmode, wvmode;
23869 val = force_reg (GET_MODE_INNER (mode), val);
23870 x = gen_rtx_VEC_DUPLICATE (mode, val);
23871 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23877 if (TARGET_SSE || TARGET_3DNOW_A)
23879 val = gen_lowpart (SImode, val);
23880 x = gen_rtx_TRUNCATE (HImode, val);
23881 x = gen_rtx_VEC_DUPLICATE (mode, x);
23882 emit_insn (gen_rtx_SET (VOIDmode, target, x));
23904 /* Extend HImode to SImode using a paradoxical SUBREG. */
23905 tmp1 = gen_reg_rtx (SImode);
23906 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23907 /* Insert the SImode value as low element of V4SImode vector. */
23908 tmp2 = gen_reg_rtx (V4SImode);
23909 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23910 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23911 CONST0_RTX (V4SImode),
23913 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23914 /* Cast the V4SImode vector back to a V8HImode vector. */
23915 tmp1 = gen_reg_rtx (V8HImode);
23916 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
23917 /* Duplicate the low short through the whole low SImode word. */
23918 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
23919 /* Cast the V8HImode vector back to a V4SImode vector. */
23920 tmp2 = gen_reg_rtx (V4SImode);
23921 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23922 /* Replicate the low element of the V4SImode vector. */
23923 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23924 /* Cast the V4SImode vector back to V8HImode, and store in target. */
23925 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
23936 /* Extend QImode to SImode using a paradoxical SUBREG. */
23937 tmp1 = gen_reg_rtx (SImode);
23938 emit_move_insn (tmp1, gen_lowpart (SImode, val));
23939 /* Insert the SImode value as low element of V4SImode vector. */
23940 tmp2 = gen_reg_rtx (V4SImode);
23941 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
23942 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
23943 CONST0_RTX (V4SImode),
23945 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
23946 /* Cast the V4SImode vector back to a V16QImode vector. */
23947 tmp1 = gen_reg_rtx (V16QImode);
23948 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
23949 /* Duplicate the low byte through the whole low SImode word. */
23950 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23951 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
23952 /* Cast the V16QImode vector back to a V4SImode vector. */
23953 tmp2 = gen_reg_rtx (V4SImode);
23954 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
23955 /* Replicate the low element of the V4SImode vector. */
23956 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
23957 /* Cast the V4SImode vector back to V16QImode, and store in target. */
23958 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
23966 /* Replicate the value once into the next wider mode and recurse. */
23967 val = convert_modes (wsmode, smode, val, true);
23968 x = expand_simple_binop (wsmode, ASHIFT, val,
23969 GEN_INT (GET_MODE_BITSIZE (smode)),
23970 NULL_RTX, 1, OPTAB_LIB_WIDEN);
23971 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
23973 x = gen_reg_rtx (wvmode);
23974 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
23975 gcc_unreachable ();
23976 emit_move_insn (target, gen_lowpart (mode, x));
23984 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
23985 whose ONE_VAR element is VAR, and other elements are zero. Return true
/* Store into TARGET a MODE vector whose ONE_VAR element is VAR and all
   other elements are zero.  Prefers SSE4.1 vector-set, then
   VEC_CONCAT/VEC_MERGE plus a pshufd or shufps shuffle to move the
   value into position; for small integer vectors it zero-extends VAR to
   SImode and recurses in a SImode vector mode.  Return true on success.
   NOTE(review): the switch over MODE, case labels and several braces
   are in lines missing from this listing.  */
23989 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
23990 rtx target, rtx var, int one_var)
23992 enum machine_mode vsimode;
23995 bool use_vector_set = false;
24000 use_vector_set = TARGET_64BIT && TARGET_SSE4_1;
24005 use_vector_set = TARGET_SSE4_1;
24008 use_vector_set = TARGET_SSE2;
24011 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
24017 if (use_vector_set)
24019 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
24020 var = force_reg (GET_MODE_INNER (mode), var);
24021 ix86_expand_vector_set (mmx_ok, target, var, one_var);
24037 var = force_reg (GET_MODE_INNER (mode), var);
24038 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
24039 emit_insn (gen_rtx_SET (VOIDmode, target, x));
24044 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
24045 new_target = gen_reg_rtx (mode);
24047 new_target = target;
24048 var = force_reg (GET_MODE_INNER (mode), var);
24049 x = gen_rtx_VEC_DUPLICATE (mode, var);
24050 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
24051 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
24054 /* We need to shuffle the value to the correct position, so
24055 create a new pseudo to store the intermediate result. */
24057 /* With SSE2, we can use the integer shuffle insns. */
24058 if (mode != V4SFmode && TARGET_SSE2)
24060 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
24062 GEN_INT (one_var == 1 ? 0 : 1),
24063 GEN_INT (one_var == 2 ? 0 : 1),
24064 GEN_INT (one_var == 3 ? 0 : 1)));
24065 if (target != new_target)
24066 emit_move_insn (target, new_target);
24070 /* Otherwise convert the intermediate result to V4SFmode and
24071 use the SSE1 shuffle instructions. */
24072 if (mode != V4SFmode)
24074 tmp = gen_reg_rtx (V4SFmode);
24075 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
24080 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
24082 GEN_INT (one_var == 1 ? 0 : 1),
24083 GEN_INT (one_var == 2 ? 0+4 : 1+4),
24084 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
24086 if (mode != V4SFmode)
24087 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
24088 else if (tmp != target)
24089 emit_move_insn (target, tmp);
24091 else if (target != new_target)
24092 emit_move_insn (target, new_target);
24097 vsimode = V4SImode;
24103 vsimode = V2SImode;
24109 /* Zero extend the variable element to SImode and recurse. */
24110 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
24112 x = gen_reg_rtx (vsimode);
24113 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
24115 gcc_unreachable ();
24117 emit_move_insn (target, gen_lowpart (mode, x));
24125 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
24126 consisting of the values in VALS. It is known that all elements
24127 except ONE_VAR are constants. Return true if successful. */
/* Store into TARGET a vector whose elements are the constants in VALS
   except for the single variable element ONE_VAR: load the constant
   vector (with that slot zeroed) and overwrite the variable slot.  For
   QImode elements the variable value is merged with its neighbouring
   constant and inserted as an HImode element.  Return true on success.
   NOTE(review): the switch over MODE and a few guard lines are missing
   from this listing.  */
24130 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
24131 rtx target, rtx vals, int one_var)
24133 rtx var = XVECEXP (vals, 0, one_var);
24134 enum machine_mode wmode;
24137 const_vec = copy_rtx (vals);
24138 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
24139 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
24147 /* For the two element vectors, it's just as easy to use
24148 the general case. */
24166 /* There's no way to set one QImode entry easily. Combine
24167 the variable value with its adjacent constant value, and
24168 promote to an HImode set. */
24169 x = XVECEXP (vals, 0, one_var ^ 1);
24172 var = convert_modes (HImode, QImode, var, true);
24173 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
24174 NULL_RTX, 1, OPTAB_LIB_WIDEN);
24175 x = GEN_INT (INTVAL (x) & 0xff);
24179 var = convert_modes (HImode, QImode, var, true);
24180 x = gen_int_mode (INTVAL (x) << 8, HImode);
24182 if (x != const0_rtx)
24183 var = expand_simple_binop (HImode, IOR, var, x, var,
24184 1, OPTAB_LIB_WIDEN);
24186 x = gen_reg_rtx (wmode);
24187 emit_move_insn (x, gen_lowpart (wmode, const_vec));
24188 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
24190 emit_move_insn (target, gen_lowpart (mode, x));
24197 emit_move_insn (target, const_vec);
24198 ix86_expand_vector_set (mmx_ok, target, var, one_var);
24202 /* A subroutine of ix86_expand_vector_init_general. Use vector
24203 concatenate to handle the most general case: all values variable,
24204 and none identical. */
/* Build TARGET from N all-variable, all-distinct OPS by recursive
   VEC_CONCAT: for N == 2 concatenate directly; for larger N build
   half-width vectors (processed backward to help the register
   allocator — PR 36222) and recurse.  NOTE(review): the switches
   selecting cmode/hmode per N and MODE, plus several braces, are in
   lines missing from this listing.  */
24207 ix86_expand_vector_init_concat (enum machine_mode mode,
24208 rtx target, rtx *ops, int n)
24210 enum machine_mode cmode, hmode = VOIDmode;
24211 rtx first[4], second[2];
24239 gcc_unreachable ();
24242 if (!register_operand (ops[1], cmode))
24243 ops[1] = force_reg (cmode, ops[1]);
24244 if (!register_operand (ops[0], cmode))
24245 ops[0] = force_reg (cmode, ops[0]);
24246 emit_insn (gen_rtx_SET (VOIDmode, target,
24247 gen_rtx_VEC_CONCAT (mode, ops[0],
24261 gcc_unreachable ();
24266 /* FIXME: We process inputs backward to help RA. PR 36222. */
24269 for (; i > 0; i -= 2, j--)
24271 first[j] = gen_reg_rtx (cmode);
24272 v = gen_rtvec (2, ops[i - 1], ops[i]);
24273 ix86_expand_vector_init (false, first[j],
24274 gen_rtx_PARALLEL (cmode, v));
24280 gcc_assert (hmode != VOIDmode);
24281 for (i = j = 0; i < n; i += 2, j++)
24283 second[j] = gen_reg_rtx (hmode);
24284 ix86_expand_vector_init_concat (hmode, second [j],
24288 ix86_expand_vector_init_concat (mode, target, second, n);
24291 ix86_expand_vector_init_concat (mode, target, first, n);
24295 gcc_unreachable ();
24299 /* A subroutine of ix86_expand_vector_init_general. Use vector
24300 interleave to handle the most general case: all values variable,
24301 and none identical. */
/* Build TARGET (V8HI or V16QI) from 2*N variable OPS by pairwise
   element insertion followed by successive low-half interleaves in
   progressively wider integer modes, finishing with a V2DI interleave.
   The insert/interleave generator functions are selected per MODE.
   NOTE(review): the switch labels over MODE and second_imode, loop
   braces and a few call arguments are in lines missing from this
   listing.  */
24304 ix86_expand_vector_init_interleave (enum machine_mode mode,
24305 rtx target, rtx *ops, int n)
24307 enum machine_mode first_imode, second_imode, third_imode;
24310 rtx (*gen_load_even) (rtx, rtx, rtx);
24311 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
24312 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
24317 gen_load_even = gen_vec_setv8hi;
24318 gen_interleave_first_low = gen_vec_interleave_lowv4si;
24319 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24320 first_imode = V4SImode;
24321 second_imode = V2DImode;
24322 third_imode = VOIDmode;
24325 gen_load_even = gen_vec_setv16qi;
24326 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
24327 gen_interleave_second_low = gen_vec_interleave_lowv4si;
24328 first_imode = V8HImode;
24329 second_imode = V4SImode;
24330 third_imode = V2DImode;
24333 gcc_unreachable ();
24336 for (i = 0; i < n; i++)
24338 /* Extend the odd element to SImode using a paradoxical SUBREG. */
24339 op0 = gen_reg_rtx (SImode);
24340 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
24342 /* Insert the SImode value as low element of V4SImode vector. */
24343 op1 = gen_reg_rtx (V4SImode);
24344 op0 = gen_rtx_VEC_MERGE (V4SImode,
24345 gen_rtx_VEC_DUPLICATE (V4SImode,
24347 CONST0_RTX (V4SImode),
24349 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
24351 /* Cast the V4SImode vector back to a vector in original mode. */
24352 op0 = gen_reg_rtx (mode);
24353 emit_move_insn (op0, gen_lowpart (mode, op1));
24355 /* Load even elements into the second position. */
24356 emit_insn ((*gen_load_even) (op0, ops [i + i + 1],
24359 /* Cast vector to FIRST_IMODE vector. */
24360 ops[i] = gen_reg_rtx (first_imode);
24361 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
24364 /* Interleave low FIRST_IMODE vectors. */
24365 for (i = j = 0; i < n; i += 2, j++)
24367 op0 = gen_reg_rtx (first_imode);
24368 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
24370 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
24371 ops[j] = gen_reg_rtx (second_imode);
24372 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
24375 /* Interleave low SECOND_IMODE vectors. */
24376 switch (second_imode)
24379 for (i = j = 0; i < n / 2; i += 2, j++)
24381 op0 = gen_reg_rtx (second_imode);
24382 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
24385 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
24387 ops[j] = gen_reg_rtx (third_imode);
24388 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
24390 second_imode = V2DImode;
24391 gen_interleave_second_low = gen_vec_interleave_lowv2di;
24395 op0 = gen_reg_rtx (second_imode);
24396 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
24399 /* Cast the SECOND_IMODE vector back to a vector on original
24401 emit_insn (gen_rtx_SET (VOIDmode, target,
24402 gen_lowpart (mode, op0)));
24406 gcc_unreachable ();
24410 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
24411 all values variable, and none identical. */
/* Most general vector build: all elements variable and distinct.
   Dispatches to concat or interleave expansion per MODE, and otherwise
   packs elements into word-sized integers with shift/ior and assembles
   the words (1, 2, or 4 of them) into the vector.  NOTE(review): the
   switch over MODE, case labels and several braces are in lines
   missing from this listing.  */
24414 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
24415 rtx target, rtx vals)
24424 if (!mmx_ok && !TARGET_SSE)
24432 n = GET_MODE_NUNITS (mode);
24433 for (i = 0; i < n; i++)
24434 ops[i] = XVECEXP (vals, 0, i);
24435 ix86_expand_vector_init_concat (mode, target, ops, n);
24439 if (!TARGET_SSE4_1)
24447 n = GET_MODE_NUNITS (mode);
24448 for (i = 0; i < n; i++)
24449 ops[i] = XVECEXP (vals, 0, i);
24450 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
24458 gcc_unreachable ();
24462 int i, j, n_elts, n_words, n_elt_per_word;
24463 enum machine_mode inner_mode;
24464 rtx words[4], shift;
24466 inner_mode = GET_MODE_INNER (mode);
24467 n_elts = GET_MODE_NUNITS (mode);
24468 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
24469 n_elt_per_word = n_elts / n_words;
24470 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
24472 for (i = 0; i < n_words; ++i)
24474 rtx word = NULL_RTX;
24476 for (j = 0; j < n_elt_per_word; ++j)
24478 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
24479 elt = convert_modes (word_mode, inner_mode, elt, true);
24485 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
24486 word, 1, OPTAB_LIB_WIDEN);
24487 word = expand_simple_binop (word_mode, IOR, word, elt,
24488 word, 1, OPTAB_LIB_WIDEN);
24496 emit_move_insn (target, gen_lowpart (mode, words[0]));
24497 else if (n_words == 2)
24499 rtx tmp = gen_reg_rtx (mode);
24500 emit_clobber (tmp);
24501 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
24502 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
24503 emit_move_insn (target, tmp);
24505 else if (n_words == 4)
24507 rtx tmp = gen_reg_rtx (V4SImode);
24508 gcc_assert (word_mode == SImode);
24509 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
24510 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
24511 emit_move_insn (target, gen_lowpart (mode, tmp));
24514 gcc_unreachable ();
24518 /* Initialize vector TARGET via VALS. Suppress the use of MMX
24519 instructions unless MMX_OK is true. */
/* Top-level vector initializer: classify VALS (all-constant, all-same,
   single variable element, or fully general) and dispatch to the
   cheapest expansion — constant-pool load, broadcast, one-nonzero /
   one-var overwrite, or the general routine.  MMX insns are allowed
   only when MMX_OK.  NOTE(review): a few guard conditions (e.g. the
   all-constant test) are in lines missing from this listing.  */
24522 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
24524 enum machine_mode mode = GET_MODE (target);
24525 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24526 int n_elts = GET_MODE_NUNITS (mode);
24527 int n_var = 0, one_var = -1;
24528 bool all_same = true, all_const_zero = true;
24532 for (i = 0; i < n_elts; ++i)
24534 x = XVECEXP (vals, 0, i);
24535 if (!(CONST_INT_P (x)
24536 || GET_CODE (x) == CONST_DOUBLE
24537 || GET_CODE (x) == CONST_FIXED))
24538 n_var++, one_var = i;
24539 else if (x != CONST0_RTX (inner_mode))
24540 all_const_zero = false;
24541 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
24545 /* Constants are best loaded from the constant pool. */
24548 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
24552 /* If all values are identical, broadcast the value. */
24554 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
24555 XVECEXP (vals, 0, 0)))
24558 /* Values where only one field is non-constant are best loaded from
24559 the pool and overwritten via move later. */
24563 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
24564 XVECEXP (vals, 0, one_var),
24568 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
24572 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector register TARGET.
   MMX_OK allows MMX register modes.  Strategy: prefer a native
   VEC_MERGE/insert pattern when the target ISA has one, fall back to
   mode-specific shuffle sequences, and finally to a stack-temporary
   round trip through memory.
   NOTE(review): this excerpt omits many original lines (switch/case
   labels, braces) — the visible statements are fragments of per-mode
   cases; verify against the full source before editing.  */
24576 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
24578 enum machine_mode mode = GET_MODE (target);
24579 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24580 bool use_vec_merge = false;
/* Two-element case: extract the untouched element, then rebuild the
   vector with VEC_CONCAT in the order dictated by ELT.  */
24589 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
24590 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
24592 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
24594 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
24595 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24601 use_vec_merge = TARGET_SSE4_1;
24609 /* For the two element vectors, we implement a VEC_CONCAT with
24610 the extraction of the other element. */
24612 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
24613 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
/* Order of concat operands depends on which element is replaced.  */
24616 op0 = val, op1 = tmp;
24618 op0 = tmp, op1 = val;
24620 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
24621 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
24626 use_vec_merge = TARGET_SSE4_1;
24633 use_vec_merge = true;
/* V4SF insert via unpcklps + shufps; the letter diagrams track the
   lane contents through each step.  */
24637 /* tmp = target = A B C D */
24638 tmp = copy_to_reg (target);
24639 /* target = A A B B */
24640 emit_insn (gen_sse_unpcklps (target, target, target));
24641 /* target = X A B B */
24642 ix86_expand_vector_set (false, target, val, 0);
24643 /* target = A X C D */
24644 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24645 GEN_INT (1), GEN_INT (0),
24646 GEN_INT (2+4), GEN_INT (3+4)));
24650 /* tmp = target = A B C D */
24651 tmp = copy_to_reg (target);
24652 /* tmp = X B C D */
24653 ix86_expand_vector_set (false, tmp, val, 0);
24654 /* target = A B X D */
24655 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24656 GEN_INT (0), GEN_INT (1),
24657 GEN_INT (0+4), GEN_INT (3+4)));
24661 /* tmp = target = A B C D */
24662 tmp = copy_to_reg (target);
24663 /* tmp = X B C D */
24664 ix86_expand_vector_set (false, tmp, val, 0);
24665 /* target = A B X D */
24666 emit_insn (gen_sse_shufps_1 (target, target, tmp,
24667 GEN_INT (0), GEN_INT (1),
24668 GEN_INT (2+4), GEN_INT (0+4)));
24672 gcc_unreachable ();
24677 use_vec_merge = TARGET_SSE4_1;
24681 /* Element 0 handled by vec_merge below. */
24684 use_vec_merge = true;
24690 /* With SSE2, use integer shuffles to swap element 0 and ELT,
24691 store into element 0, then shuffle them back. */
24695 order[0] = GEN_INT (elt);
24696 order[1] = const1_rtx;
24697 order[2] = const2_rtx;
24698 order[3] = GEN_INT (3);
/* order[] now routes lane ELT to slot 0 and lane 0 to slot ELT.  */
24699 order[elt] = const0_rtx;
24701 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24702 order[1], order[2], order[3]));
24704 ix86_expand_vector_set (false, target, val, 0);
/* Same permutation is its own inverse — shuffle back.  */
24706 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
24707 order[1], order[2], order[3]));
24711 /* For SSE1, we have to reuse the V4SF code. */
24712 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
24713 gen_lowpart (SFmode, val), elt);
24718 use_vec_merge = TARGET_SSE2;
24721 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24725 use_vec_merge = TARGET_SSE4_1;
/* Generic path: broadcast VAL and merge it into lane ELT.  */
24735 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
24736 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
24737 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Last resort: spill to stack, overwrite the element in memory,
   reload the whole vector.  */
24741 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24743 emit_move_insn (mem, target);
24745 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24746 emit_move_insn (tmp, val);
24748 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar register TARGET.
   MMX_OK allows MMX register modes.  Uses a native extract (VEC_SELECT
   / pextr*) when the ISA supports it, shuffle sequences otherwise, and
   a stack-temporary reload as the final fallback.
   NOTE(review): excerpt omits switch/case structure and braces; the
   statements below belong to different per-mode cases.  */
24753 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
24755 enum machine_mode mode = GET_MODE (vec);
24756 enum machine_mode inner_mode = GET_MODE_INNER (mode);
24757 bool use_vec_extr = false;
24770 use_vec_extr = true;
24774 use_vec_extr = TARGET_SSE4_1;
/* V4SF: broadcast the wanted lane into every slot of a scratch reg.  */
24786 tmp = gen_reg_rtx (mode);
24787 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
24788 GEN_INT (elt), GEN_INT (elt),
24789 GEN_INT (elt+4), GEN_INT (elt+4)));
24793 tmp = gen_reg_rtx (mode);
24794 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
24798 gcc_unreachable ();
24801 use_vec_extr = true;
24806 use_vec_extr = TARGET_SSE4_1;
/* V4SI: pshufd the wanted lane into slot 0.  */
24820 tmp = gen_reg_rtx (mode);
24821 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
24822 GEN_INT (elt), GEN_INT (elt),
24823 GEN_INT (elt), GEN_INT (elt)));
24827 tmp = gen_reg_rtx (mode);
24828 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
24832 gcc_unreachable ();
24835 use_vec_extr = true;
24840 /* For SSE1, we have to reuse the V4SF code. */
24841 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
24842 gen_lowpart (V4SFmode, vec), elt);
24848 use_vec_extr = TARGET_SSE2;
24851 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
24855 use_vec_extr = TARGET_SSE4_1;
24859 /* ??? Could extract the appropriate HImode element and shift. */
/* Generic VEC_SELECT path.  */
24866 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
24867 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
24869 /* Let the rtl optimizers know about the zero extension performed. */
24870 if (inner_mode == QImode || inner_mode == HImode)
24872 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
24873 target = gen_lowpart (SImode, target);
24876 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
/* Fallback: spill the vector and load the element from memory.  */
24880 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
24882 emit_move_insn (mem, vec);
24884 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
24885 emit_move_insn (target, tmp);
24889 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
24890 pattern to reduce; DEST is the destination; IN is the input vector. */
24893 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
24895 rtx tmp1, tmp2, tmp3;
24897 tmp1 = gen_reg_rtx (V4SFmode);
24898 tmp2 = gen_reg_rtx (V4SFmode);
24899 tmp3 = gen_reg_rtx (V4SFmode);
/* Step 1: combine high pair with low pair (movhlps moves lanes
   2,3 into 0,1).  */
24901 emit_insn (gen_sse_movhlps (tmp1, in, in));
24902 emit_insn (fn (tmp2, tmp1, in));
/* Step 2: bring lane 1 of the partial result into lane 0 and
   combine again, leaving the full reduction in lane 0 of DEST.  */
24904 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
24905 GEN_INT (1), GEN_INT (1),
24906 GEN_INT (1+4), GEN_INT (1+4)));
24907 emit_insn (fn (dest, tmp2, tmp3));
24910 /* Target hook for scalar_mode_supported_p. */
/* Returns whether MODE is usable as a scalar mode on this target.
   Decimal float and TFmode get special treatment; everything else
   defers to the generic default.
   NOTE(review): the return statements of the first two branches are
   elided from this excerpt.  */
24912 ix86_scalar_mode_supported_p (enum machine_mode mode)
24914 if (DECIMAL_FLOAT_MODE_P (mode))
24916 else if (mode == TFmode)
24919 return default_scalar_mode_supported_p (mode);
24922 /* Implements target hook vector_mode_supported_p. */
/* MODE is supported when the matching ISA extension (SSE/SSE2/MMX/
   3DNow!) is enabled and validates the mode.
   NOTE(review): the per-branch return statements and the final
   default return are elided from this excerpt.  */
24924 ix86_vector_mode_supported_p (enum machine_mode mode)
24926 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
24928 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
24930 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
24932 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
24937 /* Target hook for c_mode_for_suffix. */
/* Maps a C constant-suffix character to a machine mode.
   NOTE(review): the entire function body is elided from this
   excerpt; only the signature is visible.  */
24938 static enum machine_mode
24939 ix86_c_mode_for_suffix (char suffix)
24949 /* Worker function for TARGET_MD_ASM_CLOBBERS.
24951 We do this in the new i386 backend to maintain source compatibility
24952 with the old cc0-based compiler. */
/* Implicitly adds "flags" and "fpsr" to the clobber list of every
   asm statement.
   NOTE(review): parameter list tail, the clobbers chain arguments and
   the return are elided from this excerpt.  */
24955 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
24956 tree inputs ATTRIBUTE_UNUSED,
24959 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
24961 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
24966 /* Implements target vector targetm.asm.encode_section_info. This
24967 is not used by netware. */
/* Runs the default encoding, then marks static/external variables
   that live in the large data section so references use far
   addressing.  */
24969 static void ATTRIBUTE_UNUSED
24970 ix86_encode_section_info (tree decl, rtx rtl, int first)
24972 default_encode_section_info (decl, rtl, first);
24974 if (TREE_CODE (decl) == VAR_DECL
24975 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
24976 && ix86_in_large_data_p (decl))
24977 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
24980 /* Worker function for REVERSE_CONDITION. */
/* Reverses comparison CODE; for FP condition modes the
   maybe-unordered variant must be used because NaN comparisons are
   not simply invertible.  */
24983 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
24985 return (mode != CCFPmode && mode != CCFPUmode
24986 ? reverse_condition (code)
24987 : reverse_condition_maybe_unordered (code));
24990 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* ... to OPERANDS[0].  Returns the assembler template string.
   Chooses between fld/fst/fstp (and ffreep for a dead source going
   to st(0)) depending on whether the source register dies in INSN
   and whether the destination is the stack top or memory.  */
24994 output_387_reg_move (rtx insn, rtx *operands)
24996 if (REG_P (operands[0]))
/* Register destination: if the source register is dead after this
   insn we can pop it.  */
24998 if (REG_P (operands[1])
24999 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25001 if (REGNO (operands[0]) == FIRST_STACK_REG)
25002 return output_387_ffreep (operands, 0);
25003 return "fstp\t%y0";
25005 if (STACK_TOP_P (operands[0]))
25006 return "fld%z1\t%y1";
25009 else if (MEM_P (operands[0]))
25011 gcc_assert (REG_P (operands[1]));
/* Dead source: popping store.  */
25012 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
25013 return "fstp%z0\t%y0";
25016 /* There is no non-popping store to memory for XFmode.
25017 So if we need one, follow the store with a load. */
25018 if (GET_MODE (operands[0]) == XFmode)
25019 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
25021 return "fst%z0\t%y0";
25028 /* Output code to perform a conditional jump to LABEL, if C2 flag in
25029 FP status register is set. */
/* Reads the FPU status word with fnstsw, then tests the C2 bit either
   via sahf (when available/profitable) or via an explicit test of bit
   0x04, and emits a conditional jump to LABEL, predicted not taken
   (10% probability).  */
25032 ix86_emit_fp_unordered_jump (rtx label)
25034 rtx reg = gen_reg_rtx (HImode);
25037 emit_insn (gen_x86_fnstsw_1 (reg));
25039 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
/* sahf path: status word flags move into EFLAGS, test UNORDERED.  */
25041 emit_insn (gen_x86_sahf_1 (reg));
25043 temp = gen_rtx_REG (CCmode, FLAGS_REG);
25044 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
/* test path: C2 is bit 2 (0x04) of the status-word high byte.  */
25048 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
25050 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
25051 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
25054 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
25055 gen_rtx_LABEL_REF (VOIDmode, label),
25057 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
25059 emit_jump_insn (temp);
25060 predict_jump (REG_BR_PROB_BASE * 10 / 100);
25063 /* Output code to perform a log1p XFmode calculation. */
/* Computes op0 = log1p (op1) on the x87.  For |op1| below
   sqrt(2)/2 - 1 (~0.2928932...) the fyl2xp1 instruction is accurate;
   otherwise fall back to fyl2x on 1 + op1.  Both paths scale by ln(2)
   loaded via fldln2.  */
25065 void ix86_emit_i387_log1p (rtx op0, rtx op1)
25067 rtx label1 = gen_label_rtx ();
25068 rtx label2 = gen_label_rtx ();
25070 rtx tmp = gen_reg_rtx (XFmode);
25071 rtx tmp2 = gen_reg_rtx (XFmode);
/* Branch on |op1| >= threshold.  */
25073 emit_insn (gen_absxf2 (tmp, op1));
25074 emit_insn (gen_cmpxf (tmp,
25075 CONST_DOUBLE_FROM_REAL_VALUE (
25076 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
25078 emit_jump_insn (gen_bge (label1));
/* Small-argument path: ln2 * log2(1 + op1) via fyl2xp1.  */
25080 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
25081 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
25082 emit_jump (label2);
/* Large-argument path: ln2 * log2(op1 + 1) via fyl2x.  */
25084 emit_label (label1);
25085 emit_move_insn (tmp, CONST1_RTX (XFmode));
25086 emit_insn (gen_addxf3 (tmp, op1, tmp));
25087 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
25088 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
25090 emit_label (label2);
25093 /* Output code to perform a Newton-Rhapson approximation of a single precision
25094 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
/* res = a / b approximated with one Newton-Raphson refinement of the
   hardware rcpss/rcpps reciprocal estimate.  MODE may be scalar SFmode
   or a vector of SFmode (the 2.0 constant is broadcast accordingly).  */
25096 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
25098 rtx x0, x1, e0, e1, two;
25100 x0 = gen_reg_rtx (mode);
25101 e0 = gen_reg_rtx (mode);
25102 e1 = gen_reg_rtx (mode);
25103 x1 = gen_reg_rtx (mode);
25105 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
25107 if (VECTOR_MODE_P (mode))
25108 two = ix86_build_const_vector (SFmode, true, two);
25110 two = force_reg (mode, two);
25112 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
25114 /* x0 = rcp(b) estimate */
25115 emit_insn (gen_rtx_SET (VOIDmode, x0,
25116 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
/* e0 = b * x0  */
25119 emit_insn (gen_rtx_SET (VOIDmode, e0,
25120 gen_rtx_MULT (mode, x0, b)));
/* e1 = 2.0 - e0  */
25122 emit_insn (gen_rtx_SET (VOIDmode, e1,
25123 gen_rtx_MINUS (mode, two, e0)));
/* x1 = x0 * e1  (refined reciprocal)  */
25125 emit_insn (gen_rtx_SET (VOIDmode, x1,
25126 gen_rtx_MULT (mode, x0, e1)));
/* res = a * x1  */
25128 emit_insn (gen_rtx_SET (VOIDmode, res,
25129 gen_rtx_MULT (mode, a, x1)));
25132 /* Output code to perform a Newton-Rhapson approximation of a
25133 single precision floating point [reciprocal] square root. */
/* res = sqrt(a) or rsqrt(a), refined from the rsqrtss/rsqrtps
   estimate with one Newton-Raphson step.  The boolean third parameter
   (name elided in this excerpt) selects which of the two formulas in
   the comment below is emitted.  */
25135 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
25138 rtx x0, e0, e1, e2, e3, mthree, mhalf;
25141 x0 = gen_reg_rtx (mode);
25142 e0 = gen_reg_rtx (mode);
25143 e1 = gen_reg_rtx (mode);
25144 e2 = gen_reg_rtx (mode);
25145 e3 = gen_reg_rtx (mode);
/* Build the constants -3.0 and -0.5 (broadcast for vector modes).  */
25147 real_from_integer (&r, VOIDmode, -3, -1, 0);
25148 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
25150 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
25151 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
25153 if (VECTOR_MODE_P (mode))
25155 mthree = ix86_build_const_vector (SFmode, true, mthree);
25156 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
25159 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
25160 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
25162 /* x0 = rsqrt(a) estimate */
25163 emit_insn (gen_rtx_SET (VOIDmode, x0,
25164 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
25167 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
25172 zero = gen_reg_rtx (mode);
25173 mask = gen_reg_rtx (mode);
/* mask = (a != 0) ? all-ones : 0; AND zeroes x0 where a == 0.  */
25175 zero = force_reg (mode, CONST0_RTX(mode));
25176 emit_insn (gen_rtx_SET (VOIDmode, mask,
25177 gen_rtx_NE (mode, zero, a)));
25179 emit_insn (gen_rtx_SET (VOIDmode, x0,
25180 gen_rtx_AND (mode, x0, mask)));
/* e0 = x0 * a  */
25184 emit_insn (gen_rtx_SET (VOIDmode, e0,
25185 gen_rtx_MULT (mode, x0, a)));
/* e1 = e0 * x0  */
25187 emit_insn (gen_rtx_SET (VOIDmode, e1,
25188 gen_rtx_MULT (mode, e0, x0)));
/* e2 = e1 - 3.0  */
25191 mthree = force_reg (mode, mthree);
25192 emit_insn (gen_rtx_SET (VOIDmode, e2,
25193 gen_rtx_PLUS (mode, e1, mthree)));
25195 mhalf = force_reg (mode, mhalf);
25197 /* e3 = -.5 * x0 */
25198 emit_insn (gen_rtx_SET (VOIDmode, e3,
25199 gen_rtx_MULT (mode, x0, mhalf)));
25201 /* e3 = -.5 * e0 */
25202 emit_insn (gen_rtx_SET (VOIDmode, e3,
25203 gen_rtx_MULT (mode, e0, mhalf)));
25204 /* ret = e2 * e3 */
25205 emit_insn (gen_rtx_SET (VOIDmode, res,
25206 gen_rtx_MULT (mode, e2, e3)));
25209 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emits the ".eh_frame" section directive with the "@unwind" marker
   required by Binutils 2.15 on Solaris; all other sections defer to
   the default ELF handler.
   NOTE(review): part of the guarding condition on line 25219 is
   elided in this excerpt.  */
25211 static void ATTRIBUTE_UNUSED
25212 i386_solaris_elf_named_section (const char *name, unsigned int flags,
25215 /* With Binutils 2.15, the "@unwind" marker must be specified on
25216 every occurrence of the ".eh_frame" section, not just the first
25219 && strcmp (name, ".eh_frame") == 0)
25221 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
25222 flags & SECTION_WRITE ? "aw" : "a");
25225 default_elf_asm_named_section (name, flags, decl);
25228 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* Itanium C++ ABI vendor mangling: TFmode types mangle as "g"
   (__float128), XFmode as "e" (long double / __float80).  Other types
   are not handled here.
   NOTE(review): the case labels and return statements of the switch
   are elided in this excerpt.  */
25230 static const char *
25231 ix86_mangle_type (const_tree type)
25233 type = TYPE_MAIN_VARIANT (type);
25235 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
25236 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
25239 switch (TYPE_MODE (type))
25242 /* __float128 is "g". */
25245 /* "long double" or __float80 is "e". */
25252 /* For 32-bit code we can save PIC register setup by using
25253 __stack_chk_fail_local hidden function instead of calling
25254 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
25255 register, so it is better to call __stack_chk_fail directly. */
/* Returns the call sequence used when a stack-protector check fails,
   choosing the variant per the rationale above.  */
25258 ix86_stack_protect_fail (void)
25260 return TARGET_64BIT
25261 ? default_external_stack_protect_fail ()
25262 : default_hidden_stack_protect_fail ();
25265 /* Select a format to encode pointers in exception handling data. CODE
25266 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
25267 true if the symbol may be affected by dynamic relocations.
25269 ??? All x86 object file formats are capable of representing this.
25270 After all, the relocation needed is the same as for the call insn.
25271 Whether or not a particular assembler allows us to enter such, I
25272 guess we'll have to see. */
/* PIC path (guard elided in this excerpt): pc-relative sdata, 4-byte
   for small/medium code models, 8-byte otherwise, indirect for global
   symbols.  Non-PIC: absolute, with udata4 when the small/medium code
   model guarantees 32-bit addresses.  */
25274 asm_preferred_eh_data_format (int code, int global)
25278 int type = DW_EH_PE_sdata8;
25280 || ix86_cmodel == CM_SMALL_PIC
25281 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
25282 type = DW_EH_PE_sdata4;
25283 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
25285 if (ix86_cmodel == CM_SMALL
25286 || (ix86_cmodel == CM_MEDIUM && code))
25287 return DW_EH_PE_udata4;
25288 return DW_EH_PE_absptr;
25291 /* Expand copysign from SIGN to the positive value ABS_VALUE
25292 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* ... the sign bit (continuation elided in this excerpt).
   RESULT = ABS_VALUE | (SIGN & signbit-mask).  ABS_VALUE is assumed
   non-negative, so plain OR suffices instead of ANDN+OR.  */
25295 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
25297 enum machine_mode mode = GET_MODE (sign);
25298 rtx sgn = gen_reg_rtx (mode);
25299 if (mask == NULL_RTX)
/* Build a fresh sign-bit mask; vector masks need narrowing to the
   scalar mode via VEC_SELECT of element 0.  */
25301 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
25302 if (!VECTOR_MODE_P (mode))
25304 /* We need to generate a scalar mode mask in this case. */
25305 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25306 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25307 mask = gen_reg_rtx (mode);
25308 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
/* Caller-supplied MASK masks out the sign bit, so invert it first.  */
25312 mask = gen_rtx_NOT (mode, mask);
25313 emit_insn (gen_rtx_SET (VOIDmode, sgn,
25314 gen_rtx_AND (mode, mask, sign)));
25315 emit_insn (gen_rtx_SET (VOIDmode, result,
25316 gen_rtx_IOR (mode, abs_value, sgn)));
25319 /* Expand fabs (OP0) and return a new rtx that holds the result. The
25320 mask for masking out the sign-bit is stored in *SMASK, if that is
/* ... non-null (continuation elided in this excerpt).
   Computes xa = op0 & ~signbit using a generated sign-bit mask.  */
25323 ix86_expand_sse_fabs (rtx op0, rtx *smask)
25325 enum machine_mode mode = GET_MODE (op0);
25328 xa = gen_reg_rtx (mode);
25329 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
25330 if (!VECTOR_MODE_P (mode))
25332 /* We need to generate a scalar mode mask in this case. */
25333 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
25334 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
25335 mask = gen_reg_rtx (mode);
25336 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
25338 emit_insn (gen_rtx_SET (VOIDmode, xa,
25339 gen_rtx_AND (mode, op0, mask)));
25347 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
25348 swapping the operands if SWAP_OPERANDS is true. The expanded
25349 code is a forward jump to a newly created label in case the
25350 comparison is true. The generated label rtx is returned. */
25352 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
25353 bool swap_operands)
/* Compare via the unordered FP condition mode (CCFPUmode) so NaN
   operands take the expected branch, then emit the conditional
   jump and record the label use.  */
25364 label = gen_label_rtx ();
25365 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
25366 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25367 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
25368 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
25369 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
25370 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
25371 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
25372 JUMP_LABEL (tmp) = label;
25377 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
25378 using comparison code CODE. Operands are swapped for the comparison if
25379 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
25381 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
25382 bool swap_operands)
25384 enum machine_mode mode = GET_MODE (op0);
25385 rtx mask = gen_reg_rtx (mode);
/* Emit cmpsd/cmpss producing an all-ones/all-zeros mask; pattern
   choice depends on DFmode vs SFmode.  */
25394 if (mode == DFmode)
25395 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
25396 gen_rtx_fmt_ee (code, mode, op0, op1)));
25398 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
25399 gen_rtx_fmt_ee (code, mode, op0, op1)));
25404 /* Generate and return a rtx of mode MODE for 2**n where n is the number
25405 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2^52 for DFmode, 2^23 for SFmode — the magnitude above which every
   representable value is already an integer; used by the rounding
   expanders below.  */
25407 ix86_gen_TWO52 (enum machine_mode mode)
25409 REAL_VALUE_TYPE TWO52r;
25412 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
25413 TWO52 = const_double_from_real_value (TWO52r, mode);
25414 TWO52 = force_reg (mode, TWO52);
25419 /* Expand SSE sequence for computing lround from OP1 storing
/* ... into OP0 (continuation elided in this excerpt).  */
25422 ix86_expand_lround (rtx op0, rtx op1)
25424 /* C code for the stuff we're doing below:
25425 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
/* ... return (long)tmp; — uses nextafter(0.5, 0.0) rather than 0.5
   so that values exactly halfway round away from zero despite the
   truncating conversion.  */
25428 enum machine_mode mode = GET_MODE (op1);
25429 const struct real_format *fmt;
25430 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25433 /* load nextafter (0.5, 0.0) */
25434 fmt = REAL_MODE_FORMAT (mode);
25435 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25436 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25438 /* adj = copysign (0.5, op1) */
25439 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
25440 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
25442 /* adj = op1 + adj */
25443 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
25445 /* op0 = (imode)adj */
25446 expand_fix (op0, adj, 0);
25449 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* ... into OPERAND0; despite the comment this expands lfloor/lceil,
   selected by DO_FLOOR (continuation elided in this excerpt).  */
25452 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
25454 /* C code for the stuff we're doing below (for do_floor):
25456 xi -= (double)xi > op1 ? 1 : 0;
/* ... i.e. truncate, then compensate by +-1 when the truncation went
   the wrong way for floor/ceil.  */
25459 enum machine_mode fmode = GET_MODE (op1);
25460 enum machine_mode imode = GET_MODE (op0);
25461 rtx ireg, freg, label, tmp;
25463 /* reg = (long)op1 */
25464 ireg = gen_reg_rtx (imode);
25465 expand_fix (ireg, op1, 0);
25467 /* freg = (double)reg */
25468 freg = gen_reg_rtx (fmode);
25469 expand_float (freg, ireg, 0);
25471 /* ireg = (freg > op1) ? ireg - 1 : ireg */
/* Jump past the adjustment when compensation is NOT needed; UNLE so
   unordered (NaN) also skips it.  */
25472 label = ix86_expand_sse_compare_and_jump (UNLE,
25473 freg, op1, !do_floor);
25474 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
25475 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
25476 emit_move_insn (ireg, tmp);
25478 emit_label (label);
25479 LABEL_NUSES (label) = 1;
25481 emit_move_insn (op0, ireg);
25484 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
25485 result in OPERAND0. */
25487 ix86_expand_rint (rtx operand0, rtx operand1)
25489 /* C code for the stuff we're doing below:
25490 xa = fabs (operand1);
25491 if (!isless (xa, 2**52))
/* ...   return operand1;  (already integral / NaN) */
25493 xa = xa + 2**52 - 2**52;
25494 return copysign (xa, operand1);
/* The add/subtract of 2^52 forces rounding to integer in the current
   rounding mode; copysign restores -0.0 and the original sign.  */
25496 enum machine_mode mode = GET_MODE (operand0);
25497 rtx res, xa, label, TWO52, mask;
25499 res = gen_reg_rtx (mode);
25500 emit_move_insn (res, operand1);
25502 /* xa = abs (operand1) */
25503 xa = ix86_expand_sse_fabs (res, &mask);
25505 /* if (!isless (xa, TWO52)) goto label; */
25506 TWO52 = ix86_gen_TWO52 (mode);
25507 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25509 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25510 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25512 ix86_sse_copysign_to_positive (res, xa, res, mask);
25514 emit_label (label);
25515 LABEL_NUSES (label) = 1;
25517 emit_move_insn (operand0, res);
25520 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* ... into OPERAND0 (continuation elided).  32-bit-safe variant: it
   avoids the DImode fix/float round trip by using the TWO52 trick,
   then compensates by +-1 where the rounding went the wrong way.  */
25523 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
25525 /* C code for the stuff we expand below.
25526 double xa = fabs (x), x2;
25527 if (!isless (xa, TWO52))
/* ...   return x;  */
25529 xa = xa + TWO52 - TWO52;
25530 x2 = copysign (xa, x);
/* ... followed by the +-1 compensation shown at lines 25567-25572.  */
25539 enum machine_mode mode = GET_MODE (operand0);
25540 rtx xa, TWO52, tmp, label, one, res, mask;
25542 TWO52 = ix86_gen_TWO52 (mode);
25544 /* Temporary for holding the result, initialized to the input
25545 operand to ease control flow. */
25546 res = gen_reg_rtx (mode);
25547 emit_move_insn (res, operand1);
25549 /* xa = abs (operand1) */
25550 xa = ix86_expand_sse_fabs (res, &mask);
25552 /* if (!isless (xa, TWO52)) goto label; */
25553 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25555 /* xa = xa + TWO52 - TWO52; */
25556 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25557 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
25559 /* xa = copysign (xa, operand1) */
25560 ix86_sse_copysign_to_positive (xa, xa, res, mask);
25562 /* generate 1.0 or -1.0 */
25563 one = force_reg (mode,
25564 const_double_from_real_value (do_floor
25565 ? dconst1 : dconstm1, mode));
25567 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25568 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25569 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25570 gen_rtx_AND (mode, one, tmp)));
25571 /* We always need to subtract here to preserve signed zero. */
25572 tmp = expand_simple_binop (mode, MINUS,
25573 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25574 emit_move_insn (res, tmp);
25576 emit_label (label);
25577 LABEL_NUSES (label) = 1;
25579 emit_move_insn (operand0, res);
25582 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* ... into OPERAND0 (continuation elided).  This variant truncates
   via fix/float (needs DImode cvttsd2siq for DFmode, i.e. 64-bit
   targets) and then compensates by +-1.  */
25585 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
25587 /* C code for the stuff we expand below.
25588 double xa = fabs (x), x2;
25589 if (!isless (xa, TWO52))
/* ...   return x;  */
25591 x2 = (double)(long)x;
/* ... compensation elided here; see lines 25626-25631 below ... */
25598 if (HONOR_SIGNED_ZEROS (mode))
25599 return copysign (x2, x);
/* ... return x2;  */
25602 enum machine_mode mode = GET_MODE (operand0);
25603 rtx xa, xi, TWO52, tmp, label, one, res, mask;
25605 TWO52 = ix86_gen_TWO52 (mode);
25607 /* Temporary for holding the result, initialized to the input
25608 operand to ease control flow. */
25609 res = gen_reg_rtx (mode);
25610 emit_move_insn (res, operand1);
25612 /* xa = abs (operand1) */
25613 xa = ix86_expand_sse_fabs (res, &mask);
25615 /* if (!isless (xa, TWO52)) goto label; */
25616 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25618 /* xa = (double)(long)x */
25619 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25620 expand_fix (xi, res, 0);
25621 expand_float (xa, xi, 0);
25624 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25626 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
25627 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
25628 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25629 gen_rtx_AND (mode, one, tmp)));
25630 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
25631 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25632 emit_move_insn (res, tmp);
25634 if (HONOR_SIGNED_ZEROS (mode))
25635 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25637 emit_label (label);
25638 LABEL_NUSES (label) = 1;
25640 emit_move_insn (operand0, res);
25643 /* Expand SSE sequence for computing round from OPERAND1 storing
25644 into OPERAND0. Sequence that works without relying on DImode truncation
25645 via cvttsd2siq that is only available on 64bit targets. */
25647 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
25649 /* C code for the stuff we expand below.
25650 double xa = fabs (x), xa2, x2;
25651 if (!isless (xa, TWO52))
/* ...   return x;  */
25653 Using the absolute value and copying back sign makes
25654 -0.0 -> -0.0 correct.
25655 xa2 = xa + TWO52 - TWO52;
/* ... dxa = xa2 - xa; then adjust xa2 by -+1 when the rounding error
   dxa exceeds 0.5 in either direction (round-half-away-from-zero):  */
25660 else if (dxa > 0.5)
25662 x2 = copysign (xa2, x);
/* ... return x2;  */
25665 enum machine_mode mode = GET_MODE (operand0);
25666 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
25668 TWO52 = ix86_gen_TWO52 (mode);
25670 /* Temporary for holding the result, initialized to the input
25671 operand to ease control flow. */
25672 res = gen_reg_rtx (mode);
25673 emit_move_insn (res, operand1);
25675 /* xa = abs (operand1) */
25676 xa = ix86_expand_sse_fabs (res, &mask);
25678 /* if (!isless (xa, TWO52)) goto label; */
25679 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25681 /* xa2 = xa + TWO52 - TWO52; */
25682 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25683 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
25685 /* dxa = xa2 - xa; */
25686 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
25688 /* generate 0.5, 1.0 and -0.5 */
25689 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
25690 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
25691 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
25695 tmp = gen_reg_rtx (mode);
25696 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
25697 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
25698 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25699 gen_rtx_AND (mode, one, tmp)));
25700 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25701 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
25702 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
25703 emit_insn (gen_rtx_SET (VOIDmode, tmp,
25704 gen_rtx_AND (mode, one, tmp)));
25705 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
25707 /* res = copysign (xa2, operand1) */
25708 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
25710 emit_label (label);
25711 LABEL_NUSES (label) = 1;
25713 emit_move_insn (operand0, res);
25716 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* ... into OPERAND0 (continuation elided).  Truncation via fix/float
   round trip; needs cvttsd2siq (64-bit) for DFmode.  */
25719 ix86_expand_trunc (rtx operand0, rtx operand1)
25721 /* C code for SSE variant we expand below.
25722 double xa = fabs (x), x2;
25723 if (!isless (xa, TWO52))
/* ...   return x;  (already integral / NaN) */
25725 x2 = (double)(long)x;
25726 if (HONOR_SIGNED_ZEROS (mode))
25727 return copysign (x2, x);
/* ... return x2;  */
25730 enum machine_mode mode = GET_MODE (operand0);
25731 rtx xa, xi, TWO52, label, res, mask;
25733 TWO52 = ix86_gen_TWO52 (mode);
25735 /* Temporary for holding the result, initialized to the input
25736 operand to ease control flow. */
25737 res = gen_reg_rtx (mode);
25738 emit_move_insn (res, operand1);
25740 /* xa = abs (operand1) */
25741 xa = ix86_expand_sse_fabs (res, &mask);
25743 /* if (!isless (xa, TWO52)) goto label; */
25744 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25746 /* x = (double)(long)x */
25747 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25748 expand_fix (xi, res, 0);
25749 expand_float (res, xi, 0);
25751 if (HONOR_SIGNED_ZEROS (mode))
25752 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
25754 emit_label (label);
25755 LABEL_NUSES (label) = 1;
25757 emit_move_insn (operand0, res);
25760 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* ... into OPERAND0 (continuation elided).  32-bit-safe DFmode
   variant: rounds |x| with the TWO52 trick, subtracts 1 where the
   rounding overshot, then restores the sign — avoiding the 64-bit
   cvttsd2siq.  */
25763 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
25765 enum machine_mode mode = GET_MODE (operand0);
25766 rtx xa, mask, TWO52, label, one, res, smask, tmp;
25768 /* C code for SSE variant we expand below.
25769 double xa = fabs (x), x2;
25770 if (!isless (xa, TWO52))
/* ...   return x;  */
25772 xa2 = xa + TWO52 - TWO52;
/* ... xa2 -= (xa2 > xa) ? 1 : 0;  */
25776 x2 = copysign (xa2, x);
/* ... return x2;  */
25780 TWO52 = ix86_gen_TWO52 (mode);
25782 /* Temporary for holding the result, initialized to the input
25783 operand to ease control flow. */
25784 res = gen_reg_rtx (mode);
25785 emit_move_insn (res, operand1);
25787 /* xa = abs (operand1) */
25788 xa = ix86_expand_sse_fabs (res, &smask);
25790 /* if (!isless (xa, TWO52)) goto label; */
25791 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25793 /* res = xa + TWO52 - TWO52; */
25794 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
25795 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
25796 emit_move_insn (res, tmp);
25799 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
25801 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
25802 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
25803 emit_insn (gen_rtx_SET (VOIDmode, mask,
25804 gen_rtx_AND (mode, mask, one)));
25805 tmp = expand_simple_binop (mode, MINUS,
25806 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
25807 emit_move_insn (res, tmp);
25809 /* res = copysign (res, operand1) */
25810 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
25812 emit_label (label);
25813 LABEL_NUSES (label) = 1;
25815 emit_move_insn (operand0, res);
25818 /* Expand SSE sequence for computing round from OPERAND1 storing
/* ... into OPERAND0 (continuation elided).  Round half away from
   zero: add nextafter(0.5, 0.0) to |x|, truncate via fix/float, then
   restore the sign.  The predecessor-of-0.5 constant avoids rounding
   x.5-epsilon up through the intermediate addition.  */
25821 ix86_expand_round (rtx operand0, rtx operand1)
25823 /* C code for the stuff we're doing below:
25824 double xa = fabs (x);
25825 if (!isless (xa, TWO52))
/* ...   return x;  */
25827 xa = (double)(long)(xa + nextafter (0.5, 0.0));
25828 return copysign (xa, x);
25830 enum machine_mode mode = GET_MODE (operand0);
25831 rtx res, TWO52, xa, label, xi, half, mask;
25832 const struct real_format *fmt;
25833 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
25835 /* Temporary for holding the result, initialized to the input
25836 operand to ease control flow. */
25837 res = gen_reg_rtx (mode);
25838 emit_move_insn (res, operand1);
25840 TWO52 = ix86_gen_TWO52 (mode);
25841 xa = ix86_expand_sse_fabs (res, &mask);
25842 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
25844 /* load nextafter (0.5, 0.0) */
25845 fmt = REAL_MODE_FORMAT (mode);
25846 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
25847 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
25849 /* xa = xa + 0.5 */
25850 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
25851 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
25853 /* xa = (double)(int64_t)xa */
25854 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
25855 expand_fix (xi, xa, 0);
25856 expand_float (xa, xi, 0);
25858 /* res = copysign (xa, operand1) */
25859 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
25861 emit_label (label);
25862 LABEL_NUSES (label) = 1;
25864 emit_move_insn (operand0, res);
25868 /* Validate whether a SSE5 instruction is valid or not.
25869 OPERANDS is the array of operands.
25870 NUM is the number of operands.
25871 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
25872 NUM_MEMORY is the maximum number of memory operands to accept.
25873 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
25876 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
25877 bool uses_oc0, int num_memory, bool commutative)
25883 /* Count the number of memory arguments */
25886 for (i = 0; i < num; i++)
25888 enum machine_mode mode = GET_MODE (operands[i]);
25889 if (register_operand (operands[i], mode))
25892 else if (memory_operand (operands[i], mode))
25894 mem_mask |= (1 << i);
25900 rtx pattern = PATTERN (insn);
25902 /* allow 0 for pcmov */
25903 if (GET_CODE (pattern) != SET
25904 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
25906 || operands[i] != CONST0_RTX (mode))
25911 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
25912 a memory operation. */
25913 if (num_memory < 0)
25915 num_memory = -num_memory;
25916 if ((mem_mask & (1 << (num-1))) != 0)
25918 mem_mask &= ~(1 << (num-1));
25923 /* If there were no memory operations, allow the insn */
25927 /* Do not allow the destination register to be a memory operand. */
25928 else if (mem_mask & (1 << 0))
25931 /* If there are too many memory operations, disallow the instruction. While
25932 the hardware only allows 1 memory reference, before register allocation
25933 for some insns, we allow two memory operations sometimes in order to allow
25934 code like the following to be optimized:
25936 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
25938 or similar cases that are vectorized into using the fmaddss
25940 else if (mem_count > num_memory)
25943 /* Don't allow more than one memory operation if not optimizing. */
25944 else if (mem_count > 1 && !optimize)
25947 else if (num == 4 && mem_count == 1)
25949 /* formats (destination is the first argument), example fmaddss:
25950 xmm1, xmm1, xmm2, xmm3/mem
25951 xmm1, xmm1, xmm2/mem, xmm3
25952 xmm1, xmm2, xmm3/mem, xmm1
25953 xmm1, xmm2/mem, xmm3, xmm1 */
25955 return ((mem_mask == (1 << 1))
25956 || (mem_mask == (1 << 2))
25957 || (mem_mask == (1 << 3)));
25959 /* format, example pmacsdd:
25960 xmm1, xmm2, xmm3/mem, xmm1 */
25962 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
25964 return (mem_mask == (1 << 2));
25967 else if (num == 4 && num_memory == 2)
25969 /* If there are two memory operations, we can load one of the memory ops
25970 into the destination register. This is for optimizing the
25971 multiply/add ops, which the combiner has optimized both the multiply
25972 and the add insns to have a memory operation. We have to be careful
25973 that the destination doesn't overlap with the inputs. */
25974 rtx op0 = operands[0];
25976 if (reg_mentioned_p (op0, operands[1])
25977 || reg_mentioned_p (op0, operands[2])
25978 || reg_mentioned_p (op0, operands[3]))
25981 /* formats (destination is the first argument), example fmaddss:
25982 xmm1, xmm1, xmm2, xmm3/mem
25983 xmm1, xmm1, xmm2/mem, xmm3
25984 xmm1, xmm2, xmm3/mem, xmm1
25985 xmm1, xmm2/mem, xmm3, xmm1
25987 For the oc0 case, we will load either operands[1] or operands[3] into
25988 operands[0], so any combination of 2 memory operands is ok. */
25992 /* format, example pmacsdd:
25993 xmm1, xmm2, xmm3/mem, xmm1
25995 For the integer multiply/add instructions be more restrictive and
25996 require operands[2] and operands[3] to be the memory operands. */
25998 return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3)));
26000 return (mem_mask == ((1 << 2) | (1 << 3)));
26003 else if (num == 3 && num_memory == 1)
26005 /* formats, example protb:
26006 xmm1, xmm2, xmm3/mem
26007 xmm1, xmm2/mem, xmm3 */
26009 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
26011 /* format, example comeq:
26012 xmm1, xmm2, xmm3/mem */
26014 return (mem_mask == (1 << 2));
26018 gcc_unreachable ();
26024 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
26025 hardware will allow by using the destination register to load one of the
26026 memory operations. Presently this is used by the multiply/add routines to
26027 allow 2 memory references. */
26030 ix86_expand_sse5_multiple_memory (rtx operands[],
26032 enum machine_mode mode)
26034 rtx op0 = operands[0];
26036 || memory_operand (op0, mode)
26037 || reg_mentioned_p (op0, operands[1])
26038 || reg_mentioned_p (op0, operands[2])
26039 || reg_mentioned_p (op0, operands[3]))
26040 gcc_unreachable ();
26042 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
26043 the destination register. */
26044 if (memory_operand (operands[1], mode))
26046 emit_move_insn (op0, operands[1]);
26049 else if (memory_operand (operands[3], mode))
26051 emit_move_insn (op0, operands[3]);
26055 gcc_unreachable ();
26061 /* Table of valid machine attributes. */
26062 static const struct attribute_spec ix86_attribute_table[] =
26064 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
26065 /* Stdcall attribute says callee is responsible for popping arguments
26066 if they are not variable. */
26067 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26068 /* Fastcall attribute says callee is responsible for popping arguments
26069 if they are not variable. */
26070 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26071 /* Cdecl attribute says the callee is a normal C declaration */
26072 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26073 /* Regparm attribute specifies how many integer arguments are to be
26074 passed in registers. */
26075 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
26076 /* Sseregparm attribute says we are using x86_64 calling conventions
26077 for FP arguments. */
26078 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
26079 /* force_align_arg_pointer says this function realigns the stack at entry. */
26080 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
26081 false, true, true, ix86_handle_cconv_attribute },
26082 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26083 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
26084 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
26085 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
26087 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
26088 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
26089 #ifdef SUBTARGET_ATTRIBUTE_TABLE
26090 SUBTARGET_ATTRIBUTE_TABLE,
26092 /* ms_abi and sysv_abi calling convention function attributes. */
26093 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
26094 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
26096 { NULL, 0, 0, false, false, false, NULL }
26099 /* Implement targetm.vectorize.builtin_vectorization_cost. */
26101 x86_builtin_vectorization_cost (bool runtime_test)
26103 /* If the branch of the runtime test is taken - i.e. - the vectorized
26104 version is skipped - this incurs a misprediction cost (because the
26105 vectorized version is expected to be the fall-through). So we subtract
26106 the latency of a mispredicted branch from the costs that are incured
26107 when the vectorized version is executed.
26109 TODO: The values in individual target tables have to be tuned or new
26110 fields may be needed. For eg. on K8, the default branch path is the
26111 not-taken path. If the taken path is predicted correctly, the minimum
26112 penalty of going down the taken-path is 1 cycle. If the taken-path is
26113 not predicted correctly, then the minimum penalty is 10 cycles. */
26117 return (-(ix86_cost->cond_taken_branch_cost));
26123 /* This function returns the calling abi specific va_list type node.
26124 It returns the FNDECL specific va_list type. */
26127 ix86_fn_abi_va_list (tree fndecl)
26132 return va_list_type_node;
26133 gcc_assert (fndecl != NULL_TREE);
26134 abi = ix86_function_abi ((const_tree) fndecl);
26137 return ms_va_list_type_node;
26139 return sysv_va_list_type_node;
26142 /* Returns the canonical va_list type specified by TYPE. If there
26143 is no valid TYPE provided, it return NULL_TREE. */
26146 ix86_canonical_va_list_type (tree type)
26150 /* Resolve references and pointers to va_list type. */
26151 if (INDIRECT_REF_P (type))
26152 type = TREE_TYPE (type);
26153 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
26154 type = TREE_TYPE (type);
26158 wtype = va_list_type_node;
26159 gcc_assert (wtype != NULL_TREE);
26161 if (TREE_CODE (wtype) == ARRAY_TYPE)
26163 /* If va_list is an array type, the argument may have decayed
26164 to a pointer type, e.g. by being passed to another function.
26165 In that case, unwrap both types so that we can compare the
26166 underlying records. */
26167 if (TREE_CODE (htype) == ARRAY_TYPE
26168 || POINTER_TYPE_P (htype))
26170 wtype = TREE_TYPE (wtype);
26171 htype = TREE_TYPE (htype);
26174 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
26175 return va_list_type_node;
26176 wtype = sysv_va_list_type_node;
26177 gcc_assert (wtype != NULL_TREE);
26179 if (TREE_CODE (wtype) == ARRAY_TYPE)
26181 /* If va_list is an array type, the argument may have decayed
26182 to a pointer type, e.g. by being passed to another function.
26183 In that case, unwrap both types so that we can compare the
26184 underlying records. */
26185 if (TREE_CODE (htype) == ARRAY_TYPE
26186 || POINTER_TYPE_P (htype))
26188 wtype = TREE_TYPE (wtype);
26189 htype = TREE_TYPE (htype);
26192 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
26193 return sysv_va_list_type_node;
26194 wtype = ms_va_list_type_node;
26195 gcc_assert (wtype != NULL_TREE);
26197 if (TREE_CODE (wtype) == ARRAY_TYPE)
26199 /* If va_list is an array type, the argument may have decayed
26200 to a pointer type, e.g. by being passed to another function.
26201 In that case, unwrap both types so that we can compare the
26202 underlying records. */
26203 if (TREE_CODE (htype) == ARRAY_TYPE
26204 || POINTER_TYPE_P (htype))
26206 wtype = TREE_TYPE (wtype);
26207 htype = TREE_TYPE (htype);
26210 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
26211 return ms_va_list_type_node;
26214 return std_canonical_va_list_type (type);
26217 /* Iterate through the target-specific builtin types for va_list.
26218 IDX denotes the iterator, *PTREE is set to the result type of
26219 the va_list builtin, and *PNAME to its internal type.
26220 Returns zero if there is no element for this index, otherwise
26221 IDX should be increased upon the next call.
26222 Note, do not iterate a base builtin's name like __builtin_va_list.
26223 Used from c_common_nodes_and_builtins. */
26226 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
26232 *ptree = ms_va_list_type_node;
26233 *pname = "__builtin_ms_va_list";
26236 *ptree = sysv_va_list_type_node;
26237 *pname = "__builtin_sysv_va_list";
26245 /* Initialize the GCC target structure. */
26246 #undef TARGET_RETURN_IN_MEMORY
26247 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
26249 #undef TARGET_ATTRIBUTE_TABLE
26250 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
26251 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26252 # undef TARGET_MERGE_DECL_ATTRIBUTES
26253 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
26256 #undef TARGET_COMP_TYPE_ATTRIBUTES
26257 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
26259 #undef TARGET_INIT_BUILTINS
26260 #define TARGET_INIT_BUILTINS ix86_init_builtins
26261 #undef TARGET_EXPAND_BUILTIN
26262 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
26264 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
26265 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
26266 ix86_builtin_vectorized_function
26268 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
26269 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
26271 #undef TARGET_BUILTIN_RECIPROCAL
26272 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
26274 #undef TARGET_ASM_FUNCTION_EPILOGUE
26275 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
26277 #undef TARGET_ENCODE_SECTION_INFO
26278 #ifndef SUBTARGET_ENCODE_SECTION_INFO
26279 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
26281 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
26284 #undef TARGET_ASM_OPEN_PAREN
26285 #define TARGET_ASM_OPEN_PAREN ""
26286 #undef TARGET_ASM_CLOSE_PAREN
26287 #define TARGET_ASM_CLOSE_PAREN ""
26289 #undef TARGET_ASM_ALIGNED_HI_OP
26290 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
26291 #undef TARGET_ASM_ALIGNED_SI_OP
26292 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
26294 #undef TARGET_ASM_ALIGNED_DI_OP
26295 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
26298 #undef TARGET_ASM_UNALIGNED_HI_OP
26299 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
26300 #undef TARGET_ASM_UNALIGNED_SI_OP
26301 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
26302 #undef TARGET_ASM_UNALIGNED_DI_OP
26303 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
26305 #undef TARGET_SCHED_ADJUST_COST
26306 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
26307 #undef TARGET_SCHED_ISSUE_RATE
26308 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
26309 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
26310 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
26311 ia32_multipass_dfa_lookahead
26313 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
26314 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
26317 #undef TARGET_HAVE_TLS
26318 #define TARGET_HAVE_TLS true
26320 #undef TARGET_CANNOT_FORCE_CONST_MEM
26321 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
26322 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
26323 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
26325 #undef TARGET_DELEGITIMIZE_ADDRESS
26326 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
26328 #undef TARGET_MS_BITFIELD_LAYOUT_P
26329 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
26332 #undef TARGET_BINDS_LOCAL_P
26333 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
26335 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
26336 #undef TARGET_BINDS_LOCAL_P
26337 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
26340 #undef TARGET_ASM_OUTPUT_MI_THUNK
26341 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
26342 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
26343 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
26345 #undef TARGET_ASM_FILE_START
26346 #define TARGET_ASM_FILE_START x86_file_start
26348 #undef TARGET_DEFAULT_TARGET_FLAGS
26349 #define TARGET_DEFAULT_TARGET_FLAGS \
26351 | TARGET_SUBTARGET_DEFAULT \
26352 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
26354 #undef TARGET_HANDLE_OPTION
26355 #define TARGET_HANDLE_OPTION ix86_handle_option
26357 #undef TARGET_RTX_COSTS
26358 #define TARGET_RTX_COSTS ix86_rtx_costs
26359 #undef TARGET_ADDRESS_COST
26360 #define TARGET_ADDRESS_COST ix86_address_cost
26362 #undef TARGET_FIXED_CONDITION_CODE_REGS
26363 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
26364 #undef TARGET_CC_MODES_COMPATIBLE
26365 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
26367 #undef TARGET_MACHINE_DEPENDENT_REORG
26368 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
26370 #undef TARGET_BUILD_BUILTIN_VA_LIST
26371 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
26373 #undef TARGET_FN_ABI_VA_LIST
26374 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
26376 #undef TARGET_CANONICAL_VA_LIST_TYPE
26377 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
26379 #undef TARGET_EXPAND_BUILTIN_VA_START
26380 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
26382 #undef TARGET_MD_ASM_CLOBBERS
26383 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
26385 #undef TARGET_PROMOTE_PROTOTYPES
26386 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
26387 #undef TARGET_STRUCT_VALUE_RTX
26388 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
26389 #undef TARGET_SETUP_INCOMING_VARARGS
26390 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
26391 #undef TARGET_MUST_PASS_IN_STACK
26392 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
26393 #undef TARGET_PASS_BY_REFERENCE
26394 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
26395 #undef TARGET_INTERNAL_ARG_POINTER
26396 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
26397 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
26398 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
26399 #undef TARGET_STRICT_ARGUMENT_NAMING
26400 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
26402 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
26403 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
26405 #undef TARGET_SCALAR_MODE_SUPPORTED_P
26406 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
26408 #undef TARGET_VECTOR_MODE_SUPPORTED_P
26409 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
26411 #undef TARGET_C_MODE_FOR_SUFFIX
26412 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
26415 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
26416 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
26419 #ifdef SUBTARGET_INSERT_ATTRIBUTES
26420 #undef TARGET_INSERT_ATTRIBUTES
26421 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
26424 #undef TARGET_MANGLE_TYPE
26425 #define TARGET_MANGLE_TYPE ix86_mangle_type
26427 #undef TARGET_STACK_PROTECT_FAIL
26428 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
26430 #undef TARGET_FUNCTION_VALUE
26431 #define TARGET_FUNCTION_VALUE ix86_function_value
26433 #undef TARGET_SECONDARY_RELOAD
26434 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
26436 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
26437 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
26439 struct gcc_target targetm = TARGET_INITIALIZER;
26441 #include "gt-i386.h"