1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not, see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
30 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
44 #include "basic-block.h"
47 #include "target-def.h"
48 #include "langhooks.h"
53 #include "tm-constrs.h"
57 static int x86_builtin_vectorization_cost (bool);
58 static rtx legitimize_dllimport_symbol (rtx, bool);
/* Default stack-probe limit when the target headers do not provide one;
   -1 disables the limit.  NOTE(review): the original paste had lost the
   matching #endif, leaving the conditional unterminated.  */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.
   Indices 0..3 are QI/HI/SI/DI; anything else maps to the "other"
   slot 4.  NOTE(review): the trailing ": 4)" arm had been lost from
   the paste, leaving the conditional expression unterminated.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0	\
   : (mode) == HImode ? 1	\
   : (mode) == SImode ? 2	\
   : (mode) == DImode ? 3	\
   : 4)
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes,
   so size costs can be expressed on the same scale as speed costs.  */
#define COSTS_N_BYTES(N) ((N) * 2)

/* Placeholder stringop strategy used where a table has no 64-bit
   (or no secondary) entry: always fall back to a library call.  */
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
79 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
80 COSTS_N_BYTES (2), /* cost of an add instruction */
81 COSTS_N_BYTES (3), /* cost of a lea instruction */
82 COSTS_N_BYTES (2), /* variable shift costs */
83 COSTS_N_BYTES (3), /* constant shift costs */
84 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
85 COSTS_N_BYTES (3), /* HI */
86 COSTS_N_BYTES (3), /* SI */
87 COSTS_N_BYTES (3), /* DI */
88 COSTS_N_BYTES (5)}, /* other */
89 0, /* cost of multiply per each bit set */
90 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
91 COSTS_N_BYTES (3), /* HI */
92 COSTS_N_BYTES (3), /* SI */
93 COSTS_N_BYTES (3), /* DI */
94 COSTS_N_BYTES (5)}, /* other */
95 COSTS_N_BYTES (3), /* cost of movsx */
96 COSTS_N_BYTES (3), /* cost of movzx */
99 2, /* cost for loading QImode using movzbl */
100 {2, 2, 2}, /* cost of loading integer registers
101 in QImode, HImode and SImode.
102 Relative to reg-reg move (2). */
103 {2, 2, 2}, /* cost of storing integer registers */
104 2, /* cost of reg,reg fld/fst */
105 {2, 2, 2}, /* cost of loading fp registers
106 in SFmode, DFmode and XFmode */
107 {2, 2, 2}, /* cost of storing fp registers
108 in SFmode, DFmode and XFmode */
109 3, /* cost of moving MMX register */
110 {3, 3}, /* cost of loading MMX registers
111 in SImode and DImode */
112 {3, 3}, /* cost of storing MMX registers
113 in SImode and DImode */
114 3, /* cost of moving SSE register */
115 {3, 3, 3}, /* cost of loading SSE registers
116 in SImode, DImode and TImode */
117 {3, 3, 3}, /* cost of storing SSE registers
118 in SImode, DImode and TImode */
119 3, /* MMX or SSE register to integer */
120 0, /* size of l1 cache */
121 0, /* size of l2 cache */
122 0, /* size of prefetch block */
123 0, /* number of parallel prefetches */
125 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
126 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
127 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
128 COSTS_N_BYTES (2), /* cost of FABS instruction. */
129 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
130 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
131 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
132 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
133 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
134 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
135 1, /* scalar_stmt_cost. */
136 1, /* scalar load_cost. */
137 1, /* scalar_store_cost. */
138 1, /* vec_stmt_cost. */
139 1, /* vec_to_scalar_cost. */
140 1, /* scalar_to_vec_cost. */
141 1, /* vec_align_load_cost. */
142 1, /* vec_unalign_load_cost. */
143 1, /* vec_store_cost. */
144 1, /* cond_taken_branch_cost. */
145 1, /* cond_not_taken_branch_cost. */
148 /* Processor costs (relative to an add) */
150 struct processor_costs i386_cost = { /* 386 specific costs */
151 COSTS_N_INSNS (1), /* cost of an add instruction */
152 COSTS_N_INSNS (1), /* cost of a lea instruction */
153 COSTS_N_INSNS (3), /* variable shift costs */
154 COSTS_N_INSNS (2), /* constant shift costs */
155 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
156 COSTS_N_INSNS (6), /* HI */
157 COSTS_N_INSNS (6), /* SI */
158 COSTS_N_INSNS (6), /* DI */
159 COSTS_N_INSNS (6)}, /* other */
160 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
161 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
162 COSTS_N_INSNS (23), /* HI */
163 COSTS_N_INSNS (23), /* SI */
164 COSTS_N_INSNS (23), /* DI */
165 COSTS_N_INSNS (23)}, /* other */
166 COSTS_N_INSNS (3), /* cost of movsx */
167 COSTS_N_INSNS (2), /* cost of movzx */
168 15, /* "large" insn */
170 4, /* cost for loading QImode using movzbl */
171 {2, 4, 2}, /* cost of loading integer registers
172 in QImode, HImode and SImode.
173 Relative to reg-reg move (2). */
174 {2, 4, 2}, /* cost of storing integer registers */
175 2, /* cost of reg,reg fld/fst */
176 {8, 8, 8}, /* cost of loading fp registers
177 in SFmode, DFmode and XFmode */
178 {8, 8, 8}, /* cost of storing fp registers
179 in SFmode, DFmode and XFmode */
180 2, /* cost of moving MMX register */
181 {4, 8}, /* cost of loading MMX registers
182 in SImode and DImode */
183 {4, 8}, /* cost of storing MMX registers
184 in SImode and DImode */
185 2, /* cost of moving SSE register */
186 {4, 8, 16}, /* cost of loading SSE registers
187 in SImode, DImode and TImode */
188 {4, 8, 16}, /* cost of storing SSE registers
189 in SImode, DImode and TImode */
190 3, /* MMX or SSE register to integer */
191 0, /* size of l1 cache */
192 0, /* size of l2 cache */
193 0, /* size of prefetch block */
194 0, /* number of parallel prefetches */
196 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
197 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
198 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
199 COSTS_N_INSNS (22), /* cost of FABS instruction. */
200 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
201 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
202 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
203 DUMMY_STRINGOP_ALGS},
204 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
205 DUMMY_STRINGOP_ALGS},
206 1, /* scalar_stmt_cost. */
207 1, /* scalar load_cost. */
208 1, /* scalar_store_cost. */
209 1, /* vec_stmt_cost. */
210 1, /* vec_to_scalar_cost. */
211 1, /* scalar_to_vec_cost. */
212 1, /* vec_align_load_cost. */
213 2, /* vec_unalign_load_cost. */
214 1, /* vec_store_cost. */
215 3, /* cond_taken_branch_cost. */
216 1, /* cond_not_taken_branch_cost. */
220 struct processor_costs i486_cost = { /* 486 specific costs */
221 COSTS_N_INSNS (1), /* cost of an add instruction */
222 COSTS_N_INSNS (1), /* cost of a lea instruction */
223 COSTS_N_INSNS (3), /* variable shift costs */
224 COSTS_N_INSNS (2), /* constant shift costs */
225 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
226 COSTS_N_INSNS (12), /* HI */
227 COSTS_N_INSNS (12), /* SI */
228 COSTS_N_INSNS (12), /* DI */
229 COSTS_N_INSNS (12)}, /* other */
230 1, /* cost of multiply per each bit set */
231 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
232 COSTS_N_INSNS (40), /* HI */
233 COSTS_N_INSNS (40), /* SI */
234 COSTS_N_INSNS (40), /* DI */
235 COSTS_N_INSNS (40)}, /* other */
236 COSTS_N_INSNS (3), /* cost of movsx */
237 COSTS_N_INSNS (2), /* cost of movzx */
238 15, /* "large" insn */
240 4, /* cost for loading QImode using movzbl */
241 {2, 4, 2}, /* cost of loading integer registers
242 in QImode, HImode and SImode.
243 Relative to reg-reg move (2). */
244 {2, 4, 2}, /* cost of storing integer registers */
245 2, /* cost of reg,reg fld/fst */
246 {8, 8, 8}, /* cost of loading fp registers
247 in SFmode, DFmode and XFmode */
248 {8, 8, 8}, /* cost of storing fp registers
249 in SFmode, DFmode and XFmode */
250 2, /* cost of moving MMX register */
251 {4, 8}, /* cost of loading MMX registers
252 in SImode and DImode */
253 {4, 8}, /* cost of storing MMX registers
254 in SImode and DImode */
255 2, /* cost of moving SSE register */
256 {4, 8, 16}, /* cost of loading SSE registers
257 in SImode, DImode and TImode */
258 {4, 8, 16}, /* cost of storing SSE registers
259 in SImode, DImode and TImode */
260 3, /* MMX or SSE register to integer */
261 4, /* size of l1 cache. 486 has 8kB cache
262 shared for code and data, so 4kB is
263 not really precise. */
264 4, /* size of l2 cache */
265 0, /* size of prefetch block */
266 0, /* number of parallel prefetches */
268 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
269 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
270 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
271 COSTS_N_INSNS (3), /* cost of FABS instruction. */
272 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
273 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
274 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
275 DUMMY_STRINGOP_ALGS},
276 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
277 DUMMY_STRINGOP_ALGS},
278 1, /* scalar_stmt_cost. */
279 1, /* scalar load_cost. */
280 1, /* scalar_store_cost. */
281 1, /* vec_stmt_cost. */
282 1, /* vec_to_scalar_cost. */
283 1, /* scalar_to_vec_cost. */
284 1, /* vec_align_load_cost. */
285 2, /* vec_unalign_load_cost. */
286 1, /* vec_store_cost. */
287 3, /* cond_taken_branch_cost. */
288 1, /* cond_not_taken_branch_cost. */
292 struct processor_costs pentium_cost = {
293 COSTS_N_INSNS (1), /* cost of an add instruction */
294 COSTS_N_INSNS (1), /* cost of a lea instruction */
295 COSTS_N_INSNS (4), /* variable shift costs */
296 COSTS_N_INSNS (1), /* constant shift costs */
297 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
298 COSTS_N_INSNS (11), /* HI */
299 COSTS_N_INSNS (11), /* SI */
300 COSTS_N_INSNS (11), /* DI */
301 COSTS_N_INSNS (11)}, /* other */
302 0, /* cost of multiply per each bit set */
303 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
304 COSTS_N_INSNS (25), /* HI */
305 COSTS_N_INSNS (25), /* SI */
306 COSTS_N_INSNS (25), /* DI */
307 COSTS_N_INSNS (25)}, /* other */
308 COSTS_N_INSNS (3), /* cost of movsx */
309 COSTS_N_INSNS (2), /* cost of movzx */
310 8, /* "large" insn */
312 6, /* cost for loading QImode using movzbl */
313 {2, 4, 2}, /* cost of loading integer registers
314 in QImode, HImode and SImode.
315 Relative to reg-reg move (2). */
316 {2, 4, 2}, /* cost of storing integer registers */
317 2, /* cost of reg,reg fld/fst */
318 {2, 2, 6}, /* cost of loading fp registers
319 in SFmode, DFmode and XFmode */
320 {4, 4, 6}, /* cost of storing fp registers
321 in SFmode, DFmode and XFmode */
322 8, /* cost of moving MMX register */
323 {8, 8}, /* cost of loading MMX registers
324 in SImode and DImode */
325 {8, 8}, /* cost of storing MMX registers
326 in SImode and DImode */
327 2, /* cost of moving SSE register */
328 {4, 8, 16}, /* cost of loading SSE registers
329 in SImode, DImode and TImode */
330 {4, 8, 16}, /* cost of storing SSE registers
331 in SImode, DImode and TImode */
332 3, /* MMX or SSE register to integer */
333 8, /* size of l1 cache. */
334 8, /* size of l2 cache */
335 0, /* size of prefetch block */
336 0, /* number of parallel prefetches */
338 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
339 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
340 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
341 COSTS_N_INSNS (1), /* cost of FABS instruction. */
342 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
343 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
344 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
345 DUMMY_STRINGOP_ALGS},
346 {{libcall, {{-1, rep_prefix_4_byte}}},
347 DUMMY_STRINGOP_ALGS},
348 1, /* scalar_stmt_cost. */
349 1, /* scalar load_cost. */
350 1, /* scalar_store_cost. */
351 1, /* vec_stmt_cost. */
352 1, /* vec_to_scalar_cost. */
353 1, /* scalar_to_vec_cost. */
354 1, /* vec_align_load_cost. */
355 2, /* vec_unalign_load_cost. */
356 1, /* vec_store_cost. */
357 3, /* cond_taken_branch_cost. */
358 1, /* cond_not_taken_branch_cost. */
362 struct processor_costs pentiumpro_cost = {
363 COSTS_N_INSNS (1), /* cost of an add instruction */
364 COSTS_N_INSNS (1), /* cost of a lea instruction */
365 COSTS_N_INSNS (1), /* variable shift costs */
366 COSTS_N_INSNS (1), /* constant shift costs */
367 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
368 COSTS_N_INSNS (4), /* HI */
369 COSTS_N_INSNS (4), /* SI */
370 COSTS_N_INSNS (4), /* DI */
371 COSTS_N_INSNS (4)}, /* other */
372 0, /* cost of multiply per each bit set */
373 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
374 COSTS_N_INSNS (17), /* HI */
375 COSTS_N_INSNS (17), /* SI */
376 COSTS_N_INSNS (17), /* DI */
377 COSTS_N_INSNS (17)}, /* other */
378 COSTS_N_INSNS (1), /* cost of movsx */
379 COSTS_N_INSNS (1), /* cost of movzx */
380 8, /* "large" insn */
382 2, /* cost for loading QImode using movzbl */
383 {4, 4, 4}, /* cost of loading integer registers
384 in QImode, HImode and SImode.
385 Relative to reg-reg move (2). */
386 {2, 2, 2}, /* cost of storing integer registers */
387 2, /* cost of reg,reg fld/fst */
388 {2, 2, 6}, /* cost of loading fp registers
389 in SFmode, DFmode and XFmode */
390 {4, 4, 6}, /* cost of storing fp registers
391 in SFmode, DFmode and XFmode */
392 2, /* cost of moving MMX register */
393 {2, 2}, /* cost of loading MMX registers
394 in SImode and DImode */
395 {2, 2}, /* cost of storing MMX registers
396 in SImode and DImode */
397 2, /* cost of moving SSE register */
398 {2, 2, 8}, /* cost of loading SSE registers
399 in SImode, DImode and TImode */
400 {2, 2, 8}, /* cost of storing SSE registers
401 in SImode, DImode and TImode */
402 3, /* MMX or SSE register to integer */
403 8, /* size of l1 cache. */
404 256, /* size of l2 cache */
405 32, /* size of prefetch block */
406 6, /* number of parallel prefetches */
408 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
409 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
410 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
411 COSTS_N_INSNS (2), /* cost of FABS instruction. */
412 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
413 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
414 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
415 the alignment). For small blocks inline loop is still a noticeable win, for bigger
416 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
417 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
420 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
421 DUMMY_STRINGOP_ALGS},
422 {{rep_prefix_4_byte, {{1024, unrolled_loop},
423 {8192, rep_prefix_4_byte}, {-1, libcall}}},
424 DUMMY_STRINGOP_ALGS},
425 1, /* scalar_stmt_cost. */
426 1, /* scalar load_cost. */
427 1, /* scalar_store_cost. */
428 1, /* vec_stmt_cost. */
429 1, /* vec_to_scalar_cost. */
430 1, /* scalar_to_vec_cost. */
431 1, /* vec_align_load_cost. */
432 2, /* vec_unalign_load_cost. */
433 1, /* vec_store_cost. */
434 3, /* cond_taken_branch_cost. */
435 1, /* cond_not_taken_branch_cost. */
439 struct processor_costs geode_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (1), /* cost of a lea instruction */
442 COSTS_N_INSNS (2), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (4), /* HI */
446 COSTS_N_INSNS (7), /* SI */
447 COSTS_N_INSNS (7), /* DI */
448 COSTS_N_INSNS (7)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (23), /* HI */
452 COSTS_N_INSNS (39), /* SI */
453 COSTS_N_INSNS (39), /* DI */
454 COSTS_N_INSNS (39)}, /* other */
455 COSTS_N_INSNS (1), /* cost of movsx */
456 COSTS_N_INSNS (1), /* cost of movzx */
457 8, /* "large" insn */
459 1, /* cost for loading QImode using movzbl */
460 {1, 1, 1}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {1, 1, 1}, /* cost of storing integer registers */
464 1, /* cost of reg,reg fld/fst */
465 {1, 1, 1}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 6, 6}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
470 1, /* cost of moving MMX register */
471 {1, 1}, /* cost of loading MMX registers
472 in SImode and DImode */
473 {1, 1}, /* cost of storing MMX registers
474 in SImode and DImode */
475 1, /* cost of moving SSE register */
476 {1, 1, 1}, /* cost of loading SSE registers
477 in SImode, DImode and TImode */
478 {1, 1, 1}, /* cost of storing SSE registers
479 in SImode, DImode and TImode */
480 1, /* MMX or SSE register to integer */
481 64, /* size of l1 cache. */
482 128, /* size of l2 cache. */
483 32, /* size of prefetch block */
484 1, /* number of parallel prefetches */
486 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
487 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
488 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
489 COSTS_N_INSNS (1), /* cost of FABS instruction. */
490 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
491 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
492 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
493 DUMMY_STRINGOP_ALGS},
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 1, /* scalar_stmt_cost. */
497 1, /* scalar load_cost. */
498 1, /* scalar_store_cost. */
499 1, /* vec_stmt_cost. */
500 1, /* vec_to_scalar_cost. */
501 1, /* scalar_to_vec_cost. */
502 1, /* vec_align_load_cost. */
503 2, /* vec_unalign_load_cost. */
504 1, /* vec_store_cost. */
505 3, /* cond_taken_branch_cost. */
506 1, /* cond_not_taken_branch_cost. */
510 struct processor_costs k6_cost = {
511 COSTS_N_INSNS (1), /* cost of an add instruction */
512 COSTS_N_INSNS (2), /* cost of a lea instruction */
513 COSTS_N_INSNS (1), /* variable shift costs */
514 COSTS_N_INSNS (1), /* constant shift costs */
515 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
516 COSTS_N_INSNS (3), /* HI */
517 COSTS_N_INSNS (3), /* SI */
518 COSTS_N_INSNS (3), /* DI */
519 COSTS_N_INSNS (3)}, /* other */
520 0, /* cost of multiply per each bit set */
521 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
522 COSTS_N_INSNS (18), /* HI */
523 COSTS_N_INSNS (18), /* SI */
524 COSTS_N_INSNS (18), /* DI */
525 COSTS_N_INSNS (18)}, /* other */
526 COSTS_N_INSNS (2), /* cost of movsx */
527 COSTS_N_INSNS (2), /* cost of movzx */
528 8, /* "large" insn */
530 3, /* cost for loading QImode using movzbl */
531 {4, 5, 4}, /* cost of loading integer registers
532 in QImode, HImode and SImode.
533 Relative to reg-reg move (2). */
534 {2, 3, 2}, /* cost of storing integer registers */
535 4, /* cost of reg,reg fld/fst */
536 {6, 6, 6}, /* cost of loading fp registers
537 in SFmode, DFmode and XFmode */
538 {4, 4, 4}, /* cost of storing fp registers
539 in SFmode, DFmode and XFmode */
540 2, /* cost of moving MMX register */
541 {2, 2}, /* cost of loading MMX registers
542 in SImode and DImode */
543 {2, 2}, /* cost of storing MMX registers
544 in SImode and DImode */
545 2, /* cost of moving SSE register */
546 {2, 2, 8}, /* cost of loading SSE registers
547 in SImode, DImode and TImode */
548 {2, 2, 8}, /* cost of storing SSE registers
549 in SImode, DImode and TImode */
550 6, /* MMX or SSE register to integer */
551 32, /* size of l1 cache. */
552 32, /* size of l2 cache. Some models
553 have integrated l2 cache, but
554 optimizing for k6 is not important
555 enough to worry about that. */
556 32, /* size of prefetch block */
557 1, /* number of parallel prefetches */
559 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
560 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
561 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
562 COSTS_N_INSNS (2), /* cost of FABS instruction. */
563 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
564 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
565 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
566 DUMMY_STRINGOP_ALGS},
567 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
568 DUMMY_STRINGOP_ALGS},
569 1, /* scalar_stmt_cost. */
570 1, /* scalar load_cost. */
571 1, /* scalar_store_cost. */
572 1, /* vec_stmt_cost. */
573 1, /* vec_to_scalar_cost. */
574 1, /* scalar_to_vec_cost. */
575 1, /* vec_align_load_cost. */
576 2, /* vec_unalign_load_cost. */
577 1, /* vec_store_cost. */
578 3, /* cond_taken_branch_cost. */
579 1, /* cond_not_taken_branch_cost. */
583 struct processor_costs athlon_cost = {
584 COSTS_N_INSNS (1), /* cost of an add instruction */
585 COSTS_N_INSNS (2), /* cost of a lea instruction */
586 COSTS_N_INSNS (1), /* variable shift costs */
587 COSTS_N_INSNS (1), /* constant shift costs */
588 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
589 COSTS_N_INSNS (5), /* HI */
590 COSTS_N_INSNS (5), /* SI */
591 COSTS_N_INSNS (5), /* DI */
592 COSTS_N_INSNS (5)}, /* other */
593 0, /* cost of multiply per each bit set */
594 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
595 COSTS_N_INSNS (26), /* HI */
596 COSTS_N_INSNS (42), /* SI */
597 COSTS_N_INSNS (74), /* DI */
598 COSTS_N_INSNS (74)}, /* other */
599 COSTS_N_INSNS (1), /* cost of movsx */
600 COSTS_N_INSNS (1), /* cost of movzx */
601 8, /* "large" insn */
603 4, /* cost for loading QImode using movzbl */
604 {3, 4, 3}, /* cost of loading integer registers
605 in QImode, HImode and SImode.
606 Relative to reg-reg move (2). */
607 {3, 4, 3}, /* cost of storing integer registers */
608 4, /* cost of reg,reg fld/fst */
609 {4, 4, 12}, /* cost of loading fp registers
610 in SFmode, DFmode and XFmode */
611 {6, 6, 8}, /* cost of storing fp registers
612 in SFmode, DFmode and XFmode */
613 2, /* cost of moving MMX register */
614 {4, 4}, /* cost of loading MMX registers
615 in SImode and DImode */
616 {4, 4}, /* cost of storing MMX registers
617 in SImode and DImode */
618 2, /* cost of moving SSE register */
619 {4, 4, 6}, /* cost of loading SSE registers
620 in SImode, DImode and TImode */
621 {4, 4, 5}, /* cost of storing SSE registers
622 in SImode, DImode and TImode */
623 5, /* MMX or SSE register to integer */
624 64, /* size of l1 cache. */
625 256, /* size of l2 cache. */
626 64, /* size of prefetch block */
627 6, /* number of parallel prefetches */
629 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
630 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
631 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
632 COSTS_N_INSNS (2), /* cost of FABS instruction. */
633 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
634 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
635 /* For some reason, Athlon deals better with REP prefix (relative to loops)
636 compared to K8. Alignment becomes important after 8 bytes for memcpy and
637 128 bytes for memset. */
638 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
639 DUMMY_STRINGOP_ALGS},
640 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
641 DUMMY_STRINGOP_ALGS},
642 1, /* scalar_stmt_cost. */
643 1, /* scalar load_cost. */
644 1, /* scalar_store_cost. */
645 1, /* vec_stmt_cost. */
646 1, /* vec_to_scalar_cost. */
647 1, /* scalar_to_vec_cost. */
648 1, /* vec_align_load_cost. */
649 2, /* vec_unalign_load_cost. */
650 1, /* vec_store_cost. */
651 3, /* cond_taken_branch_cost. */
652 1, /* cond_not_taken_branch_cost. */
656 struct processor_costs k8_cost = {
657 COSTS_N_INSNS (1), /* cost of an add instruction */
658 COSTS_N_INSNS (2), /* cost of a lea instruction */
659 COSTS_N_INSNS (1), /* variable shift costs */
660 COSTS_N_INSNS (1), /* constant shift costs */
661 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
662 COSTS_N_INSNS (4), /* HI */
663 COSTS_N_INSNS (3), /* SI */
664 COSTS_N_INSNS (4), /* DI */
665 COSTS_N_INSNS (5)}, /* other */
666 0, /* cost of multiply per each bit set */
667 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
668 COSTS_N_INSNS (26), /* HI */
669 COSTS_N_INSNS (42), /* SI */
670 COSTS_N_INSNS (74), /* DI */
671 COSTS_N_INSNS (74)}, /* other */
672 COSTS_N_INSNS (1), /* cost of movsx */
673 COSTS_N_INSNS (1), /* cost of movzx */
674 8, /* "large" insn */
676 4, /* cost for loading QImode using movzbl */
677 {3, 4, 3}, /* cost of loading integer registers
678 in QImode, HImode and SImode.
679 Relative to reg-reg move (2). */
680 {3, 4, 3}, /* cost of storing integer registers */
681 4, /* cost of reg,reg fld/fst */
682 {4, 4, 12}, /* cost of loading fp registers
683 in SFmode, DFmode and XFmode */
684 {6, 6, 8}, /* cost of storing fp registers
685 in SFmode, DFmode and XFmode */
686 2, /* cost of moving MMX register */
687 {3, 3}, /* cost of loading MMX registers
688 in SImode and DImode */
689 {4, 4}, /* cost of storing MMX registers
690 in SImode and DImode */
691 2, /* cost of moving SSE register */
692 {4, 3, 6}, /* cost of loading SSE registers
693 in SImode, DImode and TImode */
694 {4, 4, 5}, /* cost of storing SSE registers
695 in SImode, DImode and TImode */
696 5, /* MMX or SSE register to integer */
697 64, /* size of l1 cache. */
698 512, /* size of l2 cache. */
699 64, /* size of prefetch block */
700 /* New AMD processors never drop prefetches; if they cannot be performed
701 immediately, they are queued. We set number of simultaneous prefetches
702 to a large constant to reflect this (it probably is not a good idea not
703 to limit number of prefetches at all, as their execution also takes some
705 100, /* number of parallel prefetches */
707 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
708 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
709 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
710 COSTS_N_INSNS (2), /* cost of FABS instruction. */
711 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
712 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
713 /* K8 has optimized REP instruction for medium sized blocks, but for very small
714 blocks it is better to use loop. For large blocks, libcall can do
715 nontemporary accesses and beat inline considerably. */
716 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
718 {{libcall, {{8, loop}, {24, unrolled_loop},
719 {2048, rep_prefix_4_byte}, {-1, libcall}}},
720 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
721 4, /* scalar_stmt_cost. */
722 2, /* scalar load_cost. */
723 2, /* scalar_store_cost. */
724 5, /* vec_stmt_cost. */
725 0, /* vec_to_scalar_cost. */
726 2, /* scalar_to_vec_cost. */
727 2, /* vec_align_load_cost. */
728 3, /* vec_unalign_load_cost. */
729 3, /* vec_store_cost. */
730 3, /* cond_taken_branch_cost. */
731 2, /* cond_not_taken_branch_cost. */
734 struct processor_costs amdfam10_cost = {
735 COSTS_N_INSNS (1), /* cost of an add instruction */
736 COSTS_N_INSNS (2), /* cost of a lea instruction */
737 COSTS_N_INSNS (1), /* variable shift costs */
738 COSTS_N_INSNS (1), /* constant shift costs */
739 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
740 COSTS_N_INSNS (4), /* HI */
741 COSTS_N_INSNS (3), /* SI */
742 COSTS_N_INSNS (4), /* DI */
743 COSTS_N_INSNS (5)}, /* other */
744 0, /* cost of multiply per each bit set */
745 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
746 COSTS_N_INSNS (35), /* HI */
747 COSTS_N_INSNS (51), /* SI */
748 COSTS_N_INSNS (83), /* DI */
749 COSTS_N_INSNS (83)}, /* other */
750 COSTS_N_INSNS (1), /* cost of movsx */
751 COSTS_N_INSNS (1), /* cost of movzx */
752 8, /* "large" insn */
754 4, /* cost for loading QImode using movzbl */
755 {3, 4, 3}, /* cost of loading integer registers
756 in QImode, HImode and SImode.
757 Relative to reg-reg move (2). */
758 {3, 4, 3}, /* cost of storing integer registers */
759 4, /* cost of reg,reg fld/fst */
760 {4, 4, 12}, /* cost of loading fp registers
761 in SFmode, DFmode and XFmode */
762 {6, 6, 8}, /* cost of storing fp registers
763 in SFmode, DFmode and XFmode */
764 2, /* cost of moving MMX register */
765 {3, 3}, /* cost of loading MMX registers
766 in SImode and DImode */
767 {4, 4}, /* cost of storing MMX registers
768 in SImode and DImode */
769 2, /* cost of moving SSE register */
770 {4, 4, 3}, /* cost of loading SSE registers
771 in SImode, DImode and TImode */
772 {4, 4, 5}, /* cost of storing SSE registers
773 in SImode, DImode and TImode */
774 3, /* MMX or SSE register to integer */
776 MOVD reg64, xmmreg Double FSTORE 4
777 MOVD reg32, xmmreg Double FSTORE 4
779 MOVD reg64, xmmreg Double FADD 3
781 MOVD reg32, xmmreg Double FADD 3
783 64, /* size of l1 cache. */
784 512, /* size of l2 cache. */
785 64, /* size of prefetch block */
786 /* New AMD processors never drop prefetches; if they cannot be performed
787 immediately, they are queued. We set number of simultaneous prefetches
788 to a large constant to reflect this (it probably is not a good idea not
789 to limit number of prefetches at all, as their execution also takes some
791 100, /* number of parallel prefetches */
793 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
794 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
795 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
796 COSTS_N_INSNS (2), /* cost of FABS instruction. */
797 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
798 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
801 very small blocks it is better to use loop. For large blocks, libcall can
802 do nontemporary accesses and beat inline considerably. */
803 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
804 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
805 {{libcall, {{8, loop}, {24, unrolled_loop},
806 {2048, rep_prefix_4_byte}, {-1, libcall}}},
807 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
808 4, /* scalar_stmt_cost. */
809 2, /* scalar load_cost. */
810 2, /* scalar_store_cost. */
811 6, /* vec_stmt_cost. */
812 0, /* vec_to_scalar_cost. */
813 2, /* scalar_to_vec_cost. */
814 2, /* vec_align_load_cost. */
815 2, /* vec_unalign_load_cost. */
816 2, /* vec_store_cost. */
817 2, /* cond_taken_branch_cost. */
818 1, /* cond_not_taken_branch_cost. */
/* Cost model used when tuning for the Pentium 4 (NetBurst).  Per the
   file-wide convention, instruction costs are in COSTS_N_INSNS units
   relative to an add, and register load/store costs are relative to a
   reg-reg move (2).  Note the very high SSE register move/load costs
   (12) reflected below.  */
822 struct processor_costs pentium4_cost = {
823 COSTS_N_INSNS (1), /* cost of an add instruction */
824 COSTS_N_INSNS (3), /* cost of a lea instruction */
825 COSTS_N_INSNS (4), /* variable shift costs */
826 COSTS_N_INSNS (4), /* constant shift costs */
827 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
828 COSTS_N_INSNS (15), /* HI */
829 COSTS_N_INSNS (15), /* SI */
830 COSTS_N_INSNS (15), /* DI */
831 COSTS_N_INSNS (15)}, /* other */
832 0, /* cost of multiply per each bit set */
833 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
834 COSTS_N_INSNS (56), /* HI */
835 COSTS_N_INSNS (56), /* SI */
836 COSTS_N_INSNS (56), /* DI */
837 COSTS_N_INSNS (56)}, /* other */
838 COSTS_N_INSNS (1), /* cost of movsx */
839 COSTS_N_INSNS (1), /* cost of movzx */
840 16, /* "large" insn */
842 2, /* cost for loading QImode using movzbl */
843 {4, 5, 4}, /* cost of loading integer registers
844 in QImode, HImode and SImode.
845 Relative to reg-reg move (2). */
846 {2, 3, 2}, /* cost of storing integer registers */
847 2, /* cost of reg,reg fld/fst */
848 {2, 2, 6}, /* cost of loading fp registers
849 in SFmode, DFmode and XFmode */
850 {4, 4, 6}, /* cost of storing fp registers
851 in SFmode, DFmode and XFmode */
852 2, /* cost of moving MMX register */
853 {2, 2}, /* cost of loading MMX registers
854 in SImode and DImode */
855 {2, 2}, /* cost of storing MMX registers
856 in SImode and DImode */
857 12, /* cost of moving SSE register */
858 {12, 12, 12}, /* cost of loading SSE registers
859 in SImode, DImode and TImode */
860 {2, 2, 8}, /* cost of storing SSE registers
861 in SImode, DImode and TImode */
862 10, /* MMX or SSE register to integer */
863 8, /* size of l1 cache. */
864 256, /* size of l2 cache. */
865 64, /* size of prefetch block */
866 6, /* number of parallel prefetches */
868 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
869 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
870 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
871 COSTS_N_INSNS (2), /* cost of FABS instruction. */
872 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
873 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
/* stringop algorithm tables; second table is DUMMY (64-bit unused).
   NOTE(review): presumably memcpy table first, then memset — confirm
   against the struct processor_costs declaration.  */
874 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
875 DUMMY_STRINGOP_ALGS},
876 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost-model knobs.  */
879 1, /* scalar_stmt_cost. */
880 1, /* scalar load_cost. */
881 1, /* scalar_store_cost. */
882 1, /* vec_stmt_cost. */
883 1, /* vec_to_scalar_cost. */
884 1, /* scalar_to_vec_cost. */
885 1, /* vec_align_load_cost. */
886 2, /* vec_unalign_load_cost. */
887 1, /* vec_store_cost. */
888 3, /* cond_taken_branch_cost. */
889 1, /* cond_not_taken_branch_cost. */
/* Cost model used when tuning for Nocona (64-bit Pentium 4 / Prescott
   class).  Same COSTS_N_INSNS / reg-reg-move-relative conventions as
   the other processor_costs tables in this file.  */
893 struct processor_costs nocona_cost = {
894 COSTS_N_INSNS (1), /* cost of an add instruction */
895 COSTS_N_INSNS (1), /* cost of a lea instruction */
896 COSTS_N_INSNS (1), /* variable shift costs */
897 COSTS_N_INSNS (1), /* constant shift costs */
898 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
899 COSTS_N_INSNS (10), /* HI */
900 COSTS_N_INSNS (10), /* SI */
901 COSTS_N_INSNS (10), /* DI */
902 COSTS_N_INSNS (10)}, /* other */
903 0, /* cost of multiply per each bit set */
904 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
905 COSTS_N_INSNS (66), /* HI */
906 COSTS_N_INSNS (66), /* SI */
907 COSTS_N_INSNS (66), /* DI */
908 COSTS_N_INSNS (66)}, /* other */
909 COSTS_N_INSNS (1), /* cost of movsx */
910 COSTS_N_INSNS (1), /* cost of movzx */
911 16, /* "large" insn */
913 4, /* cost for loading QImode using movzbl */
914 {4, 4, 4}, /* cost of loading integer registers
915 in QImode, HImode and SImode.
916 Relative to reg-reg move (2). */
917 {4, 4, 4}, /* cost of storing integer registers */
918 3, /* cost of reg,reg fld/fst */
919 {12, 12, 12}, /* cost of loading fp registers
920 in SFmode, DFmode and XFmode */
921 {4, 4, 4}, /* cost of storing fp registers
922 in SFmode, DFmode and XFmode */
923 6, /* cost of moving MMX register */
924 {12, 12}, /* cost of loading MMX registers
925 in SImode and DImode */
926 {12, 12}, /* cost of storing MMX registers
927 in SImode and DImode */
928 6, /* cost of moving SSE register */
929 {12, 12, 12}, /* cost of loading SSE registers
930 in SImode, DImode and TImode */
931 {12, 12, 12}, /* cost of storing SSE registers
932 in SImode, DImode and TImode */
933 8, /* MMX or SSE register to integer */
934 8, /* size of l1 cache. */
935 1024, /* size of l2 cache. */
936 128, /* size of prefetch block */
937 8, /* number of parallel prefetches */
939 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
940 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
941 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
942 COSTS_N_INSNS (3), /* cost of FABS instruction. */
943 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
944 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
/* stringop algorithm tables ({32-bit variant, 64-bit variant}).  */
945 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
946 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
947 {100000, unrolled_loop}, {-1, libcall}}}},
948 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {libcall, {{24, loop}, {64, unrolled_loop},
951 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost-model knobs.  */
952 1, /* scalar_stmt_cost. */
953 1, /* scalar load_cost. */
954 1, /* scalar_store_cost. */
955 1, /* vec_stmt_cost. */
956 1, /* vec_to_scalar_cost. */
957 1, /* scalar_to_vec_cost. */
958 1, /* vec_align_load_cost. */
959 2, /* vec_unalign_load_cost. */
960 1, /* vec_store_cost. */
961 3, /* cond_taken_branch_cost. */
962 1, /* cond_not_taken_branch_cost. */
/* Cost model used when tuning for Core 2.  Note the lea cost is
   COSTS_N_INSNS (1) + 1, i.e. deliberately a little more than an add
   (see the rationale comment on generic64_cost's lea entry).  */
966 struct processor_costs core2_cost = {
967 COSTS_N_INSNS (1), /* cost of an add instruction */
968 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
969 COSTS_N_INSNS (1), /* variable shift costs */
970 COSTS_N_INSNS (1), /* constant shift costs */
971 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
972 COSTS_N_INSNS (3), /* HI */
973 COSTS_N_INSNS (3), /* SI */
974 COSTS_N_INSNS (3), /* DI */
975 COSTS_N_INSNS (3)}, /* other */
976 0, /* cost of multiply per each bit set */
977 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
978 COSTS_N_INSNS (22), /* HI */
979 COSTS_N_INSNS (22), /* SI */
980 COSTS_N_INSNS (22), /* DI */
981 COSTS_N_INSNS (22)}, /* other */
982 COSTS_N_INSNS (1), /* cost of movsx */
983 COSTS_N_INSNS (1), /* cost of movzx */
984 8, /* "large" insn */
986 2, /* cost for loading QImode using movzbl */
987 {6, 6, 6}, /* cost of loading integer registers
988 in QImode, HImode and SImode.
989 Relative to reg-reg move (2). */
990 {4, 4, 4}, /* cost of storing integer registers */
991 2, /* cost of reg,reg fld/fst */
992 {6, 6, 6}, /* cost of loading fp registers
993 in SFmode, DFmode and XFmode */
994 {4, 4, 4}, /* cost of storing fp registers
995 in SFmode, DFmode and XFmode */
996 2, /* cost of moving MMX register */
997 {6, 6}, /* cost of loading MMX registers
998 in SImode and DImode */
999 {4, 4}, /* cost of storing MMX registers
1000 in SImode and DImode */
1001 2, /* cost of moving SSE register */
1002 {6, 6, 6}, /* cost of loading SSE registers
1003 in SImode, DImode and TImode */
1004 {4, 4, 4}, /* cost of storing SSE registers
1005 in SImode, DImode and TImode */
1006 2, /* MMX or SSE register to integer */
1007 32, /* size of l1 cache. */
1008 2048, /* size of l2 cache. */
1009 128, /* size of prefetch block */
1010 8, /* number of parallel prefetches */
1011 3, /* Branch cost */
1012 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1013 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1014 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1015 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1016 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1017 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
/* stringop algorithm tables ({32-bit variant, 64-bit variant}).  */
1018 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1019 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1020 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1021 {{libcall, {{8, loop}, {15, unrolled_loop},
1022 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1023 {libcall, {{24, loop}, {32, unrolled_loop},
1024 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost-model knobs.  */
1025 1, /* scalar_stmt_cost. */
1026 1, /* scalar load_cost. */
1027 1, /* scalar_store_cost. */
1028 1, /* vec_stmt_cost. */
1029 1, /* vec_to_scalar_cost. */
1030 1, /* scalar_to_vec_cost. */
1031 1, /* vec_align_load_cost. */
1032 2, /* vec_unalign_load_cost. */
1033 1, /* vec_store_cost. */
1034 3, /* cond_taken_branch_cost. */
1035 1, /* cond_not_taken_branch_cost. */
/* Cost model used when tuning for Intel Atom.  Divide/mod costs scale
   with operand width (QI 18 ... DI 74), unlike the flat tables for
   P4/Nocona/Core 2 above.  */
1039 struct processor_costs atom_cost = {
1040 COSTS_N_INSNS (1), /* cost of an add instruction */
1041 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1042 COSTS_N_INSNS (1), /* variable shift costs */
1043 COSTS_N_INSNS (1), /* constant shift costs */
1044 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1045 COSTS_N_INSNS (4), /* HI */
1046 COSTS_N_INSNS (3), /* SI */
1047 COSTS_N_INSNS (4), /* DI */
1048 COSTS_N_INSNS (2)}, /* other */
1049 0, /* cost of multiply per each bit set */
1050 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1051 COSTS_N_INSNS (26), /* HI */
1052 COSTS_N_INSNS (42), /* SI */
1053 COSTS_N_INSNS (74), /* DI */
1054 COSTS_N_INSNS (74)}, /* other */
1055 COSTS_N_INSNS (1), /* cost of movsx */
1056 COSTS_N_INSNS (1), /* cost of movzx */
1057 8, /* "large" insn */
1058 17, /* MOVE_RATIO */
1059 2, /* cost for loading QImode using movzbl */
1060 {4, 4, 4}, /* cost of loading integer registers
1061 in QImode, HImode and SImode.
1062 Relative to reg-reg move (2). */
1063 {4, 4, 4}, /* cost of storing integer registers */
1064 4, /* cost of reg,reg fld/fst */
1065 {12, 12, 12}, /* cost of loading fp registers
1066 in SFmode, DFmode and XFmode */
1067 {6, 6, 8}, /* cost of storing fp registers
1068 in SFmode, DFmode and XFmode */
1069 2, /* cost of moving MMX register */
1070 {8, 8}, /* cost of loading MMX registers
1071 in SImode and DImode */
1072 {8, 8}, /* cost of storing MMX registers
1073 in SImode and DImode */
1074 2, /* cost of moving SSE register */
1075 {8, 8, 8}, /* cost of loading SSE registers
1076 in SImode, DImode and TImode */
1077 {8, 8, 8}, /* cost of storing SSE registers
1078 in SImode, DImode and TImode */
1079 5, /* MMX or SSE register to integer */
1080 32, /* size of l1 cache. */
1081 256, /* size of l2 cache. */
1082 64, /* size of prefetch block */
1083 6, /* number of parallel prefetches */
1084 3, /* Branch cost */
1085 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1086 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1087 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1088 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1089 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1090 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
/* stringop algorithm tables ({32-bit variant, 64-bit variant}).  */
1091 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1092 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1093 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1094 {{libcall, {{8, loop}, {15, unrolled_loop},
1095 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1096 {libcall, {{24, loop}, {32, unrolled_loop},
1097 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost-model knobs.  */
1098 1, /* scalar_stmt_cost. */
1099 1, /* scalar load_cost. */
1100 1, /* scalar_store_cost. */
1101 1, /* vec_stmt_cost. */
1102 1, /* vec_to_scalar_cost. */
1103 1, /* scalar_to_vec_cost. */
1104 1, /* vec_align_load_cost. */
1105 2, /* vec_unalign_load_cost. */
1106 1, /* vec_store_cost. */
1107 3, /* cond_taken_branch_cost. */
1108 1, /* cond_not_taken_branch_cost. */
1111 /* Generic64 should produce code tuned for Nocona and K8. */
/* Cost model for -mtune=generic in 64-bit mode; per the comment above,
   intended as a compromise tuned for Nocona and K8.  Stringop tables
   deliberately leave the 32-bit slot DUMMY (generic64 is 64-bit only).  */
1113 struct processor_costs generic64_cost = {
1114 COSTS_N_INSNS (1), /* cost of an add instruction */
1115 /* On all chips taken into consideration lea is 2 cycles and more. With
1116 this cost however our current implementation of synth_mult results in
1117 use of unnecessary temporary registers causing regression on several
1118 SPECfp benchmarks. */
1119 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1120 COSTS_N_INSNS (1), /* variable shift costs */
1121 COSTS_N_INSNS (1), /* constant shift costs */
1122 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1123 COSTS_N_INSNS (4), /* HI */
1124 COSTS_N_INSNS (3), /* SI */
1125 COSTS_N_INSNS (4), /* DI */
1126 COSTS_N_INSNS (2)}, /* other */
1127 0, /* cost of multiply per each bit set */
1128 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1129 COSTS_N_INSNS (26), /* HI */
1130 COSTS_N_INSNS (42), /* SI */
1131 COSTS_N_INSNS (74), /* DI */
1132 COSTS_N_INSNS (74)}, /* other */
1133 COSTS_N_INSNS (1), /* cost of movsx */
1134 COSTS_N_INSNS (1), /* cost of movzx */
1135 8, /* "large" insn */
1136 17, /* MOVE_RATIO */
1137 4, /* cost for loading QImode using movzbl */
1138 {4, 4, 4}, /* cost of loading integer registers
1139 in QImode, HImode and SImode.
1140 Relative to reg-reg move (2). */
1141 {4, 4, 4}, /* cost of storing integer registers */
1142 4, /* cost of reg,reg fld/fst */
1143 {12, 12, 12}, /* cost of loading fp registers
1144 in SFmode, DFmode and XFmode */
1145 {6, 6, 8}, /* cost of storing fp registers
1146 in SFmode, DFmode and XFmode */
1147 2, /* cost of moving MMX register */
1148 {8, 8}, /* cost of loading MMX registers
1149 in SImode and DImode */
1150 {8, 8}, /* cost of storing MMX registers
1151 in SImode and DImode */
1152 2, /* cost of moving SSE register */
1153 {8, 8, 8}, /* cost of loading SSE registers
1154 in SImode, DImode and TImode */
1155 {8, 8, 8}, /* cost of storing SSE registers
1156 in SImode, DImode and TImode */
1157 5, /* MMX or SSE register to integer */
1158 32, /* size of l1 cache. */
1159 512, /* size of l2 cache. */
1160 64, /* size of prefetch block */
1161 6, /* number of parallel prefetches */
1162 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1163 is increased to perhaps more appropriate value of 5. */
1164 3, /* Branch cost */
1165 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1166 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1167 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1168 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1169 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1170 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1171 {DUMMY_STRINGOP_ALGS,
1172 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1173 {DUMMY_STRINGOP_ALGS,
1174 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
/* Vectorizer cost-model knobs.  */
1175 1, /* scalar_stmt_cost. */
1176 1, /* scalar load_cost. */
1177 1, /* scalar_store_cost. */
1178 1, /* vec_stmt_cost. */
1179 1, /* vec_to_scalar_cost. */
1180 1, /* scalar_to_vec_cost. */
1181 1, /* vec_align_load_cost. */
1182 2, /* vec_unalign_load_cost. */
1183 1, /* vec_store_cost. */
1184 3, /* cond_taken_branch_cost. */
1185 1, /* cond_not_taken_branch_cost. */
1188 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
/* Cost model for -mtune=generic in 32-bit mode (compromise across
   Athlon, PPro, Pentium4, Nocona and K8 per the comment above).
   Mirrors generic64_cost except for L2 size and the stringop tables,
   which here fill only the 32-bit slot (64-bit slot is DUMMY).  */
1190 struct processor_costs generic32_cost = {
1191 COSTS_N_INSNS (1), /* cost of an add instruction */
1192 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1193 COSTS_N_INSNS (1), /* variable shift costs */
1194 COSTS_N_INSNS (1), /* constant shift costs */
1195 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1196 COSTS_N_INSNS (4), /* HI */
1197 COSTS_N_INSNS (3), /* SI */
1198 COSTS_N_INSNS (4), /* DI */
1199 COSTS_N_INSNS (2)}, /* other */
1200 0, /* cost of multiply per each bit set */
1201 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1202 COSTS_N_INSNS (26), /* HI */
1203 COSTS_N_INSNS (42), /* SI */
1204 COSTS_N_INSNS (74), /* DI */
1205 COSTS_N_INSNS (74)}, /* other */
1206 COSTS_N_INSNS (1), /* cost of movsx */
1207 COSTS_N_INSNS (1), /* cost of movzx */
1208 8, /* "large" insn */
1209 17, /* MOVE_RATIO */
1210 4, /* cost for loading QImode using movzbl */
1211 {4, 4, 4}, /* cost of loading integer registers
1212 in QImode, HImode and SImode.
1213 Relative to reg-reg move (2). */
1214 {4, 4, 4}, /* cost of storing integer registers */
1215 4, /* cost of reg,reg fld/fst */
1216 {12, 12, 12}, /* cost of loading fp registers
1217 in SFmode, DFmode and XFmode */
1218 {6, 6, 8}, /* cost of storing fp registers
1219 in SFmode, DFmode and XFmode */
1220 2, /* cost of moving MMX register */
1221 {8, 8}, /* cost of loading MMX registers
1222 in SImode and DImode */
1223 {8, 8}, /* cost of storing MMX registers
1224 in SImode and DImode */
1225 2, /* cost of moving SSE register */
1226 {8, 8, 8}, /* cost of loading SSE registers
1227 in SImode, DImode and TImode */
1228 {8, 8, 8}, /* cost of storing SSE registers
1229 in SImode, DImode and TImode */
1230 5, /* MMX or SSE register to integer */
1231 32, /* size of l1 cache. */
1232 256, /* size of l2 cache. */
1233 64, /* size of prefetch block */
1234 6, /* number of parallel prefetches */
1235 3, /* Branch cost */
1236 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1237 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1238 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1239 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1240 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1241 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1242 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1243 DUMMY_STRINGOP_ALGS},
1244 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1245 DUMMY_STRINGOP_ALGS},
/* Vectorizer cost-model knobs.  */
1246 1, /* scalar_stmt_cost. */
1247 1, /* scalar load_cost. */
1248 1, /* scalar_store_cost. */
1249 1, /* vec_stmt_cost. */
1250 1, /* vec_to_scalar_cost. */
1251 1, /* scalar_to_vec_cost. */
1252 1, /* vec_align_load_cost. */
1253 2, /* vec_unalign_load_cost. */
1254 1, /* vec_store_cost. */
1255 3, /* cond_taken_branch_cost. */
1256 1, /* cond_not_taken_branch_cost. */
/* Active cost table; repointed at option-processing time according to
   -mtune (defaults to pentium_cost until then).  */
1259 const struct processor_costs *ix86_cost = &pentium_cost;
1261 /* Processor feature/optimization bitmasks. */
/* One bit per PROCESSOR_* enumerator; combined into masks used by the
   initial_ix86_tune_features / initial_ix86_arch_features tables below.  */
1262 #define m_386 (1<<PROCESSOR_I386)
1263 #define m_486 (1<<PROCESSOR_I486)
1264 #define m_PENT (1<<PROCESSOR_PENTIUM)
1265 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1266 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1267 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1268 #define m_CORE2 (1<<PROCESSOR_CORE2)
1269 #define m_ATOM (1<<PROCESSOR_ATOM)
1271 #define m_GEODE (1<<PROCESSOR_GEODE)
1272 #define m_K6 (1<<PROCESSOR_K6)
1273 #define m_K6_GEODE (m_K6 | m_GEODE)
1274 #define m_K8 (1<<PROCESSOR_K8)
1275 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1276 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1277 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1278 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1280 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1281 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1283 /* Generic instruction choice should be common subset of supported CPUs
1284 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1285 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1287 /* Feature tests against the various tunings. */
/* Filled in from initial_ix86_tune_features for the selected -mtune
   processor; queried via the X86_TUNE_* indices.  */
1288 unsigned char ix86_tune_features[X86_TUNE_LAST];
1290 /* Feature tests against the various tunings used to create ix86_tune_features
1291 based on the processor mask. */
/* NOTE(review): entry order must match the X86_TUNE_* enum exactly; each
   entry is a mask of m_* processor bits for which the tuning applies
   (entries written with ~(...) mean "all processors except these").  */
1292 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1293 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1294 negatively, so enabling for Generic64 seems like good code size
1295 tradeoff. We can't enable it for 32bit generic because it does not
1296 work well with PPro base chips. */
1297 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1299 /* X86_TUNE_PUSH_MEMORY */
1300 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1301 | m_NOCONA | m_CORE2 | m_GENERIC,
1303 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1306 /* X86_TUNE_UNROLL_STRLEN */
1307 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1308 | m_CORE2 | m_GENERIC,
1310 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1311 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1313 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1314 on simulation result. But after P4 was made, no performance benefit
1315 was observed with branch hints. It also increases the code size.
1316 As a result, icc never generates branch hints. */
1319 /* X86_TUNE_DOUBLE_WITH_ADD */
1322 /* X86_TUNE_USE_SAHF */
1323 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1324 | m_NOCONA | m_CORE2 | m_GENERIC,
1326 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1327 partial dependencies. */
1328 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1329 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1331 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1332 register stalls on Generic32 compilation setting as well. However
1333 in current implementation the partial register stalls are not eliminated
1334 very well - they can be introduced via subregs synthesized by combine
1335 and can happen in caller/callee saving sequences. Because this option
1336 pays back little on PPro based chips and is in conflict with partial reg
1337 dependencies used by Athlon/P4 based chips, it is better to leave it off
1338 for generic32 for now. */
1341 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1342 m_CORE2 | m_GENERIC,
1344 /* X86_TUNE_USE_HIMODE_FIOP */
1345 m_386 | m_486 | m_K6_GEODE,
1347 /* X86_TUNE_USE_SIMODE_FIOP */
1348 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1350 /* X86_TUNE_USE_MOV0 */
1353 /* X86_TUNE_USE_CLTD */
1354 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1356 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1359 /* X86_TUNE_SPLIT_LONG_MOVES */
1362 /* X86_TUNE_READ_MODIFY_WRITE */
1365 /* X86_TUNE_READ_MODIFY */
1368 /* X86_TUNE_PROMOTE_QIMODE */
1369 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1370 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1372 /* X86_TUNE_FAST_PREFIX */
1373 ~(m_PENT | m_486 | m_386),
1375 /* X86_TUNE_SINGLE_STRINGOP */
1376 m_386 | m_PENT4 | m_NOCONA,
1378 /* X86_TUNE_QIMODE_MATH */
1381 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1382 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1383 might be considered for Generic32 if our scheme for avoiding partial
1384 stalls was more effective. */
1387 /* X86_TUNE_PROMOTE_QI_REGS */
1390 /* X86_TUNE_PROMOTE_HI_REGS */
1393 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1394 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1395 | m_CORE2 | m_GENERIC,
1397 /* X86_TUNE_ADD_ESP_8 */
1398 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1399 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1401 /* X86_TUNE_SUB_ESP_4 */
1402 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1405 /* X86_TUNE_SUB_ESP_8 */
1406 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1407 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1409 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1410 for DFmode copies */
1411 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1412 | m_GENERIC | m_GEODE),
1414 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1415 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1417 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1418 conflict here in between PPro/Pentium4 based chips that thread 128bit
1419 SSE registers as single units versus K8 based chips that divide SSE
1420 registers to two 64bit halves. This knob promotes all store destinations
1421 to be 128bit to allow register renaming on 128bit SSE units, but usually
1422 results in one extra microop on 64bit SSE units. Experimental results
1423 shows that disabling this option on P4 brings over 20% SPECfp regression,
1424 while enabling it on K8 brings roughly 2.4% regression that can be partly
1425 masked by careful scheduling of moves. */
1426 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1429 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1432 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1433 are resolved on SSE register parts instead of whole registers, so we may
1434 maintain just lower part of scalar values in proper format leaving the
1435 upper part undefined. */
1438 /* X86_TUNE_SSE_TYPELESS_STORES */
1441 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1442 m_PPRO | m_PENT4 | m_NOCONA,
1444 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1445 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1447 /* X86_TUNE_PROLOGUE_USING_MOVE */
1448 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1450 /* X86_TUNE_EPILOGUE_USING_MOVE */
1451 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1453 /* X86_TUNE_SHIFT1 */
1456 /* X86_TUNE_USE_FFREEP */
1459 /* X86_TUNE_INTER_UNIT_MOVES */
1460 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
1462 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1465 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1466 than 4 branch instructions in the 16 byte window. */
1467 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1470 /* X86_TUNE_SCHEDULE */
1471 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1474 /* X86_TUNE_USE_BT */
1475 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1477 /* X86_TUNE_USE_INCDEC */
1478 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1480 /* X86_TUNE_PAD_RETURNS */
1481 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1483 /* X86_TUNE_EXT_80387_CONSTANTS */
1484 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_SHORTEN_X87_SSE */
1490 /* X86_TUNE_AVOID_VECTOR_DECODE */
1493 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1494 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1497 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1498 vector path on AMD machines. */
1499 m_K8 | m_GENERIC64 | m_AMDFAM10,
1501 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1503 m_K8 | m_GENERIC64 | m_AMDFAM10,
1505 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1509 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1510 but one byte longer. */
1513 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1514 operand that cannot be represented using a modRM byte. The XOR
1515 replacement is long decoded, so this split helps here as well. */
1518 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1520 m_AMDFAM10 | m_GENERIC,
1522 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1523 from integer to FP. */
1526 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1527 with a subsequent conditional jump instruction into a single
1528 compare-and-branch uop. */
1531 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1532 will impact LEA instruction selection. */
1536 /* Feature tests against the various architecture variations. */
/* Filled in from initial_ix86_arch_features for the selected -march
   processor; queried via the X86_ARCH_* indices.  */
1537 unsigned char ix86_arch_features[X86_ARCH_LAST];
1539 /* Feature tests against the various architecture variations, used to create
1540 ix86_arch_features based on the processor mask. */
/* NOTE(review): entry order must match the X86_ARCH_* enum; masks are
   expressed as ~(older processors lacking the feature).  */
1541 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1542 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1543 ~(m_386 | m_486 | m_PENT | m_K6),
1545 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1548 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1551 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1554 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
/* Processor masks for two tunings kept outside the feature tables.  */
1558 static const unsigned int x86_accumulate_outgoing_args
1559 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1562 static const unsigned int x86_arch_always_fancy_math_387
1563 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1564 | m_NOCONA | m_CORE2 | m_GENERIC;
/* String-operation algorithm forced by -mstringop-strategy; no_stringop
   means "pick from the cost tables".  */
1566 static enum stringop_alg stringop_alg = no_stringop;
1568 /* In case the average insn count for single function invocation is
1569 lower than this constant, emit fast (but longer) prologue and
1571 #define FAST_PROLOGUE_INSN_COUNT 20
1573 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1574 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1575 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1576 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1578 /* Array of the smallest class containing reg number REGNO, indexed by
1579 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1581 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1583 /* ax, dx, cx, bx */
1584 AREG, DREG, CREG, BREG,
1585 /* si, di, bp, sp */
1586 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* x87 FP stack registers.  */
1588 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1589 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1592 /* flags, fpsr, fpcr, frame */
1593 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* SSE registers (xmm0 gets its own class, SSE_FIRST_REG).  */
1595 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* MMX registers.  */
1598 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* REX integer registers r8-r15 (64-bit only, hence NON_Q_REGS).  */
1601 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 /* SSE REX registers */
1604 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1608 /* The "default" register map used in 32bit mode. */
/* Maps gcc register numbers to debug-format (DBX/DWARF) numbers; -1
   marks registers with no debug-format equivalent.  */
1610 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1612 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1613 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1614 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1615 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1616 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1617 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1621 /* The "default" register map used in 64bit mode. */
/* 64-bit analogue of dbx_register_map; note the REX registers are
   mapped here (8-15 and 25-32) instead of -1.  */
1623 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1625 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1626 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1627 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1628 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1629 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1630 8,9,10,11,12,13,14,15, /* extended integer registers */
1631 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1634 /* Define the register numbers to be used in Dwarf debugging information.
1635 The SVR4 reference port C compiler uses the following register numbers
1636 in its Dwarf output code:
1637 0 for %eax (gcc regno = 0)
1638 1 for %ecx (gcc regno = 2)
1639 2 for %edx (gcc regno = 1)
1640 3 for %ebx (gcc regno = 3)
1641 4 for %esp (gcc regno = 7)
1642 5 for %ebp (gcc regno = 6)
1643 6 for %esi (gcc regno = 4)
1644 7 for %edi (gcc regno = 5)
1645 The following three DWARF register numbers are never generated by
1646 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1647 believes these numbers have these meanings.
1648 8 for %eip (no gcc equivalent)
1649 9 for %eflags (gcc regno = 17)
1650 10 for %trapno (no gcc equivalent)
1651 It is not at all clear how we should number the FP stack registers
1652 for the x86 architecture. If the version of SDB on x86/svr4 were
1653 a bit less brain dead with respect to floating-point then we would
1654 have a precedent to follow with respect to DWARF register numbers
1655 for x86 FP registers, but the SDB on x86/svr4 is so completely
1656 broken with respect to FP registers that it is hardly worth thinking
1657 of it as something to strive for compatibility with.
1658 The version of x86/svr4 SDB I have at the moment does (partially)
1659 seem to believe that DWARF register number 11 is associated with
1660 the x86 register %st(0), but that's about all. Higher DWARF
1661 register numbers don't seem to be associated with anything in
1662 particular, and even for DWARF regno 11, SDB only seems to under-
1663 stand that it should say that a variable lives in %st(0) (when
1664 asked via an `=' command) if we said it was in DWARF regno 11,
1665 but SDB still prints garbage when asked for the value of the
1666 variable in question (via a `/' command).
1667 (Also note that the labels SDB prints for various FP stack regs
1668 when doing an `x' command are all wrong.)
1669 Note that these problems generally don't affect the native SVR4
1670 C compiler because it doesn't allow the use of -O with -g and
1671 because when it is *not* optimizing, it allocates a memory
1672 location for each floating-point variable, and the memory
1673 location is what gets described in the DWARF AT_location
1674 attribute for the variable in question.
1675 Regardless of the severe mental illness of the x86/svr4 SDB, we
1676 do something sensible here and we use the following DWARF
1677 register numbers. Note that these are all stack-top-relative
1679 11 for %st(0) (gcc regno = 8)
1680 12 for %st(1) (gcc regno = 9)
1681 13 for %st(2) (gcc regno = 10)
1682 14 for %st(3) (gcc regno = 11)
1683 15 for %st(4) (gcc regno = 12)
1684 16 for %st(5) (gcc regno = 13)
1685 17 for %st(6) (gcc regno = 14)
1686 18 for %st(7) (gcc regno = 15)
1688 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1690 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1691 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1692 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1693 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1694 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1695 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1699 /* Test and compare insns in i386.md store the information needed to
1700 generate branch and scc insns here. */
1702 rtx ix86_compare_op0 = NULL_RTX;
1703 rtx ix86_compare_op1 = NULL_RTX;
1705 /* Define parameter passing and return registers. */
/* Integer argument registers, in argument order, for the default
   (non-Microsoft) x86-64 calling convention. */
1707 static int const x86_64_int_parameter_registers[6] =
1709 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
/* Integer argument registers for the Microsoft x86-64 calling convention
   (only four register arguments). */
1712 static int const x86_64_ms_abi_int_parameter_registers[4] =
1714 CX_REG, DX_REG, R8_REG, R9_REG
/* Registers used to return integer values. */
1717 static int const x86_64_int_return_registers[4] =
1719 AX_REG, DX_REG, DI_REG, SI_REG
1722 /* Define the structure for the machine field in struct function. */
1724 struct GTY(()) stack_local_entry {
1725 unsigned short mode;
1728 struct stack_local_entry *next;
1731 /* Structure describing stack frame layout.
1732 Stack grows downward:
1738 saved frame pointer if frame_pointer_needed
1739 <- HARD_FRAME_POINTER
1748 [va_arg registers] (
1749 > to_allocate <- FRAME_POINTER
1761 HOST_WIDE_INT frame;
1763 int outgoing_arguments_size;
1766 HOST_WIDE_INT to_allocate;
1767 /* The offsets relative to ARG_POINTER. */
1768 HOST_WIDE_INT frame_pointer_offset;
1769 HOST_WIDE_INT hard_frame_pointer_offset;
1770 HOST_WIDE_INT stack_pointer_offset;
1772 /* When save_regs_using_mov is set, emit prologue using
1773 move instead of push instructions. */
1774 bool save_regs_using_mov;
1777 /* Code model option. */
1778 enum cmodel ix86_cmodel;
1780 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1782 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1784 /* Which unit we are generating floating point math for. */
1785 enum fpmath_unit ix86_fpmath;
1787 /* Which cpu are we scheduling for. */
1788 enum attr_cpu ix86_schedule;
1790 /* Which cpu are we optimizing for. */
1791 enum processor_type ix86_tune;
1793 /* Which instruction set architecture to use. */
1794 enum processor_type ix86_arch;
1796 /* true if sse prefetch instruction is not NOOP. */
1797 int x86_prefetch_sse;
1799 /* ix86_regparm_string as a number */
1800 static int ix86_regparm;
1802 /* -mstackrealign option */
1803 extern int ix86_force_align_arg_pointer;
1804 static const char ix86_force_align_arg_pointer_string[]
1805 = "force_align_arg_pointer";
/* Indirect generator functions for insn patterns that come in SImode and
   DImode variants.  NOTE(review): presumably these are pointed at the
   mode-appropriate gen_* functions during option override based on
   TARGET_64BIT -- the initialization site is not visible here; confirm. */
1807 static rtx (*ix86_gen_leave) (void);
1808 static rtx (*ix86_gen_pop1) (rtx);
1809 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1810 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1812 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1813 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1814 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1816 /* Preferred alignment for stack boundary in bits. */
1817 unsigned int ix86_preferred_stack_boundary;
1819 /* Alignment for incoming stack boundary in bits specified at
1821 static unsigned int ix86_user_incoming_stack_boundary;
1823 /* Default alignment for incoming stack boundary in bits. */
1824 static unsigned int ix86_default_incoming_stack_boundary;
1826 /* Alignment for incoming stack boundary in bits. */
1827 unsigned int ix86_incoming_stack_boundary;
1829 /* The abi used by target. */
1830 enum calling_abi ix86_abi;
1832 /* Values 1-5: see jump.c */
1833 int ix86_branch_cost;
1835 /* Calling abi specific va_list type nodes. */
1836 static GTY(()) tree sysv_va_list_type_node;
1837 static GTY(()) tree ms_va_list_type_node;
1839 /* Variables which are this size or smaller are put in the data/bss
1840 or ldata/lbss sections. */
1842 int ix86_section_threshold = 65536;
1844 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1845 char internal_label_prefix[16];
1846 int internal_label_prefix_len;
1848 /* Fence to use after loop using movnt. */
1851 /* Register class used for passing given 64bit part of the argument.
1852 These represent classes as documented by the PS ABI, with the exception
1853 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1854 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1856 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1857 whenever possible (upper half does contain padding). */
1858 enum x86_64_reg_class
1861 X86_64_INTEGER_CLASS,
1862 X86_64_INTEGERSI_CLASS,
1869 X86_64_COMPLEX_X87_CLASS,
1873 #define MAX_CLASSES 4
1875 /* Table of constants used by fldpi, fldln2, etc.... */
1876 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1877 static bool ext_80387_constants_init = 0;
1880 static struct machine_function * ix86_init_machine_status (void);
1881 static rtx ix86_function_value (const_tree, const_tree, bool);
1882 static int ix86_function_regparm (const_tree, const_tree);
1883 static void ix86_compute_frame_layout (struct ix86_frame *);
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1886 static void ix86_add_new_builtins (int);
1888 enum ix86_function_specific_strings
1890 IX86_FUNCTION_SPECIFIC_ARCH,
1891 IX86_FUNCTION_SPECIFIC_TUNE,
1892 IX86_FUNCTION_SPECIFIC_FPMATH,
1893 IX86_FUNCTION_SPECIFIC_MAX
1896 static char *ix86_target_string (int, int, const char *, const char *,
1897 const char *, bool);
1898 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1899 static void ix86_function_specific_save (struct cl_target_option *);
1900 static void ix86_function_specific_restore (struct cl_target_option *);
1901 static void ix86_function_specific_print (FILE *, int,
1902 struct cl_target_option *);
1903 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1904 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1905 static bool ix86_can_inline_p (tree, tree);
1906 static void ix86_set_current_function (tree);
1908 static enum calling_abi ix86_function_abi (const_tree);
1911 /* The svr4 ABI for the i386 says that records and unions are returned
1913 #ifndef DEFAULT_PCC_STRUCT_RETURN
1914 #define DEFAULT_PCC_STRUCT_RETURN 1
1917 /* Whether -mtune= or -march= were specified */
1918 static int ix86_tune_defaulted;
1919 static int ix86_arch_specified;
1921 /* Bit flags that specify the ISA we are compiling for. */
1922 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1924 /* A mask of ix86_isa_flags that includes bit X if X
1925 was set or cleared on the command line. */
1926 static int ix86_isa_flags_explicit;
1928 /* Define a set of ISAs which are available when a given ISA is
1929 enabled. MMX and SSE ISAs are handled separately. */
1931 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1932 #define OPTION_MASK_ISA_3DNOW_SET \
1933 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1935 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1936 #define OPTION_MASK_ISA_SSE2_SET \
1937 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1938 #define OPTION_MASK_ISA_SSE3_SET \
1939 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1940 #define OPTION_MASK_ISA_SSSE3_SET \
1941 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1942 #define OPTION_MASK_ISA_SSE4_1_SET \
1943 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1944 #define OPTION_MASK_ISA_SSE4_2_SET \
1945 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1946 #define OPTION_MASK_ISA_AVX_SET \
1947 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1948 #define OPTION_MASK_ISA_FMA_SET \
1949 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1951 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1953 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1955 #define OPTION_MASK_ISA_SSE4A_SET \
1956 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1957 #define OPTION_MASK_ISA_SSE5_SET \
1958 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1960 /* AES and PCLMUL need SSE2 because they use xmm registers */
1961 #define OPTION_MASK_ISA_AES_SET \
1962 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1963 #define OPTION_MASK_ISA_PCLMUL_SET \
1964 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1966 #define OPTION_MASK_ISA_ABM_SET \
1967 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1969 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1970 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1971 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1972 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1973 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1975 /* Define a set of ISAs which aren't available when a given ISA is
1976 disabled. MMX and SSE ISAs are handled separately. */
1978 #define OPTION_MASK_ISA_MMX_UNSET \
1979 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1980 #define OPTION_MASK_ISA_3DNOW_UNSET \
1981 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1982 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1984 #define OPTION_MASK_ISA_SSE_UNSET \
1985 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1986 #define OPTION_MASK_ISA_SSE2_UNSET \
1987 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1988 #define OPTION_MASK_ISA_SSE3_UNSET \
1989 (OPTION_MASK_ISA_SSE3 \
1990 | OPTION_MASK_ISA_SSSE3_UNSET \
1991 | OPTION_MASK_ISA_SSE4A_UNSET )
1992 #define OPTION_MASK_ISA_SSSE3_UNSET \
1993 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1994 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1995 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1996 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1997 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1998 #define OPTION_MASK_ISA_AVX_UNSET \
1999 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
2000 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2002 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
2004 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2006 #define OPTION_MASK_ISA_SSE4A_UNSET \
2007 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
2008 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
2009 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2010 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2011 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2012 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2013 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2014 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2015 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2016 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2018 /* Vectorization library interface and handlers. */
2019 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2020 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2021 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2023 /* Processor target table, indexed by processor number */
2026 const struct processor_costs *cost; /* Processor costs */
2027 const int align_loop; /* Default alignments. */
2028 const int align_loop_max_skip;
2029 const int align_jump;
2030 const int align_jump_max_skip;
2031 const int align_func;
/* One entry per processor; indexed by enum processor_type, so the entry
   order here must track that enum.  Fields per entry:
   { cost table, align_loop, align_loop_max_skip,
     align_jump, align_jump_max_skip, align_func }. */
2034 static const struct ptt processor_target_table[PROCESSOR_max] =
2036 {&i386_cost, 4, 3, 4, 3, 4},
2037 {&i486_cost, 16, 15, 16, 15, 16},
2038 {&pentium_cost, 16, 7, 16, 7, 16},
2039 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2040 {&geode_cost, 0, 0, 0, 0, 0},
2041 {&k6_cost, 32, 7, 32, 7, 32},
2042 {&athlon_cost, 16, 7, 16, 7, 16},
2043 {&pentium4_cost, 0, 0, 0, 0, 0},
2044 {&k8_cost, 16, 7, 16, 7, 16},
2045 {&nocona_cost, 0, 0, 0, 0, 0},
2046 {&core2_cost, 16, 10, 16, 10, 16},
2047 {&generic32_cost, 16, 7, 16, 7, 16},
2048 {&generic64_cost, 16, 10, 16, 10, 16},
2049 {&amdfam10_cost, 32, 24, 32, 7, 32},
2050 {&atom_cost, 16, 7, 16, 7, 16}
2053 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2079 /* Implement TARGET_HANDLE_OPTION. */
2082 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2089 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2090 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2094 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2095 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2102 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2103 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2107 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2108 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2118 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2119 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2123 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2124 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2131 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2132 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2136 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2137 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2144 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2145 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2149 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2150 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2158 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2162 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2163 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2170 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2171 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2175 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2176 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2183 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2184 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2188 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2189 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2196 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2197 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2201 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2202 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2209 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2210 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2214 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2220 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2225 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2232 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2237 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2238 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2245 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2246 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2250 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2251 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2258 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2259 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2263 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2264 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2271 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2272 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2276 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2277 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2284 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2285 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2289 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2290 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2297 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2298 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2302 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2303 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2310 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2311 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2315 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2316 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2323 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2324 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2328 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2329 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2336 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2337 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2341 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2342 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2349 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2350 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2354 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2364 /* Return a string the documents the current -m options. The caller is
2365 responsible for freeing the string. */
2368 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2369 const char *fpmath, bool add_nl_p)
2371 struct ix86_target_opts
2373 const char *option; /* option string */
2374 int mask; /* isa mask options */
2377 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2378 preceding options will match those first. */
2379 static struct ix86_target_opts isa_opts[] =
2381 { "-m64", OPTION_MASK_ISA_64BIT },
2382 { "-msse5", OPTION_MASK_ISA_SSE5 },
2383 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2384 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2385 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2386 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2387 { "-msse3", OPTION_MASK_ISA_SSE3 },
2388 { "-msse2", OPTION_MASK_ISA_SSE2 },
2389 { "-msse", OPTION_MASK_ISA_SSE },
2390 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2391 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2392 { "-mmmx", OPTION_MASK_ISA_MMX },
2393 { "-mabm", OPTION_MASK_ISA_ABM },
2394 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2395 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2396 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2397 { "-maes", OPTION_MASK_ISA_AES },
2398 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2402 static struct ix86_target_opts flag_opts[] =
2404 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2405 { "-m80387", MASK_80387 },
2406 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2407 { "-malign-double", MASK_ALIGN_DOUBLE },
2408 { "-mcld", MASK_CLD },
2409 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2410 { "-mieee-fp", MASK_IEEE_FP },
2411 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2412 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2413 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2414 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2415 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2416 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2417 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2418 { "-mno-red-zone", MASK_NO_RED_ZONE },
2419 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2420 { "-mrecip", MASK_RECIP },
2421 { "-mrtd", MASK_RTD },
2422 { "-msseregparm", MASK_SSEREGPARM },
2423 { "-mstack-arg-probe", MASK_STACK_PROBE },
2424 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2427 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2430 char target_other[40];
2439 memset (opts, '\0', sizeof (opts));
2441 /* Add -march= option. */
2444 opts[num][0] = "-march=";
2445 opts[num++][1] = arch;
2448 /* Add -mtune= option. */
2451 opts[num][0] = "-mtune=";
2452 opts[num++][1] = tune;
2455 /* Pick out the options in isa options. */
2456 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2458 if ((isa & isa_opts[i].mask) != 0)
2460 opts[num++][0] = isa_opts[i].option;
2461 isa &= ~ isa_opts[i].mask;
2465 if (isa && add_nl_p)
2467 opts[num++][0] = isa_other;
2468 sprintf (isa_other, "(other isa: 0x%x)", isa);
2471 /* Add flag options. */
2472 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2474 if ((flags & flag_opts[i].mask) != 0)
2476 opts[num++][0] = flag_opts[i].option;
2477 flags &= ~ flag_opts[i].mask;
2481 if (flags && add_nl_p)
2482 {
2483 opts[num++][0] = target_other;
/* Bug fix: report the leftover target FLAGS bits here, not ISA.
   The parallel branch above already reports the unknown isa bits;
   printing ISA again made "(other flags: ...)" show the wrong mask. */
2484 sprintf (target_other, "(other flags: 0x%x)", flags);
2485 }
2487 /* Add -fpmath= option. */
2490 opts[num][0] = "-mfpmath=";
2491 opts[num++][1] = fpmath;
2498 gcc_assert (num < ARRAY_SIZE (opts));
2500 /* Size the string. */
2502 sep_len = (add_nl_p) ? 3 : 1;
2503 for (i = 0; i < num; i++)
2506 for (j = 0; j < 2; j++)
2508 len += strlen (opts[i][j]);
2511 /* Build the string. */
2512 ret = ptr = (char *) xmalloc (len);
2515 for (i = 0; i < num; i++)
2519 for (j = 0; j < 2; j++)
2520 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2527 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2535 for (j = 0; j < 2; j++)
2538 memcpy (ptr, opts[i][j], len2[j]);
2540 line_len += len2[j];
2545 gcc_assert (ret + len >= ptr);
2550 /* Function that is callable from the debugger to print the current
2553 ix86_debug_options (void)
2555 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2556 ix86_arch_string, ix86_tune_string,
2557 ix86_fpmath_string, true);
2561 fprintf (stderr, "%s\n\n", opts);
2565 fputs ("<no options>\n\n", stderr);
2570 /* Sometimes certain combinations of command options do not make
2571 sense on a particular target machine. You can define a macro
2572 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2573 defined, is executed once just after all the command options have
2576 Don't use this macro to turn on various extra optimizations for
2577 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2580 override_options (bool main_args_p)
2583 unsigned int ix86_arch_mask, ix86_tune_mask;
2588 /* Comes from final.c -- no real reason to change it. */
2589 #define MAX_CODE_ALIGN 16
2597 PTA_PREFETCH_SSE = 1 << 4,
2599 PTA_3DNOW_A = 1 << 6,
2603 PTA_POPCNT = 1 << 10,
2605 PTA_SSE4A = 1 << 12,
2606 PTA_NO_SAHF = 1 << 13,
2607 PTA_SSE4_1 = 1 << 14,
2608 PTA_SSE4_2 = 1 << 15,
2611 PTA_PCLMUL = 1 << 18,
2619 const char *const name; /* processor name or nickname. */
2620 const enum processor_type processor;
2621 const enum attr_cpu schedule;
2622 const unsigned /*enum pta_flags*/ flags;
2624 const processor_alias_table[] =
2626 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2627 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2628 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2629 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2630 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2631 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2632 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2633 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2634 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2635 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2636 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2637 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2638 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2640 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2642 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2643 PTA_MMX | PTA_SSE | PTA_SSE2},
2644 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2645 PTA_MMX |PTA_SSE | PTA_SSE2},
2646 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2647 PTA_MMX | PTA_SSE | PTA_SSE2},
2648 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2649 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2650 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2651 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2652 | PTA_CX16 | PTA_NO_SAHF},
2653 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2654 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2655 | PTA_SSSE3 | PTA_CX16},
2656 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2657 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2658 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2659 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2660 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2661 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2662 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2663 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2664 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2665 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2666 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2667 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2668 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2669 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2670 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2671 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2672 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2673 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2674 {"x86-64", PROCESSOR_K8, CPU_K8,
2675 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2676 {"k8", PROCESSOR_K8, CPU_K8,
2677 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2678 | PTA_SSE2 | PTA_NO_SAHF},
2679 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2680 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2681 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2682 {"opteron", PROCESSOR_K8, CPU_K8,
2683 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2684 | PTA_SSE2 | PTA_NO_SAHF},
2685 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2686 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2687 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2688 {"athlon64", PROCESSOR_K8, CPU_K8,
2689 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2690 | PTA_SSE2 | PTA_NO_SAHF},
2691 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2692 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2693 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2694 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2695 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2696 | PTA_SSE2 | PTA_NO_SAHF},
2697 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2698 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2699 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2700 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2701 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2702 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2703 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2704 0 /* flags are only used for -march switch. */ },
2705 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2706 PTA_64BIT /* flags are only used for -march switch. */ },
2709 int const pta_size = ARRAY_SIZE (processor_alias_table);
2711 /* Set up prefix/suffix so the error messages refer to either the command
2712 line argument, or the attribute(target). */
2721 prefix = "option(\"";
2726 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2727 SUBTARGET_OVERRIDE_OPTIONS;
2730 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2731 SUBSUBTARGET_OVERRIDE_OPTIONS;
2734 /* -fPIC is the default for x86_64. */
2735 if (TARGET_MACHO && TARGET_64BIT)
2738 /* Set the default values for switches whose default depends on TARGET_64BIT
2739 in case they weren't overwritten by command line options. */
2742 /* Mach-O doesn't support omitting the frame pointer for now. */
2743 if (flag_omit_frame_pointer == 2)
2744 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2745 if (flag_asynchronous_unwind_tables == 2)
2746 flag_asynchronous_unwind_tables = 1;
2747 if (flag_pcc_struct_return == 2)
2748 flag_pcc_struct_return = 0;
2752 if (flag_omit_frame_pointer == 2)
2753 flag_omit_frame_pointer = 0;
2754 if (flag_asynchronous_unwind_tables == 2)
2755 flag_asynchronous_unwind_tables = 0;
2756 if (flag_pcc_struct_return == 2)
2757 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2760 /* Need to check -mtune=generic first. */
2761 if (ix86_tune_string)
2763 if (!strcmp (ix86_tune_string, "generic")
2764 || !strcmp (ix86_tune_string, "i686")
2765 /* As special support for cross compilers we read -mtune=native
2766 as -mtune=generic. With native compilers we won't see the
2767 -mtune=native, as it was changed by the driver. */
2768 || !strcmp (ix86_tune_string, "native"))
2771 ix86_tune_string = "generic64";
2773 ix86_tune_string = "generic32";
2775 /* If this call is for setting the option attribute, allow the
2776 generic32/generic64 that was previously set. */
2777 else if (!main_args_p
2778 && (!strcmp (ix86_tune_string, "generic32")
2779 || !strcmp (ix86_tune_string, "generic64")))
2781 else if (!strncmp (ix86_tune_string, "generic", 7))
2782 error ("bad value (%s) for %stune=%s %s",
2783 ix86_tune_string, prefix, suffix, sw);
2787 if (ix86_arch_string)
2788 ix86_tune_string = ix86_arch_string;
2789 if (!ix86_tune_string)
2791 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2792 ix86_tune_defaulted = 1;
2795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2796 need to use a sensible tune option. */
2797 if (!strcmp (ix86_tune_string, "generic")
2798 || !strcmp (ix86_tune_string, "x86-64")
2799 || !strcmp (ix86_tune_string, "i686"))
2802 ix86_tune_string = "generic64";
2804 ix86_tune_string = "generic32";
2807 if (ix86_stringop_string)
2809 if (!strcmp (ix86_stringop_string, "rep_byte"))
2810 stringop_alg = rep_prefix_1_byte;
2811 else if (!strcmp (ix86_stringop_string, "libcall"))
2812 stringop_alg = libcall;
2813 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2814 stringop_alg = rep_prefix_4_byte;
2815 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2817 /* rep; movq isn't available in 32-bit code. */
2818 stringop_alg = rep_prefix_8_byte;
2819 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2820 stringop_alg = loop_1_byte;
2821 else if (!strcmp (ix86_stringop_string, "loop"))
2822 stringop_alg = loop;
2823 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2824 stringop_alg = unrolled_loop;
2826 error ("bad value (%s) for %sstringop-strategy=%s %s",
2827 ix86_stringop_string, prefix, suffix, sw);
2829 if (!strcmp (ix86_tune_string, "x86-64"))
2830 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2831 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2832 prefix, suffix, prefix, suffix, prefix, suffix);
2834 if (!ix86_arch_string)
2835 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2837 ix86_arch_specified = 1;
2839 if (!strcmp (ix86_arch_string, "generic"))
2840 error ("generic CPU can be used only for %stune=%s %s",
2841 prefix, suffix, sw);
2842 if (!strncmp (ix86_arch_string, "generic", 7))
2843 error ("bad value (%s) for %sarch=%s %s",
2844 ix86_arch_string, prefix, suffix, sw);
2846 /* Validate -mabi= value. */
2847 if (ix86_abi_string)
2849 if (strcmp (ix86_abi_string, "sysv") == 0)
2850 ix86_abi = SYSV_ABI;
2851 else if (strcmp (ix86_abi_string, "ms") == 0)
2854 error ("unknown ABI (%s) for %sabi=%s %s",
2855 ix86_abi_string, prefix, suffix, sw);
2858 ix86_abi = DEFAULT_ABI;
2860 if (ix86_cmodel_string != 0)
2862 if (!strcmp (ix86_cmodel_string, "small"))
2863 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2864 else if (!strcmp (ix86_cmodel_string, "medium"))
2865 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2866 else if (!strcmp (ix86_cmodel_string, "large"))
2867 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2869 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2870 else if (!strcmp (ix86_cmodel_string, "32"))
2871 ix86_cmodel = CM_32;
2872 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2873 ix86_cmodel = CM_KERNEL;
2875 error ("bad value (%s) for %scmodel=%s %s",
2876 ix86_cmodel_string, prefix, suffix, sw);
2880 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2881 use of rip-relative addressing. This eliminates fixups that
2882 would otherwise be needed if this object is to be placed in a
2883 DLL, and is essentially just as efficient as direct addressing. */
2884 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2885 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2886 else if (TARGET_64BIT)
2887 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2889 ix86_cmodel = CM_32;
2891 if (ix86_asm_string != 0)
2894 && !strcmp (ix86_asm_string, "intel"))
2895 ix86_asm_dialect = ASM_INTEL;
2896 else if (!strcmp (ix86_asm_string, "att"))
2897 ix86_asm_dialect = ASM_ATT;
2899 error ("bad value (%s) for %sasm=%s %s",
2900 ix86_asm_string, prefix, suffix, sw);
2902 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2903 error ("code model %qs not supported in the %s bit mode",
2904 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2905 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2906 sorry ("%i-bit mode not compiled in",
2907 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2909 for (i = 0; i < pta_size; i++)
2910 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2912 ix86_schedule = processor_alias_table[i].schedule;
2913 ix86_arch = processor_alias_table[i].processor;
2914 /* Default cpu tuning to the architecture. */
2915 ix86_tune = ix86_arch;
2917 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2918 error ("CPU you selected does not support x86-64 "
2921 if (processor_alias_table[i].flags & PTA_MMX
2922 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2923 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2924 if (processor_alias_table[i].flags & PTA_3DNOW
2925 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2926 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2927 if (processor_alias_table[i].flags & PTA_3DNOW_A
2928 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2929 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2930 if (processor_alias_table[i].flags & PTA_SSE
2931 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2932 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2933 if (processor_alias_table[i].flags & PTA_SSE2
2934 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2935 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2936 if (processor_alias_table[i].flags & PTA_SSE3
2937 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2938 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2939 if (processor_alias_table[i].flags & PTA_SSSE3
2940 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2941 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2942 if (processor_alias_table[i].flags & PTA_SSE4_1
2943 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2944 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2945 if (processor_alias_table[i].flags & PTA_SSE4_2
2946 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2947 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2948 if (processor_alias_table[i].flags & PTA_AVX
2949 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2950 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2951 if (processor_alias_table[i].flags & PTA_FMA
2952 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2953 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2954 if (processor_alias_table[i].flags & PTA_SSE4A
2955 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2956 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2957 if (processor_alias_table[i].flags & PTA_SSE5
2958 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2959 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2960 if (processor_alias_table[i].flags & PTA_ABM
2961 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2962 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2963 if (processor_alias_table[i].flags & PTA_CX16
2964 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2965 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2966 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2967 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2968 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2969 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2970 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2971 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2972 if (processor_alias_table[i].flags & PTA_MOVBE
2973 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
2974 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
2975 if (processor_alias_table[i].flags & PTA_AES
2976 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2977 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2978 if (processor_alias_table[i].flags & PTA_PCLMUL
2979 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2980 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2981 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2982 x86_prefetch_sse = true;
2988 error ("bad value (%s) for %sarch=%s %s",
2989 ix86_arch_string, prefix, suffix, sw);
2991 ix86_arch_mask = 1u << ix86_arch;
2992 for (i = 0; i < X86_ARCH_LAST; ++i)
2993 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2995 for (i = 0; i < pta_size; i++)
2996 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2998 ix86_schedule = processor_alias_table[i].schedule;
2999 ix86_tune = processor_alias_table[i].processor;
3000 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3002 if (ix86_tune_defaulted)
3004 ix86_tune_string = "x86-64";
3005 for (i = 0; i < pta_size; i++)
3006 if (! strcmp (ix86_tune_string,
3007 processor_alias_table[i].name))
3009 ix86_schedule = processor_alias_table[i].schedule;
3010 ix86_tune = processor_alias_table[i].processor;
3013 error ("CPU you selected does not support x86-64 "
3016 /* Intel CPUs have always interpreted SSE prefetch instructions as
3017 NOPs; so, we can enable SSE prefetch instructions even when
3018 -mtune (rather than -march) points us to a processor that has them.
3019 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3020 higher processors. */
3022 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3023 x86_prefetch_sse = true;
3027 error ("bad value (%s) for %stune=%s %s",
3028 ix86_tune_string, prefix, suffix, sw);
3030 ix86_tune_mask = 1u << ix86_tune;
3031 for (i = 0; i < X86_TUNE_LAST; ++i)
3032 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3035 ix86_cost = &ix86_size_cost;
3037 ix86_cost = processor_target_table[ix86_tune].cost;
3039 /* Arrange to set up i386_stack_locals for all functions. */
3040 init_machine_status = ix86_init_machine_status;
3042 /* Validate -mregparm= value. */
3043 if (ix86_regparm_string)
3046 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3047 i = atoi (ix86_regparm_string);
3048 if (i < 0 || i > REGPARM_MAX)
3049 error ("%sregparm=%d%s is not between 0 and %d",
3050 prefix, i, suffix, REGPARM_MAX);
3055 ix86_regparm = REGPARM_MAX;
3057 /* If the user has provided any of the -malign-* options,
3058 warn and use that value only if -falign-* is not set.
3059 Remove this code in GCC 3.2 or later. */
3060 if (ix86_align_loops_string)
3062 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3063 prefix, suffix, suffix);
3064 if (align_loops == 0)
3066 i = atoi (ix86_align_loops_string);
3067 if (i < 0 || i > MAX_CODE_ALIGN)
3068 error ("%salign-loops=%d%s is not between 0 and %d",
3069 prefix, i, suffix, MAX_CODE_ALIGN);
3071 align_loops = 1 << i;
3075 if (ix86_align_jumps_string)
3077 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3078 prefix, suffix, suffix);
3079 if (align_jumps == 0)
3081 i = atoi (ix86_align_jumps_string);
3082 if (i < 0 || i > MAX_CODE_ALIGN)
3083 error ("%salign-loops=%d%s is not between 0 and %d",
3084 prefix, i, suffix, MAX_CODE_ALIGN);
3086 align_jumps = 1 << i;
3090 if (ix86_align_funcs_string)
3092 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3093 prefix, suffix, suffix);
3094 if (align_functions == 0)
3096 i = atoi (ix86_align_funcs_string);
3097 if (i < 0 || i > MAX_CODE_ALIGN)
3098 error ("%salign-loops=%d%s is not between 0 and %d",
3099 prefix, i, suffix, MAX_CODE_ALIGN);
3101 align_functions = 1 << i;
3105 /* Default align_* from the processor table. */
3106 if (align_loops == 0)
3108 align_loops = processor_target_table[ix86_tune].align_loop;
3109 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3111 if (align_jumps == 0)
3113 align_jumps = processor_target_table[ix86_tune].align_jump;
3114 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3116 if (align_functions == 0)
3118 align_functions = processor_target_table[ix86_tune].align_func;
3121 /* Validate -mbranch-cost= value, or provide default. */
3122 ix86_branch_cost = ix86_cost->branch_cost;
3123 if (ix86_branch_cost_string)
3125 i = atoi (ix86_branch_cost_string);
3127 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3129 ix86_branch_cost = i;
3131 if (ix86_section_threshold_string)
3133 i = atoi (ix86_section_threshold_string);
3135 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3137 ix86_section_threshold = i;
3140 if (ix86_tls_dialect_string)
3142 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3143 ix86_tls_dialect = TLS_DIALECT_GNU;
3144 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3145 ix86_tls_dialect = TLS_DIALECT_GNU2;
3146 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3147 ix86_tls_dialect = TLS_DIALECT_SUN;
3149 error ("bad value (%s) for %stls-dialect=%s %s",
3150 ix86_tls_dialect_string, prefix, suffix, sw);
3153 if (ix87_precision_string)
3155 i = atoi (ix87_precision_string);
3156 if (i != 32 && i != 64 && i != 80)
3157 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3162 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3164 /* Enable by default the SSE and MMX builtins. Do allow the user to
3165 explicitly disable any of these. In particular, disabling SSE and
3166 MMX for kernel code is extremely useful. */
3167 if (!ix86_arch_specified)
3169 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3170 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3173 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3177 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3179 if (!ix86_arch_specified)
3181 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3183 /* i386 ABI does not specify red zone. It still makes sense to use it
3184 when the programmer takes care to keep the stack from being destroyed. */
3185 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3186 target_flags |= MASK_NO_RED_ZONE;
3189 /* Keep nonleaf frame pointers. */
3190 if (flag_omit_frame_pointer)
3191 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3192 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3193 flag_omit_frame_pointer = 1;
3195 /* If we're doing fast math, we don't care about comparison order
3196 wrt NaNs. This lets us use a shorter comparison sequence. */
3197 if (flag_finite_math_only)
3198 target_flags &= ~MASK_IEEE_FP;
3200 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3201 since the insns won't need emulation. */
3202 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3203 target_flags &= ~MASK_NO_FANCY_MATH_387;
3205 /* Likewise, if the target doesn't have a 387, or we've specified
3206 software floating point, don't use 387 inline intrinsics. */
3208 target_flags |= MASK_NO_FANCY_MATH_387;
3210 /* Turn on MMX builtins for -msse. */
3213 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3214 x86_prefetch_sse = true;
3217 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3218 if (TARGET_SSE4_2 || TARGET_ABM)
3219 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3221 /* Validate -mpreferred-stack-boundary= value or default it to
3222 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3223 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3224 if (ix86_preferred_stack_boundary_string)
3226 i = atoi (ix86_preferred_stack_boundary_string);
3227 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3228 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3229 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3231 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3234 /* Set the default value for -mstackrealign. */
3235 if (ix86_force_align_arg_pointer == -1)
3236 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3238 /* Validate -mincoming-stack-boundary= value or default it to
3239 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3240 if (ix86_force_align_arg_pointer)
3241 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3243 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3244 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3245 if (ix86_incoming_stack_boundary_string)
3247 i = atoi (ix86_incoming_stack_boundary_string);
3248 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3249 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3250 i, TARGET_64BIT ? 4 : 2);
3253 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3254 ix86_incoming_stack_boundary
3255 = ix86_user_incoming_stack_boundary;
3259 /* Accept -msseregparm only if at least SSE support is enabled. */
3260 if (TARGET_SSEREGPARM
3262 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3264 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3265 if (ix86_fpmath_string != 0)
3267 if (! strcmp (ix86_fpmath_string, "387"))
3268 ix86_fpmath = FPMATH_387;
3269 else if (! strcmp (ix86_fpmath_string, "sse"))
3273 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3274 ix86_fpmath = FPMATH_387;
3277 ix86_fpmath = FPMATH_SSE;
3279 else if (! strcmp (ix86_fpmath_string, "387,sse")
3280 || ! strcmp (ix86_fpmath_string, "387+sse")
3281 || ! strcmp (ix86_fpmath_string, "sse,387")
3282 || ! strcmp (ix86_fpmath_string, "sse+387")
3283 || ! strcmp (ix86_fpmath_string, "both"))
3287 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3288 ix86_fpmath = FPMATH_387;
3290 else if (!TARGET_80387)
3292 warning (0, "387 instruction set disabled, using SSE arithmetics");
3293 ix86_fpmath = FPMATH_SSE;
3296 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3299 error ("bad value (%s) for %sfpmath=%s %s",
3300 ix86_fpmath_string, prefix, suffix, sw);
3303 /* If the i387 is disabled, then do not return values in it. */
3305 target_flags &= ~MASK_FLOAT_RETURNS;
3307 /* Use external vectorized library in vectorizing intrinsics. */
3308 if (ix86_veclibabi_string)
3310 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3311 ix86_veclib_handler = ix86_veclibabi_svml;
3312 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3313 ix86_veclib_handler = ix86_veclibabi_acml;
3315 error ("unknown vectorization library ABI type (%s) for "
3316 "%sveclibabi=%s %s", ix86_veclibabi_string,
3317 prefix, suffix, sw);
3320 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3321 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3323 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3325 /* ??? Unwind info is not correct around the CFG unless either a frame
3326 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3327 unwind info generation to be aware of the CFG and propagating states
3329 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3330 || flag_exceptions || flag_non_call_exceptions)
3331 && flag_omit_frame_pointer
3332 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3334 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3335 warning (0, "unwind tables currently require either a frame pointer "
3336 "or %saccumulate-outgoing-args%s for correctness",
3338 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3341 /* If stack probes are required, the space used for large function
3342 arguments on the stack must also be probed, so enable
3343 -maccumulate-outgoing-args so this happens in the prologue. */
3344 if (TARGET_STACK_PROBE
3345 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3347 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3348 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3349 "for correctness", prefix, suffix);
3350 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3353 /* For sane SSE instruction set generation we need fcomi instruction.
3354 It is safe to enable all CMOVE instructions. */
3358 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3361 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3362 p = strchr (internal_label_prefix, 'X');
3363 internal_label_prefix_len = p - internal_label_prefix;
3367 /* When scheduling description is not available, disable scheduler pass
3368 so it won't slow down the compilation and make x87 code slower. */
3369 if (!TARGET_SCHEDULE)
3370 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3372 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3373 set_param_value ("simultaneous-prefetches",
3374 ix86_cost->simultaneous_prefetches);
3375 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3376 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3377 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3378 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3379 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3380 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3382 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3383 can be optimized to ap = __builtin_next_arg (0). */
3385 targetm.expand_builtin_va_start = NULL;
3389 ix86_gen_leave = gen_leave_rex64;
3390 ix86_gen_pop1 = gen_popdi1;
3391 ix86_gen_add3 = gen_adddi3;
3392 ix86_gen_sub3 = gen_subdi3;
3393 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3394 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3395 ix86_gen_monitor = gen_sse3_monitor64;
3396 ix86_gen_andsp = gen_anddi3;
3400 ix86_gen_leave = gen_leave;
3401 ix86_gen_pop1 = gen_popsi1;
3402 ix86_gen_add3 = gen_addsi3;
3403 ix86_gen_sub3 = gen_subsi3;
3404 ix86_gen_sub3_carry = gen_subsi3_carry;
3405 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3406 ix86_gen_monitor = gen_sse3_monitor;
3407 ix86_gen_andsp = gen_andsi3;
3411 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3413 target_flags |= MASK_CLD & ~target_flags_explicit;
3416 /* Save the initial options in case the user does function specific options */
3418 target_option_default_node = target_option_current_node
3419 = build_target_option_node ();
3422 /* Update register usage after having seen the compiler flags. */
/* NOTE(review): this excerpt elides some original lines (guard conditions
   and closing braces); the comments below describe only the visible code.  */
3425 ix86_conditional_register_usage (void)
/* fixed_regs/call_used_regs values above 1 are conditional encodings:
   3 means "applies in 64-bit mode only", 2 means "32-bit mode only".
   Collapse them to plain 0/1 for the mode actually selected.  */
3430 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3432 if (fixed_regs[i] > 1)
3433 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3434 if (call_used_regs[i] > 1)
3435 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3438 /* The PIC register, if it exists, is fixed. */
3439 j = PIC_OFFSET_TABLE_REGNUM;
3440 if (j != INVALID_REGNUM)
3441 fixed_regs[j] = call_used_regs[j] = 1;
3443 /* The MS_ABI changes the set of call-used registers. */
/* Under the 64-bit MS ABI, rsi/rdi and xmm6-xmm15 are callee-saved,
   so clear their call_used bits.  */
3444 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3446 call_used_regs[SI_REG] = 0;
3447 call_used_regs[DI_REG] = 0;
3448 call_used_regs[XMM6_REG] = 0;
3449 call_used_regs[XMM7_REG] = 0;
3450 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3451 call_used_regs[i] = 0;
3454 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3455 other call-clobbered regs for 64-bit. */
/* Rebuild CLOBBERED_REGS as the call-used subset of GENERAL_REGS.  */
3458 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3460 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3461 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3462 && call_used_regs[i])
3463 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3466 /* If MMX is disabled, squash the registers. */
/* "Squash" = make fixed and call-used and erase the printable name so the
   register allocator and asm output never touch the register.  The enabling
   guards (presumably !TARGET_MMX etc.) fall on elided lines — confirm
   against the full file.  */
3468 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3469 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3470 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3472 /* If SSE is disabled, squash the registers. */
3474 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3475 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3476 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3478 /* If the FPU is disabled, squash the registers. */
3479 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3480 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3481 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3482 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3484 /* If 32-bit, squash the 64-bit registers. */
3487 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3489 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3495 /* Save the current options */
/* Copy the current global i386 option state into PTR, a cl_target_option
   record that can be attached to a TARGET_OPTION tree node and later fed
   back to ix86_function_specific_restore.  */
3498 ix86_function_specific_save (struct cl_target_option *ptr)
3500 ptr->arch = ix86_arch;
3501 ptr->schedule = ix86_schedule;
3502 ptr->tune = ix86_tune;
3503 ptr->fpmath = ix86_fpmath;
3504 ptr->branch_cost = ix86_branch_cost;
3505 ptr->tune_defaulted = ix86_tune_defaulted;
3506 ptr->arch_specified = ix86_arch_specified;
3507 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3508 ptr->target_flags_explicit = target_flags_explicit;
3510 /* The fields are char but the variables are not; make sure the
3511 values fit in the fields. */
/* A failed assert here means a narrow struct field silently truncated
   the corresponding global's value on assignment above.  */
3512 gcc_assert (ptr->arch == ix86_arch);
3513 gcc_assert (ptr->schedule == ix86_schedule);
3514 gcc_assert (ptr->tune == ix86_tune);
3515 gcc_assert (ptr->fpmath == ix86_fpmath);
3516 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3519 /* Restore the current options */
/* Inverse of ix86_function_specific_save: load the global i386 option
   state from PTR, then rebuild the cached per-arch and per-tune feature
   bit arrays when the arch or tune setting actually changed.  */
3522 ix86_function_specific_restore (struct cl_target_option *ptr)
3524 enum processor_type old_tune = ix86_tune;
3525 enum processor_type old_arch = ix86_arch;
3526 unsigned int ix86_arch_mask, ix86_tune_mask;
3529 ix86_arch = (enum processor_type) ptr->arch;
3530 ix86_schedule = (enum attr_cpu) ptr->schedule;
3531 ix86_tune = (enum processor_type) ptr->tune;
3532 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3533 ix86_branch_cost = ptr->branch_cost;
3534 ix86_tune_defaulted = ptr->tune_defaulted;
3535 ix86_arch_specified = ptr->arch_specified;
3536 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3537 target_flags_explicit = ptr->target_flags_explicit;
3539 /* Recreate the arch feature tests if the arch changed */
3540 if (old_arch != ix86_arch)
3542 ix86_arch_mask = 1u << ix86_arch;
3543 for (i = 0; i < X86_ARCH_LAST; ++i)
3544 ix86_arch_features[i]
3545 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3548 /* Recreate the tune optimization tests */
3549 if (old_tune != ix86_tune)
3551 ix86_tune_mask = 1u << ix86_tune;
3552 for (i = 0; i < X86_TUNE_LAST; ++i)
3553 ix86_tune_features[i]
3554 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3558 /* Print the current options */
/* Dump the contents of PTR to FILE, each line left-padded by INDENT
   spaces (the "%*s" / indent idiom below).  cpu_names[] supplies the
   symbolic arch/tune names when the index is in range.  Frees the
   string returned by ix86_target_string after printing it.  */
3561 ix86_function_specific_print (FILE *file, int indent,
3562 struct cl_target_option *ptr)
3565 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3566 NULL, NULL, NULL, false);
3568 fprintf (file, "%*sarch = %d (%s)\n",
3571 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3572 ? cpu_names[ptr->arch]
3575 fprintf (file, "%*stune = %d (%s)\n",
3578 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3579 ? cpu_names[ptr->tune]
3582 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3583 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3584 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3585 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3589 fprintf (file, "%*s%s\n", indent, "", target_string);
3590 free (target_string);
3595 /* Inner function to process the attribute((target(...))), take an argument and
3596 set the current options from the argument. If we have a list, recursively go
/* NOTE(review): this excerpt elides some original lines (braces, returns,
   a few table entries); comments below describe only the visible code.

   ARGS is either a TREE_LIST (processed recursively) or a STRING_CST
   holding comma-separated option names.  ISA options ("sse2", "avx", ...)
   are forwarded to ix86_handle_option; yes/no options set or clear a bit
   in target_flags; string options ("arch=", "tune=", "fpmath=") are
   duplicated into P_STRINGS for the caller to apply.  */
3600 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
/* Table-entry constructors: string, its length, option category,
   option enum, and (for yes/no options) the target_flags mask.  */
3605 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3606 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3607 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3608 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3623 enum ix86_opt_type type;
3628 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3629 IX86_ATTR_ISA ("abm", OPT_mabm),
3630 IX86_ATTR_ISA ("aes", OPT_maes),
3631 IX86_ATTR_ISA ("avx", OPT_mavx),
3632 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3633 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3634 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3635 IX86_ATTR_ISA ("sse", OPT_msse),
3636 IX86_ATTR_ISA ("sse2", OPT_msse2),
3637 IX86_ATTR_ISA ("sse3", OPT_msse3),
3638 IX86_ATTR_ISA ("sse4", OPT_msse4),
3639 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3640 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3641 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3642 IX86_ATTR_ISA ("sse5", OPT_msse5),
3643 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3645 /* string options */
3646 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3647 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3648 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3651 IX86_ATTR_YES ("cld",
3655 IX86_ATTR_NO ("fancy-math-387",
3656 OPT_mfancy_math_387,
3657 MASK_NO_FANCY_MATH_387),
3659 IX86_ATTR_NO ("fused-madd",
3661 MASK_NO_FUSED_MADD),
3663 IX86_ATTR_YES ("ieee-fp",
3667 IX86_ATTR_YES ("inline-all-stringops",
3668 OPT_minline_all_stringops,
3669 MASK_INLINE_ALL_STRINGOPS),
3671 IX86_ATTR_YES ("inline-stringops-dynamically",
3672 OPT_minline_stringops_dynamically,
3673 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3675 IX86_ATTR_NO ("align-stringops",
3676 OPT_mno_align_stringops,
3677 MASK_NO_ALIGN_STRINGOPS),
3679 IX86_ATTR_YES ("recip",
3685 /* If this is a list, recurse to get the options. */
3686 if (TREE_CODE (args) == TREE_LIST)
3690 for (; args; args = TREE_CHAIN (args))
3691 if (TREE_VALUE (args)
3692 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3698 else if (TREE_CODE (args) != STRING_CST)
3701 /* Handle multiple arguments separated by commas. */
/* ASTRDUP puts the working copy on the obstack, so the tokenized string
   needs no explicit free.  */
3702 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3704 while (next_optstr && *next_optstr != '\0')
3706 char *p = next_optstr;
3708 char *comma = strchr (next_optstr, ',');
3709 const char *opt_string;
3710 size_t len, opt_len;
3715 enum ix86_opt_type type = ix86_opt_unknown;
3721 len = comma - next_optstr;
3722 next_optstr = comma + 1;
3730 /* Recognize no-xxx. */
3731 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3740 /* Find the option. */
/* Linear scan; string-valued options ("arch=") match as a prefix
   (len > opt_len), everything else must match exactly.  */
3743 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3745 type = attrs[i].type;
3746 opt_len = attrs[i].len;
3747 if (ch == attrs[i].string[0]
3748 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3749 && memcmp (p, attrs[i].string, opt_len) == 0)
3752 mask = attrs[i].mask;
3753 opt_string = attrs[i].string;
3758 /* Process the option. */
3761 error ("attribute(target(\"%s\")) is unknown", orig_p);
3765 else if (type == ix86_opt_isa)
3766 ix86_handle_option (opt, p, opt_set_p);
3768 else if (type == ix86_opt_yes || type == ix86_opt_no)
3770 if (type == ix86_opt_no)
3771 opt_set_p = !opt_set_p;
3774 target_flags |= mask;
3776 target_flags &= ~mask;
3779 else if (type == ix86_opt_str)
/* Duplicate specifications of the same string option are an error.  */
3783 error ("option(\"%s\") was already specified", opt_string);
3787 p_strings[opt] = xstrdup (p + opt_len);
3797 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
/* Parse ARGS via ix86_valid_target_attribute_inner_p, temporarily
   install the resulting option strings, rerun override_options to
   recompute derived state, snapshot it into a target_option node, and
   then restore the original global strings before returning.  */
3800 ix86_valid_target_attribute_tree (tree args)
3802 const char *orig_arch_string = ix86_arch_string;
3803 const char *orig_tune_string = ix86_tune_string;
3804 const char *orig_fpmath_string = ix86_fpmath_string;
3805 int orig_tune_defaulted = ix86_tune_defaulted;
3806 int orig_arch_specified = ix86_arch_specified;
3807 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3810 struct cl_target_option *def
3811 = TREE_TARGET_OPTION (target_option_default_node);
3813 /* Process each of the options on the chain. */
3814 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3817 /* If the changed options are different from the default, rerun override_options,
3818 and then save the options away. The string options are attribute options,
3819 and will be undone when we copy the save structure. */
3820 if (ix86_isa_flags != def->ix86_isa_flags
3821 || target_flags != def->target_flags
3822 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3823 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3824 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3826 /* If we are using the default tune= or arch=, undo the string assigned,
3827 and use the default. */
3828 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3829 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3830 else if (!orig_arch_specified)
3831 ix86_arch_string = NULL;
3833 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3834 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3835 else if (orig_tune_defaulted)
3836 ix86_tune_string = NULL;
3838 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3839 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3840 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3841 else if (!TARGET_64BIT && TARGET_SSE)
3842 ix86_fpmath_string = "sse,387";
3844 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3845 override_options (false);
3847 /* Add any builtin functions with the new isa if any. */
3848 ix86_add_new_builtins (ix86_isa_flags);
3850 /* Save the current options unless we are validating options for
3852 t = build_target_option_node ();
3854 ix86_arch_string = orig_arch_string;
3855 ix86_tune_string = orig_tune_string;
3856 ix86_fpmath_string = orig_fpmath_string;
3858 /* Free up memory allocated to hold the strings */
/* build_target_option_node has already captured what it needs, so the
   xstrdup'ed attribute strings can be released here.  */
3859 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3860 if (option_strings[i])
3861 free (option_strings[i]);
3867 /* Hook to validate attribute((target("string"))). */
/* Target hook: validate a target attribute on FNDECL and attach the
   resulting target/optimization nodes to the decl.  Saves and restores
   both the target options and (when they differ) the optimization
   options around the validation so the global state is left unchanged.  */
3870 ix86_valid_target_attribute_p (tree fndecl,
3871 tree ARG_UNUSED (name),
3873 int ARG_UNUSED (flags))
3875 struct cl_target_option cur_target;
3877 tree old_optimize = build_optimization_node ();
3878 tree new_target, new_optimize;
3879 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3881 /* If the function changed the optimization levels as well as setting target
3882 options, start with the optimizations specified. */
3883 if (func_optimize && func_optimize != old_optimize)
3884 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3886 /* The target attributes may also change some optimization flags, so update
3887 the optimization options if necessary. */
3888 cl_target_option_save (&cur_target);
3889 new_target = ix86_valid_target_attribute_tree (args);
3890 new_optimize = build_optimization_node ();
3897 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3899 if (old_optimize != new_optimize)
3900 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
/* Undo the temporary option changes made during validation.  */
3903 cl_target_option_restore (&cur_target);
3905 if (old_optimize != new_optimize)
3906 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3912 /* Hook to determine if one function can safely inline another. */
/* CALLER may inline CALLEE when the callee's ISA flags are a subset of
   the caller's and all other target options (flags, arch, tune, fpmath,
   branch cost) match exactly.  */
3915 ix86_can_inline_p (tree caller, tree callee)
3918 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3919 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3921 /* If callee has no option attributes, then it is ok to inline. */
3925 /* If caller has no option attributes, but callee does then it is not ok to
3927 else if (!caller_tree)
3932 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3933 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3935 /* Callee's isa options should be a subset of the caller's, i.e. a SSE5 function
3936 can inline a SSE2 function but a SSE2 function can't inline a SSE5
3938 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3939 != callee_opts->ix86_isa_flags)
3942 /* See if we have the same non-isa options. */
3943 else if (caller_opts->target_flags != callee_opts->target_flags)
3946 /* See if arch, tune, etc. are the same. */
3947 else if (caller_opts->arch != callee_opts->arch)
3950 else if (caller_opts->tune != callee_opts->tune)
3953 else if (caller_opts->fpmath != callee_opts->fpmath)
3956 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3967 /* Remember the last target of ix86_set_current_function. */
/* Cached so ix86_set_current_function can return early when re-entered
   for the same FNDECL.  GTY(()) registers it as a GC root.  */
3968 static GTY(()) tree ix86_previous_fndecl;
3970 /* Establish appropriate back-end context for processing the function
3971 FNDECL. The argument might be NULL to indicate processing at top
3972 level, outside of any function scope. */
3974 ix86_set_current_function (tree fndecl)
3976 /* Only change the context if the function changes. This hook is called
3977 several times in the course of compiling a function, and we don't want to
3978 slow things down too much or call target_reinit when it isn't safe. */
3979 if (fndecl && fndecl != ix86_previous_fndecl)
3981 tree old_tree = (ix86_previous_fndecl
3982 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3985 tree new_tree = (fndecl
3986 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3989 ix86_previous_fndecl = fndecl;
/* Same per-function target options as last time: nothing to do.  */
3990 if (old_tree == new_tree)
/* Otherwise install either the new function's options or, when it has
   none, the current global default options.  */
3995 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4001 struct cl_target_option *def
4002 = TREE_TARGET_OPTION (target_option_current_node);
4004 cl_target_option_restore (def);
4011 /* Return true if this goes in large data/bss. */
/* EXP is a decl or type.  Only meaningful for the medium code models
   (CM_MEDIUM / CM_MEDIUM_PIC); functions never count as large data;
   an explicit ".ldata"/".lbss" section name forces large placement;
   otherwise placement is decided by comparing the object's size
   against ix86_section_threshold.  NOTE(review): returns/braces are
   elided in this excerpt.  */
4014 ix86_in_large_data_p (tree exp)
4016 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4019 /* Functions are never large data. */
4020 if (TREE_CODE (exp) == FUNCTION_DECL)
4023 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4025 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4026 if (strcmp (section, ".ldata") == 0
4027 || strcmp (section, ".lbss") == 0)
4033 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4035 /* If this is an incomplete type with size 0, then we can't put it
4036 in data because it might be too big when completed. */
/* int_size_in_bytes returns -1 for variable/unknown size; both 0 and
   over-threshold sizes go to the large sections here.  */
4037 if (!size || size > ix86_section_threshold)
4044 /* Switch to the appropriate section for output of DECL.
4045 DECL is either a `VAR_DECL' node or a constant of some sort.
4046 RELOC indicates whether forming the initial value of DECL requires
4047 link-time relocations. */
4049 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
/* For medium-model large data, maps the section category produced by
   categorize_decl_for_section to the corresponding ".l*" large
   section; everything else falls through to the generic ELF
   selection.  NOTE(review): some case labels, breaks and braces are
   elided in this excerpt.  */
4053 x86_64_elf_select_section (tree decl, int reloc,
4054 unsigned HOST_WIDE_INT align)
4056 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4057 && ix86_in_large_data_p (decl))
4059 const char *sname = NULL;
4060 unsigned int flags = SECTION_WRITE;
4061 switch (categorize_decl_for_section (decl, reloc))
4066 case SECCAT_DATA_REL:
4067 sname = ".ldata.rel";
4069 case SECCAT_DATA_REL_LOCAL:
4070 sname = ".ldata.rel.local";
4072 case SECCAT_DATA_REL_RO:
4073 sname = ".ldata.rel.ro";
4075 case SECCAT_DATA_REL_RO_LOCAL:
4076 sname = ".ldata.rel.ro.local";
4080 flags |= SECTION_BSS;
4083 case SECCAT_RODATA_MERGE_STR:
4084 case SECCAT_RODATA_MERGE_STR_INIT:
4085 case SECCAT_RODATA_MERGE_CONST:
4089 case SECCAT_SRODATA:
4096 /* We don't split these for medium model. Place them into
4097 default sections and hope for best. */
4099 case SECCAT_EMUTLS_VAR:
4100 case SECCAT_EMUTLS_TMPL:
4105 /* We might get called with string constants, but get_named_section
4106 doesn't like them as they are not DECLs. Also, we need to set
4107 flags in that case. */
4109 return get_section (sname, flags, NULL);
4110 return get_named_section (decl, sname, reloc);
/* Not large data: use the default ELF section-selection logic.  */
4113 return default_elf_select_section (decl, reloc, align);
4116 /* Build up a unique section name, expressed as a
4117 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4118 RELOC indicates whether the initial value of EXP requires
4119 link-time relocations. */
/* Large-data analogue of default_unique_section: picks a ".l*" prefix
   (shortened when one_only/.gnu.linkonce is in effect), appends the
   decl's stripped assembler name, and stores the result as the decl's
   section name.  NOTE(review): breaks and some braces are elided in
   this excerpt.  */
4121 static void ATTRIBUTE_UNUSED
4122 x86_64_elf_unique_section (tree decl, int reloc)
4124 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4125 && ix86_in_large_data_p (decl))
4127 const char *prefix = NULL;
4128 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4129 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4131 switch (categorize_decl_for_section (decl, reloc))
4134 case SECCAT_DATA_REL:
4135 case SECCAT_DATA_REL_LOCAL:
4136 case SECCAT_DATA_REL_RO:
4137 case SECCAT_DATA_REL_RO_LOCAL:
4138 prefix = one_only ? ".ld" : ".ldata";
4141 prefix = one_only ? ".lb" : ".lbss";
4144 case SECCAT_RODATA_MERGE_STR:
4145 case SECCAT_RODATA_MERGE_STR_INIT:
4146 case SECCAT_RODATA_MERGE_CONST:
4147 prefix = one_only ? ".lr" : ".lrodata";
4149 case SECCAT_SRODATA:
4156 /* We don't split these for medium model. Place them into
4157 default sections and hope for best. */
4159 case SECCAT_EMUTLS_VAR:
4160 prefix = targetm.emutls.var_section;
4162 case SECCAT_EMUTLS_TMPL:
4163 prefix = targetm.emutls.tmpl_section;
4168 const char *name, *linkonce;
4171 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4172 name = targetm.strip_name_encoding (name);
4174 /* If we're using one_only, then there needs to be a .gnu.linkonce
4175 prefix to the section name. */
4176 linkonce = one_only ? ".gnu.linkonce" : "";
4178 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4180 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
/* Not large data: fall back to the generic unique-section logic.  */
4184 default_unique_section (decl, reloc);
4187 #ifdef COMMON_ASM_OP
4188 /* This says how to output assembler code to declare an
4189 uninitialized external linkage data object.
4191 For medium model x86-64 we need to use .largecomm opcode for
/* Emits either ".largecomm" (medium model, size above the section
   threshold) or the target's COMMON_ASM_OP, followed by
   "name,size,align-in-bytes".  NOTE(review): the return type line and
   an else are elided in this excerpt.  */
4194 x86_elf_aligned_common (FILE *file,
4195 const char *name, unsigned HOST_WIDE_INT size,
4198 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4199 && size > (unsigned int)ix86_section_threshold)
4200 fputs (".largecomm\t", file);
4202 fputs (COMMON_ASM_OP, file);
4203 assemble_name (file, name);
4204 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4205 size, align / BITS_PER_UNIT);
4209 /* Utility function for targets to use in implementing
4210 ASM_OUTPUT_ALIGNED_BSS. */
/* Places DECL into ".lbss" for medium-model objects above the section
   threshold, otherwise into the regular bss section, then emits the
   alignment directive, the object label (via ASM_DECLARE_OBJECT_NAME
   when available) and a skip for SIZE bytes (minimum 1).
   NOTE(review): the return type and an #else are elided here.  */
4213 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4214 const char *name, unsigned HOST_WIDE_INT size,
4217 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4218 && size > (unsigned int)ix86_section_threshold)
4219 switch_to_section (get_named_section (decl, ".lbss", 0));
4221 switch_to_section (bss_section);
4222 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4223 #ifdef ASM_DECLARE_OBJECT_NAME
4224 last_assemble_variable_decl = decl;
4225 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4227 /* Standard thing is just output label for the object. */
4228 ASM_OUTPUT_LABEL (file, name);
4229 #endif /* ASM_DECLARE_OBJECT_NAME */
/* A zero-sized object still needs one byte so its label is distinct.  */
4230 ASM_OUTPUT_SKIP (file, size ? size : 1);
/* Per-target adjustment of optimization defaults for the given -O
   LEVEL.  Disables first scheduling pass, and marks several flags
   with the sentinel value 2 so override_options can later pick
   TARGET_64BIT-dependent defaults unless the user set them
   explicitly.  NOTE(review): return type, #if conditions and braces
   are elided in this excerpt.  */
4234 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4236 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4237 make the problem with not enough registers even worse. */
4238 #ifdef INSN_SCHEDULING
4240 flag_schedule_insns = 0;
4244 /* The Darwin libraries never set errno, so we might as well
4245 avoid calling them when that's the only reason we would. */
4246 flag_errno_math = 0;
4248 /* The default values of these switches depend on the TARGET_64BIT
4249 that is not known at this moment. Mark these values with 2 and
4250 let user the to override these. In case there is no command line option
4251 specifying them, we will set the defaults in override_options. */
4253 flag_omit_frame_pointer = 2;
4254 flag_pcc_struct_return = 2;
4255 flag_asynchronous_unwind_tables = 2;
4256 flag_vect_cost_model = 1;
4257 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4258 SUBTARGET_OPTIMIZATION_OPTIONS;
4262 /* Decide whether we can make a sibling call to a function. DECL is the
4263 declaration of the function being targeted by the call and EXP is the
4264 CALL_EXPR representing the call. */
/* Rejects sibcalls that would break PIC (%ebx live for the PLT),
   stack realignment, x87 return-value popping, MS->SYSV ABI
   transitions, or indirect calls whose address register is consumed
   by regparm argument passing.  NOTE(review): return statements,
   braces and some conditions are elided in this excerpt.  */
4267 ix86_function_ok_for_sibcall (tree decl, tree exp)
4269 tree type, decl_or_type;
4272 /* If we are generating position-independent code, we cannot sibcall
4273 optimize any indirect call, or a direct call to a global function,
4274 as the PLT requires %ebx be live. */
4275 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4278 /* If we need to align the outgoing stack, then sibcalling would
4279 unalign the stack, which may break the called function. */
4280 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4285 decl_or_type = decl;
4286 type = TREE_TYPE (decl);
4290 /* We're looking at the CALL_EXPR, we need the type of the function. */
4291 type = CALL_EXPR_FN (exp); /* pointer expression */
4292 type = TREE_TYPE (type); /* pointer type */
4293 type = TREE_TYPE (type); /* function type */
4294 decl_or_type = type;
4297 /* Check that the return value locations are the same. Like
4298 if we are returning floats on the 80387 register stack, we cannot
4299 make a sibcall from a function that doesn't return a float to a
4300 function that does or, conversely, from a function that does return
4301 a float to a function that doesn't; the necessary stack adjustment
4302 would not be executed. This is also the place we notice
4303 differences in the return value ABI. Note that it is ok for one
4304 of the functions to have void return type as long as the return
4305 value of the other is passed in a register. */
4306 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4307 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4309 if (STACK_REG_P (a) || STACK_REG_P (b))
4311 if (!rtx_equal_p (a, b))
4314 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4316 else if (!rtx_equal_p (a, b))
4321 /* The SYSV ABI has more call-clobbered registers;
4322 disallow sibcalls from MS to SYSV. */
4323 if (cfun->machine->call_abi == MS_ABI
4324 && ix86_function_type_abi (type) == SYSV_ABI)
4329 /* If this call is indirect, we'll need to be able to use a
4330 call-clobbered register for the address of the target function.
4331 Make sure that all such registers are not used for passing
4332 parameters. Note that DLLIMPORT functions are indirect. */
4334 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
/* regparm >= 3 means EAX, EDX and ECX are all used for arguments,
   leaving no call-clobbered register for the call target address.  */
4336 if (ix86_function_regparm (type, NULL) >= 3)
4338 /* ??? Need to count the actual number of registers to be used,
4339 not the possible number of registers. Fix later. */
4345 /* Otherwise okay. That also includes certain types of indirect calls. */
4349 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4350 calling convention attributes;
4351 arguments as in struct attribute_spec.handler. */
/* Validates applicability (function/method types only), the regparm
   integer argument, and diagnoses mutually incompatible combinations
   (e.g. fastcall with cdecl/stdcall/regparm).  On 64-bit targets the
   attributes are ignored with a warning except when emulating the MS
   ABI.  NOTE(review): return type, several returns and braces are
   elided in this excerpt.  */
4354 ix86_handle_cconv_attribute (tree *node, tree name,
4356 int flags ATTRIBUTE_UNUSED,
4359 if (TREE_CODE (*node) != FUNCTION_TYPE
4360 && TREE_CODE (*node) != METHOD_TYPE
4361 && TREE_CODE (*node) != FIELD_DECL
4362 && TREE_CODE (*node) != TYPE_DECL)
4364 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4366 *no_add_attrs = true;
4370 /* Can combine regparm with all attributes but fastcall. */
4371 if (is_attribute_p ("regparm", name))
4375 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4377 error ("fastcall and regparm attributes are not compatible");
4380 cst = TREE_VALUE (args);
4381 if (TREE_CODE (cst) != INTEGER_CST)
4383 warning (OPT_Wattributes,
4384 "%qE attribute requires an integer constant argument",
4386 *no_add_attrs = true;
4388 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4390 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4392 *no_add_attrs = true;
4400 /* Do not warn when emulating the MS ABI. */
4401 if (TREE_CODE (*node) != FUNCTION_TYPE
4402 || ix86_function_type_abi (*node) != MS_ABI)
4403 warning (OPT_Wattributes, "%qE attribute ignored",
4405 *no_add_attrs = true;
4409 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4410 if (is_attribute_p ("fastcall", name))
4412 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4414 error ("fastcall and cdecl attributes are not compatible");
4416 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4418 error ("fastcall and stdcall attributes are not compatible");
4420 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4422 error ("fastcall and regparm attributes are not compatible");
4426 /* Can combine stdcall with fastcall (redundant), regparm and
4428 else if (is_attribute_p ("stdcall", name))
4430 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4432 error ("stdcall and cdecl attributes are not compatible");
4434 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4436 error ("stdcall and fastcall attributes are not compatible");
4440 /* Can combine cdecl with regparm and sseregparm. */
4441 else if (is_attribute_p ("cdecl", name))
4443 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4445 error ("stdcall and cdecl attributes are not compatible");
4447 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4449 error ("fastcall and cdecl attributes are not compatible");
4453 /* Can combine sseregparm with all attributes. */
4458 /* Return 0 if the attributes for two types are incompatible, 1 if they
4459 are compatible, and 2 if they are nearly compatible (which causes a
4460 warning to be generated). */
/* Only function/method types are checked; fastcall/regparm,
   sseregparm and the effective cdecl/stdcall return convention
   (which flips with TARGET_RTD) must all match.  NOTE(review): the
   return statements are elided in this excerpt.  */
4463 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4465 /* Check for mismatch of non-default calling convention. */
/* Under -mrtd the default is stdcall, so "cdecl" is the odd one out;
   otherwise "stdcall" is.  */
4466 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4468 if (TREE_CODE (type1) != FUNCTION_TYPE
4469 && TREE_CODE (type1) != METHOD_TYPE)
4472 /* Check for mismatched fastcall/regparm types. */
4473 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4474 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4475 || (ix86_function_regparm (type1, NULL)
4476 != ix86_function_regparm (type2, NULL)))
4479 /* Check for mismatched sseregparm types. */
4480 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4481 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4484 /* Check for mismatched return types (cdecl vs stdcall). */
4485 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4486 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4492 /* Return the regparm value for a function with the indicated TYPE and DECL.
4493 DECL may be NULL when calling function indirectly
4494 or considering a libcall. */
/* Sources of the value, in order: the 64-bit ABI's fixed register
   counts; an explicit "regparm" attribute (capped to 2 for nested
   functions, which need %ecx for the static chain); "fastcall"; and
   for local functions an automatically computed count that avoids
   fixed registers and stack-realignment clobbers.  NOTE(review):
   return type, returns and braces are elided in this excerpt.  */
4497 ix86_function_regparm (const_tree type, const_tree decl)
/* error_issued makes the nested-function diagnostic one-shot.  */
4502 static bool error_issued;
4505 return (ix86_function_type_abi (type) == SYSV_ABI
4506 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4508 regparm = ix86_regparm;
4509 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4513 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4515 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4517 /* We can't use regparm(3) for nested functions because
4518 these pass static chain pointer in %ecx register. */
4519 if (!error_issued && regparm == 3
4520 && decl_function_context (decl)
4521 && !DECL_NO_STATIC_CHAIN (decl))
4523 error ("nested functions are limited to 2 register parameters");
4524 error_issued = true;
4532 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4535 /* Use register calling convention for local functions when possible. */
4537 && TREE_CODE (decl) == FUNCTION_DECL
4541 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4542 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4545 int local_regparm, globals = 0, regno;
4548 /* Make sure no regparm register is taken by a
4549 fixed register variable. */
4550 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4551 if (fixed_regs[local_regparm])
4554 /* We can't use regparm(3) for nested functions as these use
4555 static chain pointer in third argument. */
4556 if (local_regparm == 3
4557 && decl_function_context (decl)
4558 && !DECL_NO_STATIC_CHAIN (decl))
4561 /* If the function realigns its stackpointer, the prologue will
4562 clobber %ecx. If we've already generated code for the callee,
4563 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4564 scanning the attributes for the self-realigning property. */
4565 f = DECL_STRUCT_FUNCTION (decl);
4566 /* Since current internal arg pointer won't conflict with
4567 parameter passing regs, so no need to change stack
4568 realignment and adjust regparm number.
4570 Each fixed register usage increases register pressure,
4571 so less registers should be used for argument passing.
4572 This functionality can be overriden by an explicit
4574 for (regno = 0; regno <= DI_REG; regno++)
4575 if (fixed_regs[regno])
4579 = globals < local_regparm ? local_regparm - globals : 0;
4581 if (local_regparm > regparm)
4582 regparm = local_regparm;
4589 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4590 DFmode (2) arguments in SSE registers for a function with the
4591 indicated TYPE and DECL. DECL may be NULL when calling function
4592 indirectly or considering a libcall. Otherwise return 0. */
/* 32-bit only (asserted).  The sseregparm attribute or
   -msseregparm requests SSE passing; lacking SSE support this is a
   hard error (when WARN) or a silent 0.  Local non-profiled
   functions get it automatically under TARGET_SSE_MATH.
   NOTE(review): return statements and braces are elided here.  */
4595 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4597 gcc_assert (!TARGET_64BIT);
4599 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4600 by the sseregparm attribute. */
4601 if (TARGET_SSEREGPARM
4602 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4609 error ("Calling %qD with attribute sseregparm without "
4610 "SSE/SSE2 enabled", decl);
4612 error ("Calling %qT with attribute sseregparm without "
4613 "SSE/SSE2 enabled", type);
4621 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4622 (and DFmode for SSE2) arguments in SSE registers. */
4623 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4625 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4626 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
/* 2 = SFmode and DFmode in SSE regs; 1 = SFmode only (no SSE2).  */
4628 return TARGET_SSE2 ? 2 : 1;
4634 /* Return true if EAX is live at the start of the function. Used by
4635 ix86_expand_prologue to determine if we need special help before
4636 calling allocate_stack_worker. */
/* Register 0 is EAX; liveness is read from the dataflow info of the
   entry block rather than recomputed from the argument list.  */
4639 ix86_eax_live_at_start_p (void)
4641 /* Cheat. Don't bother working forward from ix86_function_regparm
4642 to the function type to whether an actual argument is located in
4643 eax. Instead just look at cfg info, which is still close enough
4644 to correct at this point. This gives false positives for broken
4645 functions that might use uninitialized data that happens to be
4646 allocated in eax, but who cares? */
4647 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4650 /* Value is the number of bytes of arguments automatically
4651 popped when returning from a subroutine call.
4652 FUNDECL is the declaration node of the function (as a tree),
4653 FUNTYPE is the data type of the function (as a tree),
4654 or for a library call it is an identifier node for the subroutine name.
4655 SIZE is the number of bytes of arguments passed on the stack.
4657 On the 80386, the RTD insn may be used to pop them if the number
4658 of args is fixed, but if the number is variable then the caller
4659 must pop them all. RTD can't be used for library calls now
4660 because the library is compiled with the Unix compiler.
4661 Use of RTD is a selectable option, since it is incompatible with
4662 standard Unix calling sequences. If the option is not selected,
4663 the caller must always pop the args.
4665 The attribute stdcall is equivalent to RTD on a per module basis. */
/* NOTE(review): return statements and braces are elided in this
   excerpt — the visible conditions select between popping SIZE,
   popping only the hidden aggregate-return pointer, or popping
   nothing.  */
4668 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4672 /* None of the 64-bit ABIs pop arguments. */
4676 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4678 /* Cdecl functions override -mrtd, and never pop the stack. */
4679 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4681 /* Stdcall and fastcall functions will pop the stack if not
4683 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4684 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4687 if (rtd && ! stdarg_p (funtype))
4691 /* Lose any fake structure return argument if it is passed on the stack. */
4692 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4693 && !KEEP_AGGREGATE_RETURN_POINTER)
4695 int nregs = ix86_function_regparm (funtype, fundecl);
/* No register args: the callee pops the hidden return pointer.  */
4697 return GET_MODE_SIZE (Pmode);
4703 /* Argument support functions. */
4705 /* Return true when register may be used to pass function parameters. */
/* 32-bit: the regparm integer registers plus SSE/MMX registers when
   the corresponding ISA is enabled.  64-bit: SSE argument registers,
   RAX as the SYSV varargs hidden argument, and the integer parameter
   registers of whichever ABI ix86_abi selects.  NOTE(review):
   returns, braces and some conditions are elided in this excerpt.  */
4707 ix86_function_arg_regno_p (int regno)
4710 const int *parm_regs;
4715 return (regno < REGPARM_MAX
4716 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4718 return (regno < REGPARM_MAX
4719 || (TARGET_MMX && MMX_REGNO_P (regno)
4720 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4721 || (TARGET_SSE && SSE_REGNO_P (regno)
4722 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4727 if (SSE_REGNO_P (regno) && TARGET_SSE)
4732 if (TARGET_SSE && SSE_REGNO_P (regno)
4733 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4737 /* TODO: The function should depend on current function ABI but
4738 builtins.c would need updating then. Therefore we use the
4741 /* RAX is used as hidden argument to va_arg functions. */
4742 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4745 if (ix86_abi == MS_ABI)
4746 parm_regs = x86_64_ms_abi_int_parameter_registers;
4748 parm_regs = x86_64_int_parameter_registers;
4749 for (i = 0; i < (ix86_abi == MS_ABI
4750 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4751 if (regno == parm_regs[i])
4756 /* Return if we do not know how to pass TYPE solely in registers. */
/* Defers to the generic size/padding check first, then adds an
   i386-specific rule: 32-bit TImode aggregates (other than vectors)
   must go on the stack.  */
4759 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4761 if (must_pass_in_stack_var_size_or_pad (mode, type))
4764 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4765 The layout_type routine is crafty and tries to trick us into passing
4766 currently unsupported vector types on the stack by using TImode. */
4767 return (!TARGET_64BIT && mode == TImode
4768 && type && TREE_CODE (type) != VECTOR_TYPE);
4771 /* It returns the size, in bytes, of the area reserved for arguments passed
4772 in registers for the function represented by fndecl dependent to the used
/* FNDECL may be either a FUNCTION_DECL or a function type; only the
   MS ABI reserves a (shadow) area.  NOTE(review): the return values
   are elided in this excerpt.  */
4775 ix86_reg_parm_stack_space (const_tree fndecl)
4777 enum calling_abi call_abi = SYSV_ABI;
4778 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4779 call_abi = ix86_function_abi (fndecl);
4781 call_abi = ix86_function_type_abi (fndecl);
4782 if (call_abi == MS_ABI)
4787 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
/* The "ms_abi" / "sysv_abi" attributes flip the default ix86_abi on
   64-bit targets; a NULL FNTYPE or 32-bit target keeps the default.
   NOTE(review): returns and braces are elided in this excerpt.  */
4790 ix86_function_type_abi (const_tree fntype)
4792 if (TARGET_64BIT && fntype != NULL)
4794 enum calling_abi abi = ix86_abi;
4795 if (abi == SYSV_ABI)
4797 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4800 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
/* Return the calling ABI of FNDECL by delegating to the type-based
   check on its function type.  NOTE(review): the NULL-decl guard line
   is elided in this excerpt.  */
4807 static enum calling_abi
4808 ix86_function_abi (const_tree fndecl)
4812 return ix86_function_type_abi (TREE_TYPE (fndecl));
4815 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
/* Outside a function, or on 32-bit targets, the cached per-function
   ABI is not consulted.  NOTE(review): the default-return line is
   elided in this excerpt.  */
4818 ix86_cfun_abi (void)
4820 if (! cfun || ! TARGET_64BIT)
4822 return cfun->machine->call_abi;
4826 extern void init_regs (void);
4828 /* Implementation of call abi switching target hook. Specific to FNDECL
4829 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4830 for more details. */
/* Records the effective ABI for the current function: the global
   default when FNDECL is NULL, otherwise the ABI derived from the
   decl's function type.  */
4832 ix86_call_abi_override (const_tree fndecl)
4834 if (fndecl == NULL_TREE)
4835 cfun->machine->call_abi = ix86_abi;
4837 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4840 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4841 re-initialization of init_regs each time we switch function context since
4842 this is needed only during RTL expansion. */
/* Uses %esi's call_used_regs entry as a cheap probe of which ABI's
   register sets are currently installed, reinitializing only on a
   mismatch.  NOTE(review): the target-check line and the init_regs
   call are elided in this excerpt — confirm against full source.  */
4844 ix86_maybe_switch_abi (void)
4847 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4851 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4852 for a call to a function whose data type is FNTYPE.
4853 For a library call, FNTYPE is 0. */
/* Zeroes CUM, records the call ABI (decl-based when FNDECL is known,
   else type-based), then sets the integer/SSE/MMX register budgets,
   cross-ABI adjustments, vararg handling and per-function
   fastcall/regparm/sseregparm overrides.  NOTE(review): braces,
   conditions and some assignments are elided in this excerpt.  */
4856 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4857 tree fntype, /* tree ptr for function decl */
4858 rtx libname, /* SYMBOL_REF of library name or 0 */
4861 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4862 memset (cum, 0, sizeof (*cum));
4865 cum->call_abi = ix86_function_abi (fndecl);
4867 cum->call_abi = ix86_function_type_abi (fntype);
4868 /* Set up the number of registers to use for passing arguments. */
4870 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4871 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4872 "or subtarget optimization implying it");
4873 cum->nregs = ix86_regparm;
/* When the call's ABI differs from the compilation default, the
   register counts of the *other* ABI apply.  */
4876 if (cum->call_abi != ix86_abi)
4877 cum->nregs = (ix86_abi != SYSV_ABI
4878 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4882 cum->sse_nregs = SSE_REGPARM_MAX;
4885 if (cum->call_abi != ix86_abi)
4886 cum->sse_nregs = (ix86_abi != SYSV_ABI
4887 ? X86_64_SSE_REGPARM_MAX
4888 : X86_64_MS_SSE_REGPARM_MAX);
4892 cum->mmx_nregs = MMX_REGPARM_MAX;
4893 cum->warn_avx = true;
4894 cum->warn_sse = true;
4895 cum->warn_mmx = true;
4897 /* Because type might mismatch in between caller and callee, we need to
4898 use actual type of function for local calls.
4899 FIXME: cgraph_analyze can be told to actually record if function uses
4900 va_start so for local functions maybe_vaarg can be made aggressive
4902 FIXME: once typesytem is fixed, we won't need this code anymore. */
4904 fntype = TREE_TYPE (fndecl);
4905 cum->maybe_vaarg = (fntype
4906 ? (!prototype_p (fntype) || stdarg_p (fntype))
4911 /* If there are variable arguments, then we won't pass anything
4912 in registers in 32-bit mode. */
4913 if (stdarg_p (fntype))
4924 /* Use ecx and edx registers if function has fastcall attribute,
4925 else look for regparm information. */
4928 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4934 cum->nregs = ix86_function_regparm (fntype, fndecl);
4937 /* Set up the number of SSE registers used for passing SFmode
4938 and DFmode arguments. Warn for mismatching ABI. */
4939 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4943 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4944 But in the case of vector types, it is some vector mode.
4946 When we have only some of our vector isa extensions enabled, then there
4947 are some modes for which vector_mode_supported_p is false. For these
4948 modes, the generic vector support in gcc will choose some non-vector mode
4949 in order to implement the type. By computing the natural mode, we'll
4950 select the proper ABI location for the operand and not depend on whatever
4951 the middle-end decides to do with these vector types.
4953 The midde-end can't deal with the vector types > 16 bytes. In this
4954 case, we return the original mode and warn ABI change if CUM isn't
/* For 8/16/32-byte vector types whose TYPE_MODE is not a vector mode,
   searches the machine-mode table for a vector mode with matching
   element mode and lane count.  32-byte vectors without AVX keep
   TYPE_MODE and may emit a one-time psABI warning.  NOTE(review):
   returns, braces and the warning guard are elided in this excerpt.  */
4957 static enum machine_mode
4958 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4960 enum machine_mode mode = TYPE_MODE (type);
4962 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4964 HOST_WIDE_INT size = int_size_in_bytes (type);
4965 if ((size == 8 || size == 16 || size == 32)
4966 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4967 && TYPE_VECTOR_SUBPARTS (type) > 1)
4969 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4971 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4972 mode = MIN_MODE_VECTOR_FLOAT;
4974 mode = MIN_MODE_VECTOR_INT;
4976 /* Get the mode which has this inner mode and number of units. */
4977 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4978 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4979 && GET_MODE_INNER (mode) == innermode)
4981 if (size == 32 && !TARGET_AVX)
/* warnedavx makes the ABI-change warning one-shot per compilation.  */
4983 static bool warnedavx;
4990 warning (0, "AVX vector argument without AVX "
4991 "enabled changes the ABI");
4993 return TYPE_MODE (type);
5006 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5007 this may not agree with the mode that the type system has chosen for the
5008 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5009 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
/* For BLKmode the single register is wrapped in a one-element
   PARALLEL at offset 0 so the middle-end can handle the mismatch.
   NOTE(review): the return statement is elided in this excerpt.  */
5012 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5017 if (orig_mode != BLKmode)
5018 tmp = gen_rtx_REG (orig_mode, regno);
5021 tmp = gen_rtx_REG (mode, regno);
5022 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5023 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5029 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5030 of this code is to classify each 8bytes of incoming argument by the register
5031 class and assign registers accordingly. */
5033 /* Return the union class of CLASS1 and CLASS2.
5034 See the x86-64 PS ABI for details. */
/* Implements the psABI merge rules in order: equal classes merge to
   themselves; NO_CLASS is the identity; MEMORY is absorbing; INTEGER
   beats SSE (with an INTEGERSI special case); any x87-family class
   forces MEMORY; otherwise SSE.  NOTE(review): the return statements
   for rules 1-2 are elided in this excerpt.  */
5036 static enum x86_64_reg_class
5037 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5039 /* Rule #1: If both classes are equal, this is the resulting class. */
5040 if (class1 == class2)
5043 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5045 if (class1 == X86_64_NO_CLASS)
5047 if (class2 == X86_64_NO_CLASS)
5050 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5051 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5052 return X86_64_MEMORY_CLASS;
5054 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
/* INTEGERSI + SSESF stays 32-bit-sized, hence INTEGERSI.  */
5055 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5056 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5057 return X86_64_INTEGERSI_CLASS;
5058 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5059 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5060 return X86_64_INTEGER_CLASS;
5062 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5064 if (class1 == X86_64_X87_CLASS
5065 || class1 == X86_64_X87UP_CLASS
5066 || class1 == X86_64_COMPLEX_X87_CLASS
5067 || class2 == X86_64_X87_CLASS
5068 || class2 == X86_64_X87UP_CLASS
5069 || class2 == X86_64_COMPLEX_X87_CLASS)
5070 return X86_64_MEMORY_CLASS;
5072 /* Rule #6: Otherwise class SSE is used. */
5073 return X86_64_SSE_CLASS;
5076 /* Classify the argument of type TYPE and mode MODE.
5077 CLASSES will be filled by the register class used to pass each word
5078 of the operand. The number of words is returned. In case the parameter
5079 should be passed in memory, 0 is returned. As a special case for zero
5080 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5082 BIT_OFFSET is used internally for handling records and specifies offset
5083 of the offset in bits modulo 256 to avoid overflow cases.
5085 See the x86-64 PS ABI for details.
5089 classify_argument (enum machine_mode mode, const_tree type,
5090 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5092 HOST_WIDE_INT bytes =
5093 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5094 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5096 /* Variable sized entities are always passed/returned in memory. */
5100 if (mode != VOIDmode
5101 && targetm.calls.must_pass_in_stack (mode, type))
5104 if (type && AGGREGATE_TYPE_P (type))
5108 enum x86_64_reg_class subclasses[MAX_CLASSES];
5110 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5114 for (i = 0; i < words; i++)
5115 classes[i] = X86_64_NO_CLASS;
5117 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5118 signalize memory class, so handle it as special case. */
5121 classes[0] = X86_64_NO_CLASS;
5125 /* Classify each field of record and merge classes. */
5126 switch (TREE_CODE (type))
5129 /* And now merge the fields of structure. */
5130 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5132 if (TREE_CODE (field) == FIELD_DECL)
5136 if (TREE_TYPE (field) == error_mark_node)
5139 /* Bitfields are always classified as integer. Handle them
5140 early, since later code would consider them to be
5141 misaligned integers. */
5142 if (DECL_BIT_FIELD (field))
5144 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5145 i < ((int_bit_position (field) + (bit_offset % 64))
5146 + tree_low_cst (DECL_SIZE (field), 0)
5149 merge_classes (X86_64_INTEGER_CLASS,
5156 type = TREE_TYPE (field);
5158 /* Flexible array member is ignored. */
5159 if (TYPE_MODE (type) == BLKmode
5160 && TREE_CODE (type) == ARRAY_TYPE
5161 && TYPE_SIZE (type) == NULL_TREE
5162 && TYPE_DOMAIN (type) != NULL_TREE
5163 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5168 if (!warned && warn_psabi)
5171 inform (input_location,
5172 "The ABI of passing struct with"
5173 " a flexible array member has"
5174 " changed in GCC 4.4");
5178 num = classify_argument (TYPE_MODE (type), type,
5180 (int_bit_position (field)
5181 + bit_offset) % 256);
5184 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5185 for (i = 0; i < num && (i + pos) < words; i++)
5187 merge_classes (subclasses[i], classes[i + pos]);
5194 /* Arrays are handled as small records. */
5197 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5198 TREE_TYPE (type), subclasses, bit_offset);
5202 /* The partial classes are now full classes. */
5203 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5204 subclasses[0] = X86_64_SSE_CLASS;
5205 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5206 && !((bit_offset % 64) == 0 && bytes == 4))
5207 subclasses[0] = X86_64_INTEGER_CLASS;
5209 for (i = 0; i < words; i++)
5210 classes[i] = subclasses[i % num];
5215 case QUAL_UNION_TYPE:
5216 /* Unions are similar to RECORD_TYPE but offset is always 0.
5218 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5220 if (TREE_CODE (field) == FIELD_DECL)
5224 if (TREE_TYPE (field) == error_mark_node)
5227 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5228 TREE_TYPE (field), subclasses,
5232 for (i = 0; i < num; i++)
5233 classes[i] = merge_classes (subclasses[i], classes[i]);
5244 /* When size > 16 bytes, if the first one isn't
5245 X86_64_SSE_CLASS or any other ones aren't
5246 X86_64_SSEUP_CLASS, everything should be passed in
5248 if (classes[0] != X86_64_SSE_CLASS)
5251 for (i = 1; i < words; i++)
5252 if (classes[i] != X86_64_SSEUP_CLASS)
5256 /* Final merger cleanup. */
5257 for (i = 0; i < words; i++)
5259 /* If one class is MEMORY, everything should be passed in
5261 if (classes[i] == X86_64_MEMORY_CLASS)
5264 /* The X86_64_SSEUP_CLASS should be always preceded by
5265 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5266 if (classes[i] == X86_64_SSEUP_CLASS
5267 && classes[i - 1] != X86_64_SSE_CLASS
5268 && classes[i - 1] != X86_64_SSEUP_CLASS)
5270 /* The first one should never be X86_64_SSEUP_CLASS. */
5271 gcc_assert (i != 0);
5272 classes[i] = X86_64_SSE_CLASS;
5275 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5276 everything should be passed in memory. */
5277 if (classes[i] == X86_64_X87UP_CLASS
5278 && (classes[i - 1] != X86_64_X87_CLASS))
5282 /* The first one should never be X86_64_X87UP_CLASS. */
5283 gcc_assert (i != 0);
5284 if (!warned && warn_psabi)
5287 inform (input_location,
5288 "The ABI of passing union with long double"
5289 " has changed in GCC 4.4");
5297 /* Compute alignment needed. We align all types to natural boundaries with
5298 exception of XFmode that is aligned to 64bits. */
5299 if (mode != VOIDmode && mode != BLKmode)
5301 int mode_alignment = GET_MODE_BITSIZE (mode);
5304 mode_alignment = 128;
5305 else if (mode == XCmode)
5306 mode_alignment = 256;
5307 if (COMPLEX_MODE_P (mode))
5308 mode_alignment /= 2;
5309 /* Misaligned fields are always returned in memory. */
5310 if (bit_offset % mode_alignment)
5314 /* for V1xx modes, just use the base mode */
5315 if (VECTOR_MODE_P (mode) && mode != V1DImode
5316 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5317 mode = GET_MODE_INNER (mode);
5319 /* Classification of atomic types. */
5324 classes[0] = X86_64_SSE_CLASS;
5327 classes[0] = X86_64_SSE_CLASS;
5328 classes[1] = X86_64_SSEUP_CLASS;
5338 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5342 classes[0] = X86_64_INTEGERSI_CLASS;
5345 else if (size <= 64)
5347 classes[0] = X86_64_INTEGER_CLASS;
5350 else if (size <= 64+32)
5352 classes[0] = X86_64_INTEGER_CLASS;
5353 classes[1] = X86_64_INTEGERSI_CLASS;
5356 else if (size <= 64+64)
5358 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5366 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5370 /* OImode shouldn't be used directly. */
5375 if (!(bit_offset % 64))
5376 classes[0] = X86_64_SSESF_CLASS;
5378 classes[0] = X86_64_SSE_CLASS;
5381 classes[0] = X86_64_SSEDF_CLASS;
5384 classes[0] = X86_64_X87_CLASS;
5385 classes[1] = X86_64_X87UP_CLASS;
5388 classes[0] = X86_64_SSE_CLASS;
5389 classes[1] = X86_64_SSEUP_CLASS;
5392 classes[0] = X86_64_SSE_CLASS;
5393 if (!(bit_offset % 64))
5399 if (!warned && warn_psabi)
5402 inform (input_location,
5403 "The ABI of passing structure with complex float"
5404 " member has changed in GCC 4.4");
5406 classes[1] = X86_64_SSESF_CLASS;
5410 classes[0] = X86_64_SSEDF_CLASS;
5411 classes[1] = X86_64_SSEDF_CLASS;
5414 classes[0] = X86_64_COMPLEX_X87_CLASS;
5417 /* This mode is larger than 16 bytes. */
5425 classes[0] = X86_64_SSE_CLASS;
5426 classes[1] = X86_64_SSEUP_CLASS;
5427 classes[2] = X86_64_SSEUP_CLASS;
5428 classes[3] = X86_64_SSEUP_CLASS;
5436 classes[0] = X86_64_SSE_CLASS;
5437 classes[1] = X86_64_SSEUP_CLASS;
5444 classes[0] = X86_64_SSE_CLASS;
5450 gcc_assert (VECTOR_MODE_P (mode));
5455 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5457 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5458 classes[0] = X86_64_INTEGERSI_CLASS;
5460 classes[0] = X86_64_INTEGER_CLASS;
5461 classes[1] = X86_64_INTEGER_CLASS;
5462 return 1 + (bytes > 8);
/* NOTE(review): elided listing -- intermediate source lines (braces,
   increments of *int_nregs/*sse_nregs, returns) are missing here.  */
5466 /* Examine the argument and return set number of register required in each
5467 class. Return 0 iff parameter should be passed in memory. */
5469 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5470 int *int_nregs, int *sse_nregs)
5472 enum x86_64_reg_class regclass[MAX_CLASSES];
/* classify_argument fills REGCLASS; per its contract, 0 means the value
   must live in memory.  */
5473 int n = classify_argument (mode, type, regclass, 0);
/* Walk the classes, tallying how many GPRs vs. SSE registers are needed.  */
5479 for (n--; n >= 0; n--)
5480 switch (regclass[n])
5482 case X86_64_INTEGER_CLASS:
5483 case X86_64_INTEGERSI_CLASS:
5486 case X86_64_SSE_CLASS:
5487 case X86_64_SSESF_CLASS:
5488 case X86_64_SSEDF_CLASS:
5491 case X86_64_NO_CLASS:
5492 case X86_64_SSEUP_CLASS:
5494 case X86_64_X87_CLASS:
5495 case X86_64_X87UP_CLASS:
/* x87 classes are usable only for return values, not arguments.  */
5499 case X86_64_COMPLEX_X87_CLASS:
5500 return in_return ? 2 : 0;
5501 case X86_64_MEMORY_CLASS:
/* NOTE(review): elided listing -- intermediate source lines are missing
   (early returns, brace structure, several case arms).  */
5507 /* Construct container for the argument used by GCC interface. See
5508 FUNCTION_ARG for the detailed description. */
5511 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5512 const_tree type, int in_return, int nintregs, int nsseregs,
5513 const int *intreg, int sse_regno)
5515 /* The following variables hold the static issued_error state. */
5516 static bool issued_sse_arg_error;
5517 static bool issued_sse_ret_error;
5518 static bool issued_x87_ret_error;
5520 enum machine_mode tmpmode;
5522 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5523 enum x86_64_reg_class regclass[MAX_CLASSES];
5527 int needed_sseregs, needed_intregs;
5528 rtx exp[MAX_CLASSES];
5531 n = classify_argument (mode, type, regclass, 0);
5534 if (!examine_argument (mode, type, in_return, &needed_intregs,
5537 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5540 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5541 some less clueful developer tries to use floating-point anyway. */
5542 if (needed_sseregs && !TARGET_SSE)
/* Each diagnostic is issued at most once per compilation; the static
   issued_* flags above latch after the first report.  */
5546 if (!issued_sse_ret_error)
5548 error ("SSE register return with SSE disabled");
5549 issued_sse_ret_error = true;
5552 else if (!issued_sse_arg_error)
5554 error ("SSE register argument with SSE disabled");
5555 issued_sse_arg_error = true;
5560 /* Likewise, error if the ABI requires us to return values in the
5561 x87 registers and the user specified -mno-80387. */
5562 if (!TARGET_80387 && in_return)
5563 for (i = 0; i < n; i++)
5564 if (regclass[i] == X86_64_X87_CLASS
5565 || regclass[i] == X86_64_X87UP_CLASS
5566 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5568 if (!issued_x87_ret_error)
5570 error ("x87 register return with x87 disabled")
5571 issued_x87_ret_error = true;
5576 /* First construct simple cases. Avoid SCmode, since we want to use
5577 single register to pass this type. */
5578 if (n == 1 && mode != SCmode)
5579 switch (regclass[0])
5581 case X86_64_INTEGER_CLASS:
5582 case X86_64_INTEGERSI_CLASS:
5583 return gen_rtx_REG (mode, intreg[0]);
5584 case X86_64_SSE_CLASS:
5585 case X86_64_SSESF_CLASS:
5586 case X86_64_SSEDF_CLASS:
5587 if (mode != BLKmode)
5588 return gen_reg_or_parallel (mode, orig_mode,
5589 SSE_REGNO (sse_regno));
5591 case X86_64_X87_CLASS:
5592 case X86_64_COMPLEX_X87_CLASS:
5593 return gen_rtx_REG (mode, FIRST_STACK_REG);
5594 case X86_64_NO_CLASS:
5595 /* Zero sized array, struct or class. */
/* Two- and four-register SSE aggregates get a single wide REG.  */
5600 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5601 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5602 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5604 && regclass[0] == X86_64_SSE_CLASS
5605 && regclass[1] == X86_64_SSEUP_CLASS
5606 && regclass[2] == X86_64_SSEUP_CLASS
5607 && regclass[3] == X86_64_SSEUP_CLASS
5609 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5612 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5613 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5614 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5615 && regclass[1] == X86_64_INTEGER_CLASS
5616 && (mode == CDImode || mode == TImode || mode == TFmode)
5617 && intreg[0] + 1 == intreg[1])
5618 return gen_rtx_REG (mode, intreg[0]);
5620 /* Otherwise figure out the entries of the PARALLEL. */
5621 for (i = 0; i < n; i++)
5625 switch (regclass[i])
5627 case X86_64_NO_CLASS:
5629 case X86_64_INTEGER_CLASS:
5630 case X86_64_INTEGERSI_CLASS:
5631 /* Merge TImodes on aligned occasions here too. */
5632 if (i * 8 + 8 > bytes)
5633 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5634 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5638 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5639 if (tmpmode == BLKmode)
5641 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5642 gen_rtx_REG (tmpmode, *intreg),
5646 case X86_64_SSESF_CLASS:
5647 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5648 gen_rtx_REG (SFmode,
5649 SSE_REGNO (sse_regno)),
5653 case X86_64_SSEDF_CLASS:
5654 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5655 gen_rtx_REG (DFmode,
5656 SSE_REGNO (sse_regno)),
5660 case X86_64_SSE_CLASS:
5668 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5678 && regclass[1] == X86_64_SSEUP_CLASS
5679 && regclass[2] == X86_64_SSEUP_CLASS
5680 && regclass[3] == X86_64_SSEUP_CLASS);
5687 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5688 gen_rtx_REG (tmpmode,
5689 SSE_REGNO (sse_regno)),
5698 /* Empty aligned struct, union or class. */
/* Wrap all collected EXPR_LISTs into one PARALLEL return rtx.  */
5702 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5703 for (i = 0; i < nexps; i++)
5704 XVECEXP (ret, 0, i) = exp [i];
/* NOTE(review): elided listing -- the mode switch and several resets
   (e.g. when register counters go negative) are not shown.  */
5708 /* Update the data in CUM to advance over an argument of mode MODE
5709 and data type TYPE. (TYPE is null for libcalls where that information
5710 may not be available.) */
5713 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5714 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* Integer arguments consume WORDS general registers.  */
5730 cum->words += words;
5731 cum->nregs -= words;
5732 cum->regno += words;
5734 if (cum->nregs <= 0)
5742 /* OImode shouldn't be used directly. */
5746 if (cum->float_in_sse < 2)
5749 if (cum->float_in_sse < 1)
/* Non-aggregate vector arguments consume one SSE register.  */
5766 if (!type || !AGGREGATE_TYPE_P (type))
5768 cum->sse_words += words;
5769 cum->sse_nregs -= 1;
5770 cum->sse_regno += 1;
5771 if (cum->sse_nregs <= 0)
/* Non-aggregate MMX-mode arguments consume one MMX register.  */
5784 if (!type || !AGGREGATE_TYPE_P (type))
5786 cum->mmx_words += words;
5787 cum->mmx_nregs -= 1;
5788 cum->mmx_regno += 1;
5789 if (cum->mmx_nregs <= 0)
/* Advance CUM over one argument under the 64-bit SysV calling convention.
   NOTE(review): elided listing -- braces and the early return for the
   unnamed-AVX case are not shown.  */
5800 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5801 tree type, HOST_WIDE_INT words, int named)
5803 int int_nregs, sse_nregs;
5805 /* Unnamed 256bit vector mode parameters are passed on stack. */
5806 if (!named && VALID_AVX256_REG_MODE (mode))
/* examine_argument returning 0 means memory: burn stack words instead.  */
5809 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5810 cum->words += words;
5811 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5813 cum->nregs -= int_nregs;
5814 cum->sse_nregs -= sse_nregs;
5815 cum->regno += int_nregs;
5816 cum->sse_regno += sse_nregs;
5819 cum->words += words;
/* Advance CUM for the Microsoft x64 ABI, where every slot is one word.
   NOTE(review): elided listing -- braces and register bookkeeping that
   precede the assertion are not shown.  */
5823 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5824 HOST_WIDE_INT words)
5826 /* Otherwise, this should be passed indirect. */
5827 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5829 cum->words += words;
/* Top-level argument-advance hook: compute size in bytes/words, then
   dispatch to the MS-64, SysV-64 or 32-bit helper by target/ABI.
   NOTE(review): elided listing -- braces and the vector-type guard
   around line 5850 are not shown.  */
5838 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5839 tree type, int named)
5841 HOST_WIDE_INT bytes, words;
5843 if (mode == BLKmode)
5844 bytes = int_size_in_bytes (type);
5846 bytes = GET_MODE_SIZE (mode);
5847 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5850 mode = type_natural_mode (type, NULL);
5852 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5853 function_arg_advance_ms_64 (cum, bytes, words);
5854 else if (TARGET_64BIT)
5855 function_arg_advance_64 (cum, mode, type, words, named);
5857 function_arg_advance_32 (cum, mode, type, bytes, words);
/* NOTE(review): elided listing -- the mode switch skeleton, fastcall
   checks and fall-through returns are not shown between entries.  */
5860 /* Define where to put the arguments to a function.
5861 Value is zero to push the argument on the stack,
5862 or a hard register in which to store the argument.
5864 MODE is the argument's machine mode.
5865 TYPE is the data type of the argument (as a tree).
5866 This is null for libcalls where that information may
5868 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5869 the preceding args and about the function being called.
5870 NAMED is nonzero if this argument is a named parameter
5871 (otherwise it is an extra parameter matching an ellipsis). */
5874 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5875 enum machine_mode orig_mode, tree type,
5876 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
/* warnedsse/warnedmmx latch so each warning fires once per compilation.  */
5878 static bool warnedsse, warnedmmx;
5880 /* Avoid the AL settings for the Unix64 ABI. */
5881 if (mode == VOIDmode)
5897 if (words <= cum->nregs)
5899 int regno = cum->regno;
5901 /* Fastcall allocates the first two DWORD (SImode) or
5902 smaller arguments to ECX and EDX if it isn't an
5908 || (type && AGGREGATE_TYPE_P (type)))
5911 /* ECX not EAX is the first allocated register. */
5912 if (regno == AX_REG)
5915 return gen_rtx_REG (mode, regno);
5920 if (cum->float_in_sse < 2)
5923 if (cum->float_in_sse < 1)
5927 /* In 32bit, we pass TImode in xmm registers. */
5934 if (!type || !AGGREGATE_TYPE_P (type))
5936 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5939 warning (0, "SSE vector argument without SSE enabled "
5943 return gen_reg_or_parallel (mode, orig_mode,
5944 cum->sse_regno + FIRST_SSE_REG);
5949 /* OImode shouldn't be used directly. */
5958 if (!type || !AGGREGATE_TYPE_P (type))
5961 return gen_reg_or_parallel (mode, orig_mode,
5962 cum->sse_regno + FIRST_SSE_REG);
/* MMX-sized vectors go in MMX registers (with a one-time warning when
   MMX is disabled).  */
5971 if (!type || !AGGREGATE_TYPE_P (type))
5973 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5976 warning (0, "MMX vector argument without MMX enabled "
5980 return gen_reg_or_parallel (mode, orig_mode,
5981 cum->mmx_regno + FIRST_MMX_REG);
/* Pick a register (or PARALLEL) for one 64-bit SysV argument.
   NOTE(review): elided listing -- parts of the GEN_INT expression and
   the construct_container argument list are missing between entries.  */
5990 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5991 enum machine_mode orig_mode, tree type, int named)
5993 /* Handle a hidden AL argument containing number of registers
5994 for varargs x86-64 functions. */
5995 if (mode == VOIDmode)
5996 return GEN_INT (cum->maybe_vaarg
5997 ? (cum->sse_nregs < 0
5998 ? (cum->call_abi == ix86_abi
6000 : (ix86_abi != SYSV_ABI
6001 ? X86_64_SSE_REGPARM_MAX
6002 : X86_64_MS_SSE_REGPARM_MAX))
6017 /* Unnamed 256bit vector mode parameters are passed on stack. */
6023 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6025 &x86_64_int_parameter_registers [cum->regno],
/* Pick the register for one argument under the Microsoft x64 ABI.
   NOTE(review): elided listing -- braces, the `named` branch header and
   some returns are not shown.  */
6030 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6031 enum machine_mode orig_mode, int named,
6032 HOST_WIDE_INT bytes)
6036 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6037 We use value of -2 to specify that current function call is MSABI. */
6038 if (mode == VOIDmode)
6039 return GEN_INT (-2);
6041 /* If we've run out of registers, it goes on the stack. */
6042 if (cum->nregs == 0)
6045 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6047 /* Only floating point modes are passed in anything but integer regs. */
6048 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6051 regno = cum->regno + FIRST_SSE_REG;
6056 /* Unnamed floating parameters are passed in both the
6057 SSE and integer registers. */
6058 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6059 t2 = gen_rtx_REG (mode, regno);
6060 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6061 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6062 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6065 /* Handle aggregated types passed in register. */
6066 if (orig_mode == BLKmode)
6068 if (bytes > 0 && bytes <= 8)
6069 mode = (bytes > 4 ? DImode : SImode);
6070 if (mode == BLKmode)
6074 return gen_reg_or_parallel (mode, orig_mode, regno);
/* Top-level FUNCTION_ARG worker: size the argument, normalize vector
   types to their natural mode, then dispatch by target/ABI.
   NOTE(review): elided listing -- braces and the return-type line of
   the signature are not shown.  */
6078 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6079 tree type, int named)
6081 enum machine_mode mode = omode;
6082 HOST_WIDE_INT bytes, words;
6084 if (mode == BLKmode)
6085 bytes = int_size_in_bytes (type);
6087 bytes = GET_MODE_SIZE (mode);
6088 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6090 /* To simplify the code below, represent vector types with a vector mode
6091 even if MMX/SSE are not active. */
6092 if (type && TREE_CODE (type) == VECTOR_TYPE)
6093 mode = type_natural_mode (type, cum);
6095 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6096 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6097 else if (TARGET_64BIT)
6098 return function_arg_64 (cum, mode, omode, type, named);
6100 return function_arg_32 (cum, mode, omode, type, bytes, words);
/* NOTE(review): elided listing -- the switch skeleton around line 6134
   and several returns are not shown.  */
6103 /* A C expression that indicates when an argument must be passed by
6104 reference. If nonzero for an argument, a copy of that argument is
6105 made in memory and a pointer to the argument is passed instead of
6106 the argument itself. The pointer is passed in whatever way is
6107 appropriate for passing a pointer to that type. */
6110 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6111 enum machine_mode mode ATTRIBUTE_UNUSED,
6112 const_tree type, bool named ATTRIBUTE_UNUSED)
6114 /* See Windows x64 Software Convention. */
6115 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6117 int msize = (int) GET_MODE_SIZE (mode);
6120 /* Arrays are passed by reference. */
6121 if (TREE_CODE (type) == ARRAY_TYPE)
6124 if (AGGREGATE_TYPE_P (type))
6126 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6127 are passed by reference. */
6128 msize = int_size_in_bytes (type);
6132 /* __m128 is passed by reference. */
6134 case 1: case 2: case 4: case 8:
/* SysV 64-bit: variable-sized types (size == -1) go by reference.  */
6140 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
/* NOTE(review): elided listing -- switch/case skeleton, returns and the
   local `field` declaration are not shown.  */
6146 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6149 contains_aligned_value_p (tree type)
6151 enum machine_mode mode = TYPE_MODE (type);
6152 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6156 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6158 if (TYPE_ALIGN (type) < 128)
6161 if (AGGREGATE_TYPE_P (type))
6163 /* Walk the aggregates recursively. */
6164 switch (TREE_CODE (type))
6168 case QUAL_UNION_TYPE:
6172 /* Walk all the structure fields. */
6173 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6175 if (TREE_CODE (field) == FIELD_DECL
6176 && contains_aligned_value_p (TREE_TYPE (field)))
6183 /* Just for use if some languages pass arrays by value. */
6184 if (contains_aligned_value_p (TREE_TYPE (type)))
/* NOTE(review): elided listing -- braces, the `align` declaration and
   the final return are not shown.  */
6195 /* Gives the alignment boundary, in bits, of an argument with the
6196 specified mode and type. */
6199 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6204 /* Since canonical type is used for call, we convert it to
6205 canonical type if needed. */
6206 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6207 type = TYPE_CANONICAL (type);
6208 align = TYPE_ALIGN (type);
6211 align = GET_MODE_ALIGNMENT (mode);
/* Never report less than the ABI minimum parameter boundary.  */
6212 if (align < PARM_BOUNDARY)
6213 align = PARM_BOUNDARY;
6214 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6215 natural boundaries. */
6216 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6218 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6219 make an exception for SSE modes since these require 128bit
6222 The handling here differs from field_alignment. ICC aligns MMX
6223 arguments to 4 byte boundaries, while structure fields are aligned
6224 to 8 byte boundaries. */
6227 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6228 align = PARM_BOUNDARY;
6232 if (!contains_aligned_value_p (type))
6233 align = PARM_BOUNDARY;
/* Clamp to the largest alignment the target supports.  */
6236 if (align > BIGGEST_ALIGNMENT)
6237 align = BIGGEST_ALIGNMENT;
/* NOTE(review): elided listing -- the switch header, other case labels
   and the default return are not shown.  */
6241 /* Return true if N is a possible register number of function value. */
6244 ix86_function_value_regno_p (int regno)
6251 case FIRST_FLOAT_REG:
6252 /* TODO: The function should depend on current function ABI but
6253 builtins.c would need updating then. Therefore we use the
6255 if (TARGET_64BIT && ix86_abi == MS_ABI)
6257 return TARGET_FLOAT_RETURNS_IN_80387;
6263 if (TARGET_MACHO || TARGET_64BIT)
/* NOTE(review): elided listing -- the `regno` declaration, braces and
   the %eax default assignment are not shown.  */
6271 /* Define how to find the value returned by a function.
6272 VALTYPE is the data type of the value (as a tree).
6273 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6274 otherwise, FUNC is 0. */
6277 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6278 const_tree fntype, const_tree fn)
6282 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6283 we normally prevent this case when mmx is not available. However
6284 some ABIs may require the result to be returned like DImode. */
6285 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6286 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6288 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6289 we prevent this case when sse is not available. However some ABIs
6290 may require the result to be returned like integer TImode. */
6291 else if (mode == TImode
6292 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6293 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6295 /* 32-byte vector modes in %ymm0. */
6296 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6297 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6299 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6300 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6301 regno = FIRST_FLOAT_REG;
6303 /* Most things go in %eax. */
6306 /* Override FP return register with %xmm0 for local functions when
6307 SSE math is enabled or for functions with sseregparm attribute. */
6308 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6310 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6311 if ((sse_level >= 1 && mode == SFmode)
6312 || (sse_level == 2 && mode == DFmode))
6313 regno = FIRST_SSE_REG;
6316 /* OImode shouldn't be used directly. */
6317 gcc_assert (mode != OImode);
6319 return gen_rtx_REG (orig_mode, regno);
/* Select the return location for a 64-bit SysV return value.
   NOTE(review): elided listing -- the `valtype` parameter line, the
   libcall mode switch and the final return are not shown.  */
6323 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6328 /* Handle libcalls, which don't provide a type node. */
6329 if (valtype == NULL)
6341 return gen_rtx_REG (mode, FIRST_SSE_REG);
6344 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6348 return gen_rtx_REG (mode, AX_REG);
/* Typed values delegate to the full ABI classifier (in_return == 1).  */
6352 ret = construct_container (mode, orig_mode, valtype, 1,
6353 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6354 x86_64_int_return_registers, 0);
6356 /* For zero sized structures, construct_container returns NULL, but we
6357 need to keep rest of compiler happy by returning meaningful value. */
6359 ret = gen_rtx_REG (orig_mode, AX_REG);
/* Select the return register under the Microsoft x64 ABI: %rax by
   default, %xmm0 for scalar FP and 16-byte non-complex values.
   NOTE(review): elided listing -- case labels of the size switch are
   not shown.  */
6365 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6367 unsigned int regno = AX_REG;
6371 switch (GET_MODE_SIZE (mode))
6374 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6375 && !COMPLEX_MODE_P (mode))
6376 regno = FIRST_SSE_REG;
6380 if (mode == SFmode || mode == DFmode)
6381 regno = FIRST_SSE_REG;
6387 return gen_rtx_REG (orig_mode, regno);
/* Common worker for ix86_function_value and ix86_libcall_value:
   split FNTYPE_OR_DECL into decl/type and dispatch by target ABI.
   NOTE(review): elided listing -- braces and the `fn = NULL` default
   are not shown.  */
6391 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6392 enum machine_mode orig_mode, enum machine_mode mode)
6394 const_tree fn, fntype;
6397 if (fntype_or_decl && DECL_P (fntype_or_decl))
6398 fn = fntype_or_decl;
6399 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6401 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6402 return function_value_ms_64 (orig_mode, mode);
6403 else if (TARGET_64BIT)
6404 return function_value_64 (orig_mode, mode, valtype);
6406 return function_value_32 (orig_mode, mode, fntype, fn);
/* TARGET_FUNCTION_VALUE hook: normalize VALTYPE's mode, then delegate
   to ix86_function_value_1.  NOTE(review): elided listing -- the return
   type line and braces are not shown.  */
6410 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6411 bool outgoing ATTRIBUTE_UNUSED)
6413 enum machine_mode mode, orig_mode;
6415 orig_mode = TYPE_MODE (valtype);
6416 mode = type_natural_mode (valtype, NULL);
6417 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
/* Return-value location for a libcall (no type node available), via the
   shared worker.  NOTE(review): elided listing -- return type line and
   braces are not shown.  */
6421 ix86_libcall_value (enum machine_mode mode)
6423 return ix86_function_value_1 (NULL, NULL, mode, mode);
/* NOTE(review): elided listing -- the size switch skeleton and several
   returns are not shown.  */
6426 /* Return true iff type is returned in memory. */
6428 static int ATTRIBUTE_UNUSED
6429 return_in_memory_32 (const_tree type, enum machine_mode mode)
6433 if (mode == BLKmode)
6436 size = int_size_in_bytes (type);
6438 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6441 if (VECTOR_MODE_P (mode) || mode == TImode)
6443 /* User-created vectors small enough to fit in EAX. */
6447 /* MMX/3dNow values are returned in MM0,
6448 except when it doesn't exist. */
6450 return (TARGET_MMX ? 0 : 1);
6452 /* SSE values are returned in XMM0, except when it doesn't exist. */
6454 return (TARGET_SSE ? 0 : 1);
6456 /* AVX values are returned in YMM0, except when it doesn't exist. */
6458 return TARGET_AVX ? 0 : 1;
6467 /* OImode shouldn't be used directly. */
6468 gcc_assert (mode != OImode);
/* 64-bit SysV: a value is returned in memory exactly when
   examine_argument says no register classification exists.  */
6473 static int ATTRIBUTE_UNUSED
6474 return_in_memory_64 (const_tree type, enum machine_mode mode)
6476 int needed_intregs, needed_sseregs;
6477 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
/* Microsoft x64: only 1/2/4/8-byte values and 16-byte non-complex
   int/vector values (__m128) come back in registers; everything else
   is returned in memory.  NOTE(review): elided listing -- braces and
   an early return are not shown.  */
6480 static int ATTRIBUTE_UNUSED
6481 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6483 HOST_WIDE_INT size = int_size_in_bytes (type);
6485 /* __m128 is returned in xmm0. */
6486 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6487 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6490 /* Otherwise, the size must be exactly in [1248]. */
6491 return (size != 1 && size != 2 && size != 4 && size != 8);
/* TARGET_RETURN_IN_MEMORY hook: subtargets may override entirely via
   SUBTARGET_RETURN_IN_MEMORY; otherwise dispatch by target/ABI.
   NOTE(review): elided listing -- preprocessor #else/#endif lines and
   the TARGET_64BIT test are not shown.  */
6495 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6497 #ifdef SUBTARGET_RETURN_IN_MEMORY
6498 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6500 const enum machine_mode mode = type_natural_mode (type, NULL);
6504 if (ix86_function_type_abi (fntype) == MS_ABI)
6505 return return_in_memory_ms_64 (type, mode);
6507 return return_in_memory_64 (type, mode);
6510 return return_in_memory_32 (type, mode);
/* NOTE(review): elided listing -- braces, the TARGET_64BIT test around
   line 6526 and the tail of the function are not shown.  */
6514 /* Return false iff TYPE is returned in memory. This version is used
6515 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6516 but differs notably in that when MMX is available, 8-byte vectors
6517 are returned in memory, rather than in MMX registers. */
6520 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6523 enum machine_mode mode = type_natural_mode (type, NULL);
6526 return return_in_memory_64 (type, mode);
6528 if (mode == BLKmode)
6531 size = int_size_in_bytes (type);
6533 if (VECTOR_MODE_P (mode))
6535 /* Return in memory only if MMX registers *are* available. This
6536 seems backwards, but it is consistent with the existing
6543 else if (mode == TImode)
6545 else if (mode == XFmode)
/* NOTE(review): elided listing -- the warning-string continuations and
   the final return are not shown.  */
6551 /* When returning SSE vector types, we have a choice of either
6552 (1) being abi incompatible with a -march switch, or
6553 (2) generating an error.
6554 Given no good solution, I think the safest thing is one warning.
6555 The user won't be able to use -Werror, but....
6557 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6558 called in response to actually generating a caller or callee that
6559 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6560 via aggregate_value_p for general type probing from tree-ssa. */
6563 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
/* warnedsse/warnedmmx latch so each diagnostic is issued only once.  */
6565 static bool warnedsse, warnedmmx;
6567 if (!TARGET_64BIT && type)
6569 /* Look at the return type of the function, not the function type. */
6570 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6572 if (!TARGET_SSE && !warnedsse)
6575 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6578 warning (0, "SSE vector return without SSE enabled "
6583 if (!TARGET_MMX && !warnedmmx)
6585 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6588 warning (0, "MMX vector return without MMX enabled "
/* NOTE(review): elided listing -- some field-type arguments (e.g. the
   pointer types for overflow/save areas) are not shown.  */
6598 /* Create the va_list data type. */
6600 /* Returns the calling convention specific va_list date type.
6601 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6604 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6606 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6608 /* For i386 we use plain pointer to argument area. */
6609 if (!TARGET_64BIT || abi == MS_ABI)
6610 return build_pointer_type (char_type_node);
/* 64-bit SysV: build the four-field __va_list_tag record.  */
6612 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6613 type_decl = build_decl (BUILTINS_LOCATION,
6614 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6616 f_gpr = build_decl (BUILTINS_LOCATION,
6617 FIELD_DECL, get_identifier ("gp_offset"),
6618 unsigned_type_node);
6619 f_fpr = build_decl (BUILTINS_LOCATION,
6620 FIELD_DECL, get_identifier ("fp_offset"),
6621 unsigned_type_node);
6622 f_ovf = build_decl (BUILTINS_LOCATION,
6623 FIELD_DECL, get_identifier ("overflow_arg_area"),
6625 f_sav = build_decl (BUILTINS_LOCATION,
6626 FIELD_DECL, get_identifier ("reg_save_area"),
6629 va_list_gpr_counter_field = f_gpr;
6630 va_list_fpr_counter_field = f_fpr;
6632 DECL_FIELD_CONTEXT (f_gpr) = record;
6633 DECL_FIELD_CONTEXT (f_fpr) = record;
6634 DECL_FIELD_CONTEXT (f_ovf) = record;
6635 DECL_FIELD_CONTEXT (f_sav) = record;
/* Chain the fields onto the record and lay it out.  */
6637 TREE_CHAIN (record) = type_decl;
6638 TYPE_NAME (record) = type_decl;
6639 TYPE_FIELDS (record) = f_gpr;
6640 TREE_CHAIN (f_gpr) = f_fpr;
6641 TREE_CHAIN (f_fpr) = f_ovf;
6642 TREE_CHAIN (f_ovf) = f_sav;
6644 layout_type (record);
6646 /* The correct type is an array type of one element. */
6647 return build_array_type (record, build_index_type (size_zero_node));
/* NOTE(review): elided listing -- the TARGET_64BIT guard, the `tree t`
   declaration and the assignments feeding lines 6672/6686 are not
   shown.  */
6650 /* Setup the builtin va_list data type and for 64-bit the additional
6651 calling convention specific va_list data types. */
6654 ix86_build_builtin_va_list (void)
6656 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6658 /* Initialize abi specific va_list builtin types. */
6662 if (ix86_abi == MS_ABI)
6664 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6665 if (TREE_CODE (t) != RECORD_TYPE)
6666 t = build_variant_type_copy (t);
6667 sysv_va_list_type_node = t;
6672 if (TREE_CODE (t) != RECORD_TYPE)
6673 t = build_variant_type_copy (t);
6674 sysv_va_list_type_node = t;
6676 if (ix86_abi != MS_ABI)
6678 t = ix86_build_builtin_va_list_abi (MS_ABI);
6679 if (TREE_CODE (t) != RECORD_TYPE)
6680 t = build_variant_type_copy (t);
6681 ms_va_list_type_node = t;
6686 if (TREE_CODE (t) != RECORD_TYPE)
6687 t = build_variant_type_copy (t);
6688 ms_va_list_type_node = t;
/* NOTE(review): elided listing -- local rtx/label declarations, brace
   structure and parts of the address arithmetic are not shown.  */
6695 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6698 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6707 int regparm = ix86_regparm;
6709 if (cum->call_abi != ix86_abi)
6710 regparm = (ix86_abi != SYSV_ABI
6711 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6713 /* GPR size of varargs save area. */
6714 if (cfun->va_list_gpr_size)
6715 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6717 ix86_varargs_gpr_size = 0;
6719 /* FPR size of varargs save area. We don't need it if we don't pass
6720 anything in SSE registers. */
6721 if (cum->sse_nregs && cfun->va_list_fpr_size)
6722 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6724 ix86_varargs_fpr_size = 0;
6726 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6729 save_area = frame_pointer_rtx;
6730 set = get_varargs_alias_set ();
/* Spill the remaining named-parameter GPRs into the save area.  */
6732 for (i = cum->regno;
6734 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6737 mem = gen_rtx_MEM (Pmode,
6738 plus_constant (save_area, i * UNITS_PER_WORD));
6739 MEM_NOTRAP_P (mem) = 1;
6740 set_mem_alias_set (mem, set);
6741 emit_move_insn (mem, gen_rtx_REG (Pmode,
6742 x86_64_int_parameter_registers[i]));
6745 if (ix86_varargs_fpr_size)
6747 /* Now emit code to save SSE registers. The AX parameter contains number
6748 of SSE parameter registers used to call this function. We use
6749 sse_prologue_save insn template that produces computed jump across
6750 SSE saves. We need some preparation work to get this working. */
6752 label = gen_label_rtx ();
6753 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6755 /* Compute address to jump to :
6756 label - eax*4 + nnamed_sse_arguments*4 Or
6757 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6758 tmp_reg = gen_reg_rtx (Pmode);
6759 nsse_reg = gen_reg_rtx (Pmode);
6760 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6761 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6762 gen_rtx_MULT (Pmode, nsse_reg,
6765 /* vmovaps is one byte longer than movaps. */
6767 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6768 gen_rtx_PLUS (Pmode, tmp_reg,
6774 gen_rtx_CONST (DImode,
6775 gen_rtx_PLUS (DImode,
6777 GEN_INT (cum->sse_regno
6778 * (TARGET_AVX ? 5 : 4)))));
6780 emit_move_insn (nsse_reg, label_ref);
6781 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6783 /* Compute address of memory block we save into. We always use pointer
6784 pointing 127 bytes after first byte to store - this is needed to keep
6785 instruction size limited by 4 bytes (5 bytes for AVX) with one
6786 byte displacement. */
6787 tmp_reg = gen_reg_rtx (Pmode);
6788 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6789 plus_constant (save_area,
6790 ix86_varargs_gpr_size + 127)));
6791 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6792 MEM_NOTRAP_P (mem) = 1;
6793 set_mem_alias_set (mem, set);
6794 set_mem_align (mem, BITS_PER_WORD);
6796 /* And finally do the dirty job! */
6797 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6798 GEN_INT (cum->sse_regno), label));
/* MS-ABI worker for TARGET_SETUP_INCOMING_VARARGS: spill the integer
   parameter registers from CUM->regno up to X86_64_MS_REGPARM_MAX into
   their home slots in the incoming argument area.  NOTE(review):
   return type, braces and local declarations are missing from this
   extraction.  */
6803 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6805 alias_set_type set = get_varargs_alias_set ();
6808 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6812 mem = gen_rtx_MEM (Pmode,
6813 plus_constant (virtual_incoming_args_rtx,
6814 i * UNITS_PER_WORD));
6815 MEM_NOTRAP_P (mem) = 1;
6816 set_mem_alias_set (mem, set);
6818 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6819 emit_move_insn (mem, reg);
/* Target hook TARGET_SETUP_INCOMING_VARARGS: skip the last named
   argument for stdarg functions, then dispatch to the MS- or
   SysV-specific 64-bit worker according to the function's call ABI.
   NOTE(review): extraction drops several lines (no_rtl parameter,
   next_cum initialization, a likely TARGET_64BIT guard) -- confirm
   against the full source.  */
6824 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6825 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6828 CUMULATIVE_ARGS next_cum;
6831 /* This argument doesn't appear to be used anymore. Which is good,
6832 because the old code here didn't suppress rtl generation. */
6833 gcc_assert (!no_rtl);
6838 fntype = TREE_TYPE (current_function_decl);
6840 /* For varargs, we do not want to skip the dummy va_dcl argument.
6841 For stdargs, we do want to skip the last named argument. */
6843 if (stdarg_p (fntype))
6844 function_arg_advance (&next_cum, mode, type, 1);
6846 if (cum->call_abi == MS_ABI)
6847 setup_incoming_varargs_ms_64 (&next_cum);
6849 setup_incoming_varargs_64 (&next_cum);
6852 /* Checks if TYPE is of kind va_list char *. */
/* Returns nonzero when TYPE canonicalizes to a plain-pointer va_list
   (always true for 32-bit per the comment below; for 64-bit, true for
   the MS va_list node).  NOTE(review): the 32-bit early return and the
   function's return type line are missing from this extraction.  */
6855 is_va_list_char_pointer (tree type)
6859 /* For 32-bit it is always true. */
6862 canonic = ix86_canonical_va_list_type (type);
6863 return (canonic == ms_va_list_type_node
6864 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6867 /* Implement va_start. */
/* Initializes the four fields of the SysV x86-64 va_list record:
   gp_offset, fp_offset, overflow_arg_area and reg_save_area, from the
   counts of named GPR/SSE arguments recorded in crtl->args.  Falls
   back to the generic expander for 32-bit or char* va_lists.
   NOTE(review): some lines (declarations, braces) are missing from
   this extraction.  */
6870 ix86_va_start (tree valist, rtx nextarg)
6872 HOST_WIDE_INT words, n_gpr, n_fpr;
6873 tree f_gpr, f_fpr, f_ovf, f_sav;
6874 tree gpr, fpr, ovf, sav, t;
6877 /* Only 64bit target needs something special. */
6878 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6880 std_expand_builtin_va_start (valist, nextarg);
6884 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6885 f_fpr = TREE_CHAIN (f_gpr);
6886 f_ovf = TREE_CHAIN (f_fpr);
6887 f_sav = TREE_CHAIN (f_ovf);
6889 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6890 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6891 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6892 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6893 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6895 /* Count number of gp and fp argument registers used. */
6896 words = crtl->args.info.words;
6897 n_gpr = crtl->args.info.regno;
6898 n_fpr = crtl->args.info.sse_regno;
/* gp_offset = bytes of GPR save area already consumed by named args.  */
6900 if (cfun->va_list_gpr_size)
6902 type = TREE_TYPE (gpr);
6903 t = build2 (MODIFY_EXPR, type,
6904 gpr, build_int_cst (type, n_gpr * 8));
6905 TREE_SIDE_EFFECTS (t) = 1;
6906 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
/* fp_offset starts after the 8*REGPARM_MAX bytes of GPR save area.  */
6909 if (TARGET_SSE && cfun->va_list_fpr_size)
6911 type = TREE_TYPE (fpr);
6912 t = build2 (MODIFY_EXPR, type, fpr,
6913 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6914 TREE_SIDE_EFFECTS (t) = 1;
6915 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6918 /* Find the overflow area. */
6919 type = TREE_TYPE (ovf);
6920 t = make_tree (type, crtl->args.internal_arg_pointer);
6922 t = build2 (POINTER_PLUS_EXPR, type, t,
6923 size_int (words * UNITS_PER_WORD));
6924 t = build2 (MODIFY_EXPR, type, ovf, t);
6925 TREE_SIDE_EFFECTS (t) = 1;
6926 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6928 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6930 /* Find the register save area.
6931 Prologue of the function save it right above stack frame. */
6932 type = TREE_TYPE (sav);
6933 t = make_tree (type, frame_pointer_rtx);
6934 if (!ix86_varargs_gpr_size)
6935 t = build2 (POINTER_PLUS_EXPR, type, t,
6936 size_int (-8 * X86_64_REGPARM_MAX))
6937 t = build2 (MODIFY_EXPR, type, sav, t);
6938 TREE_SIDE_EFFECTS (t) = 1;
6939 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6943 /* Implement va_arg. */
/* Gimplifies a VA_ARG_EXPR for the SysV x86-64 calling convention.
   Emits a runtime test: if the argument still fits in the register
   save area, compute its address there (possibly copying scattered
   register pieces into a temporary); otherwise take it from the
   overflow (stack) area with the required alignment.  NOTE(review):
   many interleaving lines (declarations, braces, else branches) are
   missing from this extraction; commentary is hedged.  */
6946 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6949 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6950 tree f_gpr, f_fpr, f_ovf, f_sav;
6951 tree gpr, fpr, ovf, sav, t;
6953 tree lab_false, lab_over = NULL_TREE;
6958 enum machine_mode nat_mode;
6961 /* Only 64bit target needs something special. */
6962 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6963 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6965 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6966 f_fpr = TREE_CHAIN (f_gpr);
6967 f_ovf = TREE_CHAIN (f_fpr);
6968 f_sav = TREE_CHAIN (f_ovf);
6970 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6971 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6972 valist = build_va_arg_indirect_ref (valist);
6973 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6974 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6975 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
/* Pass-by-reference arguments degrade to fetching a pointer.  */
6977 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6979 type = build_pointer_type (type);
6980 size = int_size_in_bytes (type);
6981 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6983 nat_mode = type_natural_mode (type, NULL);
6992 /* Unnamed 256bit vector mode parameters are passed on stack. */
6993 if (ix86_cfun_abi () == SYSV_ABI)
7000 container = construct_container (nat_mode, TYPE_MODE (type),
7001 type, 0, X86_64_REGPARM_MAX,
7002 X86_64_SSE_REGPARM_MAX, intreg,
7007 /* Pull the value out of the saved registers. */
7009 addr = create_tmp_var (ptr_type_node, "addr");
7013 int needed_intregs, needed_sseregs;
7015 tree int_addr, sse_addr;
7017 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7018 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7020 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7022 need_temp = (!REG_P (container)
7023 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7024 || TYPE_ALIGN (type) > 128));
7026 /* In case we are passing structure, verify that it is consecutive block
7027 on the register save area. If not we need to do moves. */
7028 if (!need_temp && !REG_P (container))
7030 /* Verify that all registers are strictly consecutive */
7031 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7035 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7037 rtx slot = XVECEXP (container, 0, i);
7038 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7039 || INTVAL (XEXP (slot, 1)) != i * 16)
/* Non-SSE case: integer pieces must be 8-byte-spaced.  */
7047 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7049 rtx slot = XVECEXP (container, 0, i);
7050 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7051 || INTVAL (XEXP (slot, 1)) != i * 8)
7063 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7064 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7067 /* First ensure that we fit completely in registers. */
7070 t = build_int_cst (TREE_TYPE (gpr),
7071 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7072 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7073 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7074 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7075 gimplify_and_add (t, pre_p);
7079 t = build_int_cst (TREE_TYPE (fpr),
7080 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7081 + X86_64_REGPARM_MAX * 8);
7082 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7083 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7084 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7085 gimplify_and_add (t, pre_p);
7088 /* Compute index to start of area used for integer regs. */
7091 /* int_addr = gpr + sav; */
7092 t = fold_convert (sizetype, gpr);
7093 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7094 gimplify_assign (int_addr, t, pre_p);
7098 /* sse_addr = fpr + sav; */
7099 t = fold_convert (sizetype, fpr);
7100 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7101 gimplify_assign (sse_addr, t, pre_p);
/* need_temp path: assemble the value piecewise into a temporary.  */
7106 tree temp = create_tmp_var (type, "va_arg_tmp");
7109 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7110 gimplify_assign (addr, t, pre_p);
7112 for (i = 0; i < XVECLEN (container, 0); i++)
7114 rtx slot = XVECEXP (container, 0, i);
7115 rtx reg = XEXP (slot, 0);
7116 enum machine_mode mode = GET_MODE (reg);
7117 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7118 tree addr_type = build_pointer_type (piece_type);
7119 tree daddr_type = build_pointer_type_for_mode (piece_type,
7123 tree dest_addr, dest;
7125 if (SSE_REGNO_P (REGNO (reg)))
7127 src_addr = sse_addr;
7128 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7132 src_addr = int_addr;
7133 src_offset = REGNO (reg) * 8;
7135 src_addr = fold_convert (addr_type, src_addr);
7136 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7137 size_int (src_offset));
7138 src = build_va_arg_indirect_ref (src_addr);
7140 dest_addr = fold_convert (daddr_type, addr);
7141 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7142 size_int (INTVAL (XEXP (slot, 1))));
7143 dest = build_va_arg_indirect_ref (dest_addr);
7145 gimplify_assign (dest, src, pre_p);
/* Advance the gp_offset / fp_offset counters past what we consumed.  */
7151 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7152 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7153 gimplify_assign (gpr, t, pre_p);
7158 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7159 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7160 gimplify_assign (fpr, t, pre_p);
7163 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7165 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7168 /* ... otherwise out of the overflow area. */
7170 /* When we align parameter on stack for caller, if the parameter
7171 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7172 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7173 here with caller. */
7174 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7175 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7176 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7178 /* Care for on-stack alignment if needed. */
7179 if (arg_boundary <= 64
7180 || integer_zerop (TYPE_SIZE (type)))
7184 HOST_WIDE_INT align = arg_boundary / 8;
7185 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7186 size_int (align - 1));
7187 t = fold_convert (sizetype, t);
7188 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7190 t = fold_convert (TREE_TYPE (ovf), t);
7192 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7193 gimplify_assign (addr, t, pre_p);
7195 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7196 size_int (rsize * UNITS_PER_WORD));
7197 gimplify_assign (unshare_expr (ovf), t, pre_p);
7200 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7202 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7203 addr = fold_convert (ptrtype, addr);
7206 addr = build_va_arg_indirect_ref (addr);
7207 return build_va_arg_indirect_ref (addr);
7210 /* Return nonzero if OPNUM's MEM should be matched
7211 in movabs* patterns. */
/* Digs the MEM operand out of INSN's (possibly PARALLEL-wrapped) SET,
   strips SUBREGs, and rejects volatile MEMs unless volatile_ok.
   NOTE(review): return type and local declaration lines are missing
   from this extraction.  */
7214 ix86_check_movabs (rtx insn, int opnum)
7218 set = PATTERN (insn);
7219 if (GET_CODE (set) == PARALLEL)
7220 set = XVECEXP (set, 0, 0);
7221 gcc_assert (GET_CODE (set) == SET);
7222 mem = XEXP (set, opnum);
7223 while (GET_CODE (mem) == SUBREG)
7224 mem = SUBREG_REG (mem);
7225 gcc_assert (MEM_P (mem));
7226 return (volatile_ok || !MEM_VOLATILE_P (mem));
7229 /* Initialize the table of extra 80387 mathematical constants. */
/* Parses the five constants loadable by dedicated x87 instructions
   (fldlg2, fldln2, fldl2e, fldl2t, fldpi), rounds each to XFmode, and
   sets the ext_80387_constants_init flag so this runs once.  */
7232 init_ext_80387_constants (void)
7234 static const char * cst[5] =
7236 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7237 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7238 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7239 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7240 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7244 for (i = 0; i < 5; i++)
7246 real_from_string (&ext_80387_constants_table[i], cst[i]);
7247 /* Ensure each constant is rounded to XFmode precision. */
7248 real_convert (&ext_80387_constants_table[i],
7249 XFmode, &ext_80387_constants_table[i]);
7252 ext_80387_constants_init = 1;
7255 /* Return true if the constant is something that can be loaded with
7256 a special instruction. */
/* Classifies CONST_DOUBLE X: presumably returns distinct small codes
   for 0.0, 1.0, the five ext_80387 constants, -0.0 and -1.0, and a
   "not special" value otherwise -- exact return values are on lines
   missing from this extraction; confirm against the full source.  */
7259 standard_80387_constant_p (rtx x)
7261 enum machine_mode mode = GET_MODE (x);
7265 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7268 if (x == CONST0_RTX (mode))
7270 if (x == CONST1_RTX (mode))
7273 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7275 /* For XFmode constants, try to find a special 80387 instruction when
7276 optimizing for size or on those CPUs that benefit from them. */
7278 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7282 if (! ext_80387_constants_init)
7283 init_ext_80387_constants ();
7285 for (i = 0; i < 5; i++)
7286 if (real_identical (&r, &ext_80387_constants_table[i]))
7290 /* Load of the constant -0.0 or -1.0 will be split as
7291 fldz;fchs or fld1;fchs sequence. */
7292 if (real_isnegzero (&r))
7294 if (real_identical (&r, &dconstm1))
7300 /* Return the opcode of the special instruction to be used to load
/* Maps the classification from standard_80387_constant_p to the
   matching fld* mnemonic.  NOTE(review): the switch cases are entirely
   missing from this extraction.  */
7304 standard_80387_constant_opcode (rtx x)
7306 switch (standard_80387_constant_p (x))
7330 /* Return the CONST_DOUBLE representing the 80387 constant that is
7331 loaded by the specified special instruction. The argument IDX
7332 matches the return value from standard_80387_constant_p. */
/* NOTE(review): the switch mapping IDX to table index `i` is missing
   from this extraction.  */
7335 standard_80387_constant_rtx (int idx)
7339 if (! ext_80387_constants_init)
7340 init_ext_80387_constants ();
7356 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7360 /* Return 1 if X is all 0s and 2 if x is all 1s
7361 in supported SSE vector mode. */
/* NOTE(review): the return statements and the final default return
   are on lines missing from this extraction.  */
7364 standard_sse_constant_p (rtx x)
7366 enum machine_mode mode = GET_MODE (x);
7368 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7370 if (vector_all_ones_operand (x, mode))
7386 /* Return the opcode of the special instruction to be used to load
/* For an all-zeros constant, picks the xor idiom matching the insn's
   attribute mode (SSE packed-single/double, integer, or the AVX
   256-bit forms); for all-ones, pcmpeqd.  NOTE(review): the case
   labels of both switches are missing from this extraction.  */
7390 standard_sse_constant_opcode (rtx insn, rtx x)
7392 switch (standard_sse_constant_p (x))
7395 switch (get_attr_mode (insn))
7398 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7400 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7402 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7404 return "vxorps\t%x0, %x0, %x0";
7406 return "vxorpd\t%x0, %x0, %x0";
7408 return "vpxor\t%x0, %x0, %x0";
7413 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7420 /* Returns 1 if OP contains a symbol reference */
/* Recursive RTL walk: true for a SYMBOL_REF/LABEL_REF at the root,
   otherwise recurses into 'E' (vector) and 'e' (expr) operands.  */
7423 symbolic_reference_mentioned_p (rtx op)
7428 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7431 fmt = GET_RTX_FORMAT (GET_CODE (op));
7432 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7438 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7439 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7443 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7450 /* Return 1 if it is appropriate to emit `ret' instructions in the
7451 body of a function. Do this only if the epilogue is simple, needing a
7452 couple of insns. Prior to reloading, we can't tell how many registers
7453 must be saved, so return 0 then. Return 0 if there is no frame
7454 marker to de-allocate. */
7457 ix86_can_use_return_insn_p (void)
7459 struct ix86_frame frame;
7461 if (! reload_completed || frame_pointer_needed)
7464 /* Don't allow more than 32 pop, since that's all we can do
7465 with one instruction. */
7466 if (crtl->args.pops_args
7467 && crtl->args.size >= 32768)
/* A bare `ret' is OK only when nothing was allocated or saved.  */
7470 ix86_compute_frame_layout (&frame);
7471 return frame.to_allocate == 0 && (frame.nregs + frame.nsseregs) == 0;
7474 /* Value should be nonzero if functions must have frame pointers.
7475 Zero means the frame pointer need not be set up (and parms may
7476 be accessed via the stack pointer) in functions that seem suitable. */
7479 ix86_frame_pointer_required (void)
7481 /* If we accessed previous frames, then the generated code expects
7482 to be able to access the saved ebp value in our frame. */
7483 if (cfun->machine->accesses_prev_frame)
7486 /* Several x86 os'es need a frame pointer for other reasons,
7487 usually pertaining to setjmp. */
7488 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7491 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7492 the frame pointer by default. Turn it back on now if we've not
7493 got a leaf function. */
7494 if (TARGET_OMIT_LEAF_FRAME_POINTER
7495 && (!current_function_is_leaf
7496 || ix86_current_function_calls_tls_descriptor))
7505 /* Record that the current function accesses previous call frames. */
/* Simple flag setter consumed by ix86_frame_pointer_required above.  */
7508 ix86_setup_frame_addresses (void)
7510 cfun->machine->accesses_prev_frame = 1;
/* Default USE_HIDDEN_LINKONCE from assembler/linker capabilities when
   the subtarget did not define it.  pic_labels_used is a bitmask of
   registers for which a PC-thunk must be emitted at end of file.  */
7513 #ifndef USE_HIDDEN_LINKONCE
7514 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7515 # define USE_HIDDEN_LINKONCE 1
7517 # define USE_HIDDEN_LINKONCE 0
7521 static int pic_labels_used;
7523 /* Fills in the label name that should be used for a pc thunk for
7524 the given register. */
/* 32-bit only (asserted).  Hidden-linkonce builds use the well-known
   __i686.get_pc_thunk.<reg> name; otherwise an internal LPR label.  */
7527 get_pc_thunk_name (char name[32], unsigned int regno)
7529 gcc_assert (!TARGET_64BIT);
7531 if (USE_HIDDEN_LINKONCE)
7532 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7534 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7538 /* This function generates code for -fpic that loads %ebx with
7539 the return address of the caller and then returns. */
/* End-of-file hook: for every register flagged in pic_labels_used,
   emit its get-PC thunk (mov (%esp),%reg; ret) in a Mach-O coalesced
   section, a hidden linkonce section, or plain text depending on
   target capabilities.  NOTE(review): branch structure lines are
   missing from this extraction.  */
7542 ix86_file_end (void)
7547 for (regno = 0; regno < 8; ++regno)
7551 if (! ((pic_labels_used >> regno) & 1))
7554 get_pc_thunk_name (name, regno);
/* Mach-O path: weak, private-extern label in the text-coal section.  */
7559 switch_to_section (darwin_sections[text_coal_section]);
7560 fputs ("\t.weak_definition\t", asm_out_file);
7561 assemble_name (asm_out_file, name);
7562 fputs ("\n\t.private_extern\t", asm_out_file);
7563 assemble_name (asm_out_file, name);
7564 fputs ("\n", asm_out_file);
7565 ASM_OUTPUT_LABEL (asm_out_file, name);
7569 if (USE_HIDDEN_LINKONCE)
7573 decl = build_decl (BUILTINS_LOCATION,
7574 FUNCTION_DECL, get_identifier (name),
7576 TREE_PUBLIC (decl) = 1;
7577 TREE_STATIC (decl) = 1;
7578 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7580 (*targetm.asm_out.unique_section) (decl, 0);
7581 switch_to_section (get_named_section (decl, NULL, 0));
7583 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7584 fputs ("\t.hidden\t", asm_out_file);
7585 assemble_name (asm_out_file, name);
7586 putc ('\n', asm_out_file);
7587 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7591 switch_to_section (text_section);
7592 ASM_OUTPUT_LABEL (asm_out_file, name);
/* Thunk body: load the return address from the stack and return.  */
7595 xops[0] = gen_rtx_REG (Pmode, regno);
7596 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7597 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7598 output_asm_insn ("ret", xops);
7601 if (NEED_INDICATE_EXEC_STACK)
7602 file_end_indicate_exec_stack ();
7605 /* Emit code for the SET_GOT patterns. */
/* Emits the assembly that materializes the GOT pointer in DEST:
   VxWorks RTP loads it from GOTT_BASE/GOTT_INDEX; otherwise either the
   classic call/pop sequence or a call to the per-register PC thunk
   (deep-branch-prediction case), followed by the add of
   _GLOBAL_OFFSET_TABLE_.  NOTE(review): several lines (returns, label
   handling) are missing from this extraction.  */
7608 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7614 if (TARGET_VXWORKS_RTP && flag_pic)
7616 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7617 xops[2] = gen_rtx_MEM (Pmode,
7618 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7619 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7621 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7622 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7623 an unadorned address. */
7624 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7625 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7626 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7630 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7632 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7634 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7637 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7639 output_asm_insn ("call\t%a2", xops);
7642 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7643 is what will be referenced by the Mach-O PIC subsystem. */
7645 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7648 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7649 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7652 output_asm_insn ("pop%z0\t%0", xops);
/* Deep-branch-prediction path: call the shared PC thunk instead of
   the call/pop pair, and record that the thunk must be emitted.  */
7657 get_pc_thunk_name (name, REGNO (dest));
7658 pic_labels_used |= 1 << REGNO (dest);
7660 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7661 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7662 output_asm_insn ("call\t%X2", xops);
7663 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7664 is what will be referenced by the Mach-O PIC subsystem. */
7667 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7669 targetm.asm_out.internal_label (asm_out_file, "L",
7670 CODE_LABEL_NUMBER (label));
7677 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7678 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7680 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7685 /* Generate an "push" pattern for input ARG. */
/* NOTE(review): the function header line itself (presumably gen_push)
   is missing from this extraction.  Tracks the CFA offset when the
   stack pointer is the CFA register, then builds the
   (set (mem (pre_dec sp)) arg) push RTL.  */
7690 if (ix86_cfa_state->reg == stack_pointer_rtx)
7691 ix86_cfa_state->offset += UNITS_PER_WORD;
7693 return gen_rtx_SET (VOIDmode,
7695 gen_rtx_PRE_DEC (Pmode,
7696 stack_pointer_rtx)),
7700 /* Return >= 0 if there is an unused call-clobbered register available
7701 for the entire function. */
/* Only worth trying in a leaf, unprofiled, non-TLS-descriptor
   function; scans eax/ecx/edx (regs 2..0) skipping the DRAP reg.  */
7704 ix86_select_alt_pic_regnum (void)
7706 if (current_function_is_leaf && !crtl->profile
7707 && !ix86_current_function_calls_tls_descriptor)
7710 /* Can't use the same register for both PIC and DRAP. */
7712 drap = REGNO (crtl->drap_reg);
7715 for (i = 2; i >= 0; --i)
7716 if (i != drap && !df_regs_ever_live_p (i))
7720 return INVALID_REGNUM;
7723 /* Return 1 if we need to save REGNO. */
/* Special-cases the PIC register (unless an alternate PIC reg can be
   used), EH return data registers when MAYBE_EH_RETURN, and the DRAP
   register; otherwise the usual live && !call-used && !fixed test.
   NOTE(review): some return statements are on lines missing from this
   extraction.  */
7725 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7727 if (pic_offset_table_rtx
7728 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7729 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7731 || crtl->calls_eh_return
7732 || crtl->uses_const_pool))
7734 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7739 if (crtl->calls_eh_return && maybe_eh_return)
7744 unsigned test = EH_RETURN_DATA_REGNO (i);
7745 if (test == INVALID_REGNUM)
7752 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7755 return (df_regs_ever_live_p (regno)
7756 && !call_used_regs[regno]
7757 && !fixed_regs[regno]
7758 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7761 /* Return number of saved general prupose registers. */
/* Counts non-SSE hard registers for which ix86_save_reg is true.  */
7764 ix86_nsaved_regs (void)
7769 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7770 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7775 /* Return number of saved SSE registrers. */
/* SSE registers are only callee-saved under the MS ABI; zero
   otherwise (the early return line is missing from this extraction).  */
7778 ix86_nsaved_sseregs (void)
7783 if (ix86_cfun_abi () != MS_ABI)
7785 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7786 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7791 /* Given FROM and TO register numbers, say whether this elimination is
7792 allowed. If stack alignment is needed, we can only replace argument
7793 pointer with hard frame pointer, or replace frame pointer with stack
7794 pointer. Otherwise, frame pointer elimination is automatically
7795 handled and all other eliminations are valid. */
7798 ix86_can_eliminate (int from, int to)
7800 if (stack_realign_fp)
7801 return ((from == ARG_POINTER_REGNUM
7802 && to == HARD_FRAME_POINTER_REGNUM)
7803 || (from == FRAME_POINTER_REGNUM
7804 && to == STACK_POINTER_REGNUM));
7806 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7809 /* Return the offset between two registers, one to be eliminated, and the other
7810 its replacement, at the start of a routine. */
/* Reads the offsets straight out of ix86_compute_frame_layout's
   result for the four legal (FROM, TO) pairs.  */
7813 ix86_initial_elimination_offset (int from, int to)
7815 struct ix86_frame frame;
7816 ix86_compute_frame_layout (&frame);
7818 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7819 return frame.hard_frame_pointer_offset;
7820 else if (from == FRAME_POINTER_REGNUM
7821 && to == HARD_FRAME_POINTER_REGNUM)
7822 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7825 gcc_assert (to == STACK_POINTER_REGNUM);
7827 if (from == ARG_POINTER_REGNUM)
7828 return frame.stack_pointer_offset;
7830 gcc_assert (from == FRAME_POINTER_REGNUM);
7831 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7835 /* In a dynamically-aligned function, we can't know the offset from
7836 stack pointer to frame pointer, so we must ensure that setjmp
7837 eliminates fp against the hard fp (%ebp) rather than trying to
7838 index from %esp up to the top of the frame across a gap that is
7839 of unknown (at compile-time) size. */
7841 ix86_builtin_setjmp_frame_value (void)
7843 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7846 /* Fill structure ix86_frame about frame of currently computed function. */
/* Computes register-save counts, paddings, the GPR/SSE save areas,
   the varargs area, outgoing-args area, frame/hard-frame/stack
   pointer offsets, the to_allocate size and the red-zone adjustment.
   Also decides move-vs-push register saving for the fast prologue.
   NOTE(review): many lines (blank separators, branch braces, an
   apparently debug-only fprintf block's guard) are missing from this
   extraction; statement order here is critical, so code is untouched.  */
7849 ix86_compute_frame_layout (struct ix86_frame *frame)
7851 HOST_WIDE_INT total_size;
7852 unsigned int stack_alignment_needed;
7853 HOST_WIDE_INT offset;
7854 unsigned int preferred_alignment;
7855 HOST_WIDE_INT size = get_frame_size ();
7857 frame->nregs = ix86_nsaved_regs ();
7858 frame->nsseregs = ix86_nsaved_sseregs ();
7861 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7862 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7864 /* MS ABI seem to require stack alignment to be always 16 except for function
7866 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7868 preferred_alignment = 16;
7869 stack_alignment_needed = 16;
7870 crtl->preferred_stack_boundary = 128;
7871 crtl->stack_alignment_needed = 128;
7874 gcc_assert (!size || stack_alignment_needed);
7875 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7876 gcc_assert (preferred_alignment <= stack_alignment_needed);
7878 /* During reload iteration the amount of registers saved can change.
7879 Recompute the value as needed. Do not recompute when amount of registers
7880 didn't change as reload does multiple calls to the function and does not
7881 expect the decision to change within single iteration. */
7882 if (!optimize_function_for_size_p (cfun)
7883 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7885 int count = frame->nregs;
7887 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7888 /* The fast prologue uses move instead of push to save registers. This
7889 is significantly longer, but also executes faster as modern hardware
7890 can execute the moves in parallel, but can't do that for push/pop.
7892 Be careful about choosing what prologue to emit: When function takes
7893 many instructions to execute we may use slow version as well as in
7894 case function is known to be outside hot spot (this is known with
7895 feedback only). Weight the size of function by number of registers
7896 to save as it is cheap to use one or two push instructions but very
7897 slow to use many of them. */
7899 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7900 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7901 || (flag_branch_probabilities
7902 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7903 cfun->machine->use_fast_prologue_epilogue = false;
7905 cfun->machine->use_fast_prologue_epilogue
7906 = !expensive_function_p (count);
7908 if (TARGET_PROLOGUE_USING_MOVE
7909 && cfun->machine->use_fast_prologue_epilogue)
7910 frame->save_regs_using_mov = true;
7912 frame->save_regs_using_mov = false;
7915 /* Skip return address and saved base pointer. */
7916 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7918 frame->hard_frame_pointer_offset = offset;
7920 /* Set offset to aligned because the realigned frame starts from
7922 if (stack_realign_fp)
7923 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7925 /* Register save area */
7926 offset += frame->nregs * UNITS_PER_WORD;
7928 /* Align SSE reg save area. */
7929 if (frame->nsseregs)
7930 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
7932 frame->padding0 = 0;
7934 /* SSE register save area. */
7935 offset += frame->padding0 + frame->nsseregs * 16;
7938 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7939 offset += frame->va_arg_size;
7941 /* Align start of frame for local function. */
7942 frame->padding1 = ((offset + stack_alignment_needed - 1)
7943 & -stack_alignment_needed) - offset;
7945 offset += frame->padding1;
7947 /* Frame pointer points here. */
7948 frame->frame_pointer_offset = offset;
7952 /* Add outgoing arguments area. Can be skipped if we eliminated
7953 all the function calls as dead code.
7954 Skipping is however impossible when function calls alloca. Alloca
7955 expander assumes that last crtl->outgoing_args_size
7956 of stack frame are unused. */
7957 if (ACCUMULATE_OUTGOING_ARGS
7958 && (!current_function_is_leaf || cfun->calls_alloca
7959 || ix86_current_function_calls_tls_descriptor))
7961 offset += crtl->outgoing_args_size;
7962 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7965 frame->outgoing_arguments_size = 0;
7967 /* Align stack boundary. Only needed if we're calling another function
7969 if (!current_function_is_leaf || cfun->calls_alloca
7970 || ix86_current_function_calls_tls_descriptor)
7971 frame->padding2 = ((offset + preferred_alignment - 1)
7972 & -preferred_alignment) - offset;
7974 frame->padding2 = 0;
7976 offset += frame->padding2;
7978 /* We've reached end of stack frame. */
7979 frame->stack_pointer_offset = offset;
7981 /* Size prologue needs to allocate. */
7982 frame->to_allocate =
7983 (size + frame->padding1 + frame->padding2
7984 + frame->outgoing_arguments_size + frame->va_arg_size);
7986 if ((!frame->to_allocate && frame->nregs <= 1)
7987 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
7988 frame->save_regs_using_mov = false;
7990 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7991 && current_function_sp_is_unchanging
7992 && current_function_is_leaf
7993 && !ix86_current_function_calls_tls_descriptor)
7995 frame->red_zone_size = frame->to_allocate;
7996 if (frame->save_regs_using_mov)
7997 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
7998 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7999 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8002 frame->red_zone_size = 0;
8003 frame->to_allocate -= frame->red_zone_size;
8004 frame->stack_pointer_offset -= frame->red_zone_size;
/* Debug dump of the computed layout -- presumably guarded by an
   #if/if(dump) condition on a line missing from this extraction.  */
8006 fprintf (stderr, "\n");
8007 fprintf (stderr, "size: %ld\n", (long)size);
8008 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
8009 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
8010 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
8011 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
8012 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
8013 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
8014 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
8015 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
8016 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
8017 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
8018 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
8019 (long)frame->hard_frame_pointer_offset);
8020 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
8021 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
8022 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
8023 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
8027 /* Emit code to save registers in the prologue.  */
/* Pushes each integer call-saved register (SSE registers are excluded
   here and handled by ix86_emit_save_sse_regs_using_mov); each push is
   marked frame-related for DWARF CFI generation.  */
8030 ix86_emit_save_regs (void)
/* Walk register numbers downward so pushes land in descending regno
   order.  NOTE(review): the decrement happens in the loop test, so the
   first regno actually examined is FIRST_PSEUDO_REGISTER - 2 — confirm
   against upstream; elided lines may hide the intended initializer.  */
8035 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8036 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8038 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8039 RTX_FRAME_RELATED_P (insn) = 1;
8043 /* Emit code to save registers using MOV insns.  First register
8044 is saved at POINTER + OFFSET.  */
/* Stores each integer call-saved register to consecutive word-sized
   stack slots; OFFSET advances by UNITS_PER_WORD per register and each
   move is marked frame-related for CFI.  */
8046 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8051 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8052 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8054 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8056 gen_rtx_REG (Pmode, regno));
8057 RTX_FRAME_RELATED_P (insn) = 1;
8058 offset += UNITS_PER_WORD;
8062 /* Emit code to save SSE registers using MOV insns.  First register
8063 is saved at POINTER + OFFSET.  */
/* Same as ix86_emit_save_regs_using_mov but for SSE registers: each is
   stored as a TImode (16-byte) slot, and the memory is marked 128-bit
   aligned so an aligned store can be used.  */
8065 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8071 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8072 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8074 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8075 set_mem_align (mem, 128);
8076 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8077 RTX_FRAME_RELATED_P (insn) = 1;
/* Pending REG_CFA_RESTORE notes not yet attached to an insn; flushed by
   ix86_add_queued_cfa_restore_notes.  GTY(()) keeps the list alive
   across garbage collection.  */
8082 static GTY(()) rtx queued_cfa_restores;
8084 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8085 manipulation insn.  Don't add it if the previously
8086 saved value will be left untouched within stack red-zone till return,
8087 as unwinders can find the same value in the register and
/* RED_OFFSET is the slot's offset relative to the stack pointer at
   function entry; a slot inside the red zone (red_offset + RED_ZONE_SIZE
   >= 0) needs no restore note on non-MS 64-bit targets.  */
8091 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8094 && !TARGET_64BIT_MS_ABI
8095 && red_offset + RED_ZONE_SIZE >= 0
/* pops_args >= 64K forces the indirect-return path, which invalidates
   the red-zone assumption — see ix86_expand_epilogue.  */
8096 && crtl->args.pops_args < 65536)
8101 add_reg_note (insn, REG_CFA_RESTORE, reg);
8102 RTX_FRAME_RELATED_P (insn) = 1;
/* No insn to attach to yet: prepend onto the queued list instead.  */
8106 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8109 /* Add queued REG_CFA_RESTORE notes if any to INSN.  */
8112 ix86_add_queued_cfa_restore_notes (rtx insn)
8115 if (!queued_cfa_restores)
/* Find the tail of the queued list, then splice INSN's existing notes
   after it so the whole queue becomes part of INSN's REG_NOTES.  */
8117 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8119 XEXP (last, 1) = REG_NOTES (insn);
8120 REG_NOTES (insn) = queued_cfa_restores;
8121 queued_cfa_restores = NULL_RTX;
8122 RTX_FRAME_RELATED_P (insn) = 1;
8125 /* Expand prologue or epilogue stack adjustment.
8126 The pattern exists to put a dependency on all ebp-based memory accesses.
8127 STYLE should be negative if instructions should be marked as frame related,
8128 zero if %r11 register is live and cannot be freely used and positive
/* SET_CFA true means this adjustment moves the CFA: update the tracked
   CFA state and emit a REG_CFA_ADJUST_CFA note.  */
8132 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8133 int style, bool set_cfa)
8138 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
/* 64-bit: a sign-extended-32-bit offset fits in the add immediate.  */
8139 else if (x86_64_immediate_operand (offset, DImode))
8140 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8144 /* r11 is used by indirect sibcall return as well, set before the
8145 epilogue and used after the epilogue.  ATM indirect sibcall
8146 shouldn't be used together with huge frame sizes in one
8147 function because of the frame_size check in sibcall.c.  */
/* Huge offset: materialize it in r11 first (only legal when STYLE says
   r11 is free).  */
8149 r11 = gen_rtx_REG (DImode, R11_REG);
8150 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8152 RTX_FRAME_RELATED_P (insn) = 1;
8153 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
/* Stack manipulation insn: flush any queued CFA-restore notes here.  */
8158 ix86_add_queued_cfa_restore_notes (insn);
8164 gcc_assert (ix86_cfa_state->reg == src);
8165 ix86_cfa_state->offset += INTVAL (offset);
8166 ix86_cfa_state->reg = dest;
8168 r = gen_rtx_PLUS (Pmode, src, offset);
8169 r = gen_rtx_SET (VOIDmode, dest, r);
8170 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8171 RTX_FRAME_RELATED_P (insn) = 1;
8174 RTX_FRAME_RELATED_P (insn) = 1;
8177 /* Find an available register to be used as dynamic realign argument
8178 pointer register.  Such a register will be written in prologue and
8179 used in begin of body, so it must not be
8180 1. parameter passing register.
8182 We reuse static-chain register if it is available.  Otherwise, we
8183 use DI for i386 and R13 for x86-64.  We chose R13 since it has
8186 Return: the regno of chosen register.  */
8189 find_drap_reg (void)
8191 tree decl = cfun->decl;
8195 /* Use R13 for nested function or function need static chain.
8196 Since function with tail call may use any caller-saved
8197 registers in epilogue, DRAP must not use caller-saved
8198 register in such case.  */
8199 if ((decl_function_context (decl)
8200 && !DECL_NO_STATIC_CHAIN (decl))
8201 || crtl->tail_call_emit)
8208 /* Use DI for nested function or function need static chain.
8209 Since function with tail call may use any caller-saved
8210 registers in epilogue, DRAP must not use caller-saved
8211 register in such case.  */
8212 if ((decl_function_context (decl)
8213 && !DECL_NO_STATIC_CHAIN (decl))
8214 || crtl->tail_call_emit)
8217 /* Reuse static chain register if it isn't used for parameter
/* regparm <= 2 and no fastcall means the static-chain register (ECX)
   is not consumed by parameter passing, so it can serve as DRAP.  */
8219 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8220 && !lookup_attribute ("fastcall",
8221 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8228 /* Update incoming stack boundary and estimated stack alignment.  */
/* Derives ix86_incoming_stack_boundary from (in priority order): the
   user's command-line setting, the force_align_arg_pointer attribute,
   the parameter-passing requirement, and the special-case alignment
   guaranteed by the runtime for main().  */
8231 ix86_update_stack_boundary (void)
8233 /* Prefer the one specified at command line.  */
8234 ix86_incoming_stack_boundary
8235 = (ix86_user_incoming_stack_boundary
8236 ? ix86_user_incoming_stack_boundary
8237 : ix86_default_incoming_stack_boundary);
8239 /* Incoming stack alignment can be changed on individual functions
8240 via force_align_arg_pointer attribute.  We use the smallest
8241 incoming stack boundary.  */
8242 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8243 && lookup_attribute (ix86_force_align_arg_pointer_string,
8244 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8245 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8247 /* The incoming stack frame has to be aligned at least at
8248 parm_stack_boundary.  */
8249 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8250 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8252 /* Stack at entrance of main is aligned by runtime.  We use the
8253 smallest incoming stack boundary.  */
8254 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8255 && DECL_NAME (current_function_decl)
8256 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8257 && DECL_FILE_SCOPE_P (current_function_decl))
8258 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8260 /* x86_64 vararg needs 16byte stack alignment for register save
/* 128 bits = 16 bytes, the alignment the x86-64 register-save area
   requires.  */
8264 && crtl->stack_alignment_estimated < 128)
8265 crtl->stack_alignment_estimated = 128;
8268 /* Handle the TARGET_GET_DRAP_RTX hook.  Return NULL if no DRAP is
8269 needed or an rtx for DRAP otherwise.  */
8272 ix86_get_drap_rtx (void)
/* -mforce-drap, or push-based outgoing args, always requires DRAP.  */
8274 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8275 crtl->need_drap = true;
8277 if (stack_realign_drap)
8279 /* Assign DRAP to vDRAP and returns vDRAP */
8280 unsigned int regno = find_drap_reg ();
8285 arg_ptr = gen_rtx_REG (Pmode, regno);
8286 crtl->drap_reg = arg_ptr;
/* Copy the hard DRAP register into a pseudo (vDRAP) ...  */
8289 drap_vreg = copy_to_reg (arg_ptr);
/* ... and insert that copy right after the function entry point.  */
8293 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8294 RTX_FRAME_RELATED_P (insn) = 1;
8301 /* Handle the TARGET_INTERNAL_ARG_POINTER hook.  */
/* The default virtual incoming-arguments pointer suffices; DRAP-based
   argument access is arranged elsewhere (see ix86_get_drap_rtx).  */
8304 ix86_internal_arg_pointer (void)
8306 return virtual_incoming_args_rtx;
8309 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8310 to be generated in correct form.  */
8312 ix86_finalize_stack_realign_flags (void)
8314 /* Check if stack realign is really needed after reload, and
8315 stores result in cfun */
/* The effective incoming boundary is the larger of the parameter
   boundary and the target's incoming boundary.  */
8316 unsigned int incoming_stack_boundary
8317 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8318 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary)
/* Leaf functions only need the alignment of slots actually used;
   non-leaf functions must honor the full required alignment.  */
8319 unsigned int stack_realign = (incoming_stack_boundary
8320 < (current_function_is_leaf
8321 ? crtl->max_used_stack_slot_alignment
8322 : crtl->stack_alignment_needed));
8324 if (crtl->stack_realign_finalized)
8326 /* After stack_realign_needed is finalized, we can no longer
8328 gcc_assert (crtl->stack_realign_needed == stack_realign);
8332 crtl->stack_realign_needed = stack_realign;
8333 crtl->stack_realign_finalized = true;
8337 /* Expand the prologue into a bunch of separate insns.  */
/* Overall order: (1) DRAP setup + stack realignment if needed,
   (2) frame-pointer setup, (3) register saves (push or mov),
   (4) frame allocation (direct sub or probed via the stack worker),
   (5) PIC register setup, (6) misc fixups (memory blockage, cld).
   CFA state is tracked in ix86_cfa_state throughout.  */
8340 ix86_expand_prologue (void)
8344 struct ix86_frame frame;
8345 HOST_WIDE_INT allocate;
8347 ix86_finalize_stack_realign_flags ();
8349 /* DRAP should not coexist with stack_realign_fp */
8350 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8352 /* Initialize CFA state for before the prologue.  */
8353 ix86_cfa_state->reg = stack_pointer_rtx;
8354 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8356 ix86_compute_frame_layout (&frame);
8358 /* Emit prologue code to adjust stack alignment and setup DRAP, in case
8359 of DRAP is needed and stack realignment is really needed after reload */
8360 if (crtl->drap_reg && crtl->stack_realign_needed)
8363 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8364 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8365 ? 0 : UNITS_PER_WORD);
8367 gcc_assert (stack_realign_drap);
8369 /* Grab the argument pointer.  */
8370 x = plus_constant (stack_pointer_rtx,
8371 (UNITS_PER_WORD + param_ptr_offset));
8374 /* Only need to push parameter pointer reg if it is caller
8376 if (!call_used_regs[REGNO (crtl->drap_reg)])
8378 /* Push arg pointer reg */
8379 insn = emit_insn (gen_push (y));
8380 RTX_FRAME_RELATED_P (insn) = 1;
8383 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8384 RTX_FRAME_RELATED_P (insn) = 1;
/* From here on the CFA is expressed via the DRAP register.  */
8385 ix86_cfa_state->reg = crtl->drap_reg;
8387 /* Align the stack.  */
8388 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8390 GEN_INT (-align_bytes)));
8391 RTX_FRAME_RELATED_P (insn) = 1;
8393 /* Replicate the return address on the stack so that return
8394 address can be reached via (argp - 1) slot.  This is needed
8395 to implement macro RETURN_ADDR_RTX and intrinsic function
8396 expand_builtin_return_addr etc.  */
8398 x = gen_frame_mem (Pmode,
8399 plus_constant (x, -UNITS_PER_WORD));
8400 insn = emit_insn (gen_push (x));
8401 RTX_FRAME_RELATED_P (insn) = 1;
8404 /* Note: AT&T enter does NOT have reversed args.  Enter is probably
8405 slower on all targets.  Also sdb doesn't like it.  */
8407 if (frame_pointer_needed)
8409 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8410 RTX_FRAME_RELATED_P (insn) = 1;
8412 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8413 RTX_FRAME_RELATED_P (insn) = 1;
8415 if (ix86_cfa_state->reg == stack_pointer_rtx)
8416 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8419 if (stack_realign_fp)
8421 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8422 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8424 /* Align the stack.  */
8425 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8427 GEN_INT (-align_bytes)));
8428 RTX_FRAME_RELATED_P (insn) = 1;
8431 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8433 if (!frame.save_regs_using_mov)
8434 ix86_emit_save_regs ();
/* Registers saved by mov are part of the allocation instead.  */
8436 allocate += frame.nregs * UNITS_PER_WORD;
8438 /* When using red zone we may start register saving before allocating
8439 the stack frame saving one cycle of the prologue.  However I will
8440 avoid doing this if I am going to have to probe the stack since
8441 at least on x86_64 the stack probe can turn into a call that clobbers
8442 a red zone location */
8443 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8444 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8445 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8446 && !crtl->stack_realign_needed)
8447 ? hard_frame_pointer_rtx
8448 : stack_pointer_rtx,
8449 -frame.nregs * UNITS_PER_WORD);
8453 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8454 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8455 GEN_INT (-allocate), -1,
8456 ix86_cfa_state->reg == stack_pointer_rtx);
/* Large allocation with stack probing: done via a helper that takes
   the size in EAX, so EAX may need to be preserved around it.  */
8459 /* Only valid for Win32.  */
8460 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8464 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8466 if (cfun->machine->call_abi == MS_ABI)
8469 eax_live = ix86_eax_live_at_start_p ();
8473 emit_insn (gen_push (eax));
8474 allocate -= UNITS_PER_WORD;
8477 emit_move_insn (eax, GEN_INT (allocate));
8480 insn = gen_allocate_stack_worker_64 (eax, eax);
8482 insn = gen_allocate_stack_worker_32 (eax, eax);
8483 insn = emit_insn (insn);
8485 if (ix86_cfa_state->reg == stack_pointer_rtx)
8487 ix86_cfa_state->offset += allocate;
8488 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8489 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8490 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8491 RTX_FRAME_RELATED_P (insn) = 1;
/* Reload the live EAX value that was pushed before the worker call.  */
8496 if (frame_pointer_needed)
8497 t = plus_constant (hard_frame_pointer_rtx,
8500 - frame.nregs * UNITS_PER_WORD);
8502 t = plus_constant (stack_pointer_rtx, allocate);
8503 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
/* Save registers with mov when it was not already done via the
   red-zone fast path above.  */
8507 if (frame.save_regs_using_mov
8508 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8509 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8511 if (!frame_pointer_needed
8512 || !frame.to_allocate
8513 || crtl->stack_realign_needed)
8514 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8516 + frame.nsseregs * 16 + frame.padding0);
8518 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8519 -frame.nregs * UNITS_PER_WORD);
8521 if (!frame_pointer_needed
8522 || !frame.to_allocate
8523 || crtl->stack_realign_needed)
8524 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8527 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8528 - frame.nregs * UNITS_PER_WORD
8529 - frame.nsseregs * 16
/* Set up the PIC register if this function uses it.  */
8532 pic_reg_used = false;
8533 if (pic_offset_table_rtx
8534 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8537 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8539 if (alt_pic_reg_used != INVALID_REGNUM)
8540 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8542 pic_reg_used = true;
8549 if (ix86_cmodel == CM_LARGE_PIC)
8551 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8552 rtx label = gen_label_rtx ();
8554 LABEL_PRESERVE_P (label) = 1;
8555 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8556 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8557 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8558 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8559 pic_offset_table_rtx, tmp_reg));
8562 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8565 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8568 /* In the pic_reg_used case, make sure that the got load isn't deleted
8569 when mcount needs it.  Blockage to avoid call movement across mcount
8570 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8572 if (crtl->profile && pic_reg_used)
8573 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8575 if (crtl->drap_reg && !crtl->stack_realign_needed)
8577 /* vDRAP is setup but after reload it turns out stack realign
8578 isn't necessary, here we will emit prologue to setup DRAP
8579 without stack realign adjustment */
8580 int drap_bp_offset = UNITS_PER_WORD * 2;
8581 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8582 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8585 /* Prevent instructions from being scheduled into register save push
8586 sequence when access to the redzone area is done through frame pointer.
8587 The offset between the frame pointer and the stack pointer is calculated
8588 relative to the value of the stack pointer at the end of the function
8589 prologue, and moving instructions that access redzone area via frame
8590 pointer inside push sequence violates this assumption.  */
8591 if (frame_pointer_needed && frame.red_zone_size)
8592 emit_insn (gen_memory_blockage ());
8594 /* Emit cld instruction if stringops are used in the function.  */
8595 if (TARGET_CLD && ix86_current_function_needs_cld)
8596 emit_insn (gen_cld ());
8599 /* Emit code to restore REG using a POP insn.  */
/* RED_OFFSET locates the slot relative to the entry stack pointer and
   is forwarded to ix86_add_cfa_restore_note for the red-zone check.
   Also updates the tracked CFA when the pop affects it.  */
8602 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8604 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8606 if (ix86_cfa_state->reg == crtl->drap_reg
8607 && REGNO (reg) == REGNO (crtl->drap_reg))
8609 /* Previously we'd represented the CFA as an expression
8610 like *(%ebp - 8).  We've just popped that value from
8611 the stack, which means we need to reset the CFA to
8612 the drap register.  This will remain until we restore
8613 the stack pointer.  */
8614 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8615 RTX_FRAME_RELATED_P (insn) = 1;
8619 if (ix86_cfa_state->reg == stack_pointer_rtx)
/* A pop moves the stack pointer up one word, shrinking the CFA
   offset; record that adjustment from the insn's own SP update.  */
8621 ix86_cfa_state->offset -= UNITS_PER_WORD;
8622 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8623 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8624 RTX_FRAME_RELATED_P (insn) = 1;
8627 /* When the frame pointer is the CFA, and we pop it, we are
8628 swapping back to the stack pointer as the CFA.  This happens
8629 for stack frames that don't allocate other data, so we assume
8630 the stack pointer is now pointing at the return address, i.e.
8631 the function entry state, which makes the offset be 1 word.  */
8632 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8633 && reg == hard_frame_pointer_rtx)
8635 ix86_cfa_state->reg = stack_pointer_rtx;
8636 ix86_cfa_state->offset = UNITS_PER_WORD;
8638 add_reg_note (insn, REG_CFA_DEF_CFA,
8639 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8640 GEN_INT (UNITS_PER_WORD)));
8641 RTX_FRAME_RELATED_P (insn) = 1;
8644 ix86_add_cfa_restore_note (insn, reg, red_offset);
8647 /* Emit code to restore saved registers using POP insns.  */
/* Pops the integer call-saved registers in ascending regno order;
   RED_OFFSET tracks each slot's entry-SP-relative position for the
   red-zone CFA-restore check.  */
8650 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8654 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8655 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8657 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8659 red_offset += UNITS_PER_WORD;
8663 /* Emit code and notes for the LEAVE instruction.  */
/* LEAVE restores %esp from %ebp and pops %ebp; attach queued restore
   notes and, if the frame pointer is the CFA, record the CFA move.  */
8666 ix86_emit_leave (HOST_WIDE_INT red_offset)
8668 rtx insn = emit_insn (ix86_gen_leave ());
8670 ix86_add_queued_cfa_restore_notes (insn);
8672 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8674 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8675 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8676 RTX_FRAME_RELATED_P (insn) = 1;
8677 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
8681 /* Emit code to restore saved registers using MOV insns.  First register
8682 is restored from POINTER + OFFSET.  */
/* MAYBE_EH_RETURN selects the eh_return register set; RED_OFFSET tracks
   each slot's entry-SP-relative position for CFA-restore notes.  */
8684 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8685 HOST_WIDE_INT red_offset,
8686 int maybe_eh_return)
8689 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8692 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8693 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8695 rtx reg = gen_rtx_REG (Pmode, regno);
8697 /* Ensure that adjust_address won't be forced to produce pointer
8698 out of range allowed by x86-64 instruction set.  */
8699 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset doesn't fit in a signed 32-bit displacement: fold it
   into r11 and address relative to that instead.  */
8703 r11 = gen_rtx_REG (DImode, R11_REG);
8704 emit_move_insn (r11, GEN_INT (offset));
8705 emit_insn (gen_adddi3 (r11, r11, pointer));
8706 base_address = gen_rtx_MEM (Pmode, r11);
8709 insn = emit_move_insn (reg,
8710 adjust_address (base_address, Pmode, offset));
8711 offset += UNITS_PER_WORD;
8713 if (ix86_cfa_state->reg == crtl->drap_reg
8714 && regno == REGNO (crtl->drap_reg))
8716 /* Previously we'd represented the CFA as an expression
8717 like *(%ebp - 8).  We've just popped that value from
8718 the stack, which means we need to reset the CFA to
8719 the drap register.  This will remain until we restore
8720 the stack pointer.  */
8721 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8722 RTX_FRAME_RELATED_P (insn) = 1;
8725 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8727 red_offset += UNITS_PER_WORD;
8731 /* Emit code to restore saved registers using MOV insns.  First register
8732 is restored from POINTER + OFFSET.  */
/* SSE counterpart of ix86_emit_restore_regs_using_mov: TImode (16-byte)
   loads from 128-bit-aligned slots.  */
8734 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8735 HOST_WIDE_INT red_offset,
8736 int maybe_eh_return)
8739 rtx base_address = gen_rtx_MEM (TImode, pointer);
8742 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8743 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8745 rtx reg = gen_rtx_REG (TImode, regno);
8747 /* Ensure that adjust_address won't be forced to produce pointer
8748 out of range allowed by x86-64 instruction set.  */
8749 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
/* Offset exceeds the signed 32-bit displacement range: compute
   the base in r11 and address relative to that.  */
8753 r11 = gen_rtx_REG (DImode, R11_REG);
8754 emit_move_insn (r11, GEN_INT (offset));
8755 emit_insn (gen_adddi3 (r11, r11, pointer));
8756 base_address = gen_rtx_MEM (TImode, r11);
8759 mem = adjust_address (base_address, TImode, offset);
8760 set_mem_align (mem, 128);
8761 insn = emit_move_insn (reg, mem);
8764 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8770 /* Restore function stack, frame, and registers.  */
/* STYLE: negative/positive/zero control insn marking as for
   pro_epilogue_adjust_stack; style == 2 is the eh_return path and
   style == 0 means a sibcall epilogue (no return insn emitted).
   Mirrors ix86_expand_prologue; CFA state is saved on entry and
   restored at the end so a later epilogue starts from prologue state.  */
8773 ix86_expand_epilogue (int style)
8776 struct ix86_frame frame;
8777 HOST_WIDE_INT offset, red_offset;
8778 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8781 ix86_finalize_stack_realign_flags ();
8783 /* When stack is realigned, SP must be valid.  */
8784 sp_valid = (!frame_pointer_needed
8785 || current_function_sp_is_unchanging
8786 || stack_realign_fp);
8788 ix86_compute_frame_layout (&frame);
8790 /* See the comment about red zone and frame
8791 pointer usage in ix86_expand_prologue.  */
8792 if (frame_pointer_needed && frame.red_zone_size)
8793 emit_insn (gen_memory_blockage ());
8795 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8796 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8798 /* Calculate start of saved registers relative to ebp.  Special care
8799 must be taken for the normal return case of a function using
8800 eh_return: the eax and edx registers are marked as saved, but not
8801 restored along this path.  */
8802 offset = frame.nregs;
8803 if (crtl->calls_eh_return && style != 2)
8805 offset *= -UNITS_PER_WORD;
8806 offset -= frame.nsseregs * 16 + frame.padding0;
8808 /* Calculate start of saved registers relative to esp on entry of the
8809 function.  When realigning stack, this needs to be the most negative
8810 value possible at runtime.  */
8811 red_offset = offset;
8813 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8815 else if (stack_realign_fp)
8816 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8818 if (frame_pointer_needed)
8819 red_offset -= UNITS_PER_WORD;
8821 /* If we're only restoring one register and sp is not valid then
8822 using a move instruction to restore the register since it's
8823 less work than reloading sp and popping the register.
8825 The default code results in stack adjustment using add/lea instruction,
8826 while this code results in LEAVE instruction (or discrete equivalent),
8827 so it is profitable in some other cases as well.  Especially when there
8828 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
8829 and there is exactly one register to pop.  This heuristic may need some
8830 tuning in future.  */
8831 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8832 || (TARGET_EPILOGUE_USING_MOVE
8833 && cfun->machine->use_fast_prologue_epilogue
8834 && ((frame.nregs + frame.nsseregs) > 1 || frame.to_allocate))
8835 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8836 && frame.to_allocate)
8837 || (frame_pointer_needed && TARGET_USE_LEAVE
8838 && cfun->machine->use_fast_prologue_epilogue
8839 && (frame.nregs + frame.nsseregs) == 1)
8840 || crtl->calls_eh_return)
8842 /* Restore registers.  We can use ebp or esp to address the memory
8843 locations.  If both are available, default to ebp, since offsets
8844 are known to be small.  Only exception is esp pointing directly
8845 to the end of block of saved registers, where we may simplify
8848 If we are realigning stack with bp and sp, regs restore can't
8849 be addressed by bp.  sp must be used instead.  */
8851 if (!frame_pointer_needed
8852 || (sp_valid && !frame.to_allocate)
8853 || stack_realign_fp)
8855 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8856 frame.to_allocate, red_offset,
8858 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8860 + frame.nsseregs * 16
8863 + frame.nsseregs * 16
8864 + frame.padding0, style == 2);
8868 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8871 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8873 + frame.nsseregs * 16
8876 + frame.nsseregs * 16
8877 + frame.padding0, style == 2);
8880 red_offset -= offset;
8882 /* eh_return epilogues need %ecx added to the stack pointer.  */
8885 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8887 /* Stack align doesn't work with eh_return.  */
8888 gcc_assert (!crtl->stack_realign_needed);
8890 if (frame_pointer_needed)
8892 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8893 tmp = plus_constant (tmp, UNITS_PER_WORD);
8894 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8896 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8897 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
8899 /* Note that we use SA as a temporary CFA, as the return
8900 address is at the proper place relative to it.  We
8901 pretend this happens at the FP restore insn because
8902 prior to this insn the FP would be stored at the wrong
8903 offset relative to SA, and after this insn we have no
8904 other reasonable register to use for the CFA.  We don't
8905 bother resetting the CFA to the SP for the duration of
8907 add_reg_note (tmp, REG_CFA_DEF_CFA,
8908 plus_constant (sa, UNITS_PER_WORD));
8909 ix86_add_queued_cfa_restore_notes (tmp);
8910 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
8911 RTX_FRAME_RELATED_P (tmp) = 1;
8912 ix86_cfa_state->reg = sa;
8913 ix86_cfa_state->offset = UNITS_PER_WORD;
8915 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8916 const0_rtx, style, false);
8920 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8921 tmp = plus_constant (tmp, (frame.to_allocate
8922 + frame.nregs * UNITS_PER_WORD
8923 + frame.nsseregs * 16
8925 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8926 ix86_add_queued_cfa_restore_notes (tmp);
8928 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
8929 if (ix86_cfa_state->offset != UNITS_PER_WORD)
8931 ix86_cfa_state->offset = UNITS_PER_WORD;
8932 add_reg_note (tmp, REG_CFA_DEF_CFA,
8933 plus_constant (stack_pointer_rtx,
8935 RTX_FRAME_RELATED_P (tmp) = 1;
8939 else if (!frame_pointer_needed)
8940 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8941 GEN_INT (frame.to_allocate
8942 + frame.nregs * UNITS_PER_WORD
8943 + frame.nsseregs * 16
8945 style, !using_drap);
8946 /* If not an i386, mov & pop is faster than "leave".  */
8947 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8948 || !cfun->machine->use_fast_prologue_epilogue)
8949 ix86_emit_leave (red_offset);
8952 pro_epilogue_adjust_stack (stack_pointer_rtx,
8953 hard_frame_pointer_rtx,
8954 const0_rtx, style, !using_drap);
8956 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
/* Pop-based restore path (the mov/leave heuristic above rejected).  */
8961 /* First step is to deallocate the stack frame so that we can
8964 If we realign stack with frame pointer, then stack pointer
8965 won't be able to recover via lea $offset(%bp), %sp, because
8966 there is a padding area between bp and sp for realign.
8967 "add $to_allocate, %sp" must be used instead.  */
8970 gcc_assert (frame_pointer_needed);
8971 gcc_assert (!stack_realign_fp);
8972 pro_epilogue_adjust_stack (stack_pointer_rtx,
8973 hard_frame_pointer_rtx,
8974 GEN_INT (offset), style, false);
8975 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8976 frame.to_allocate, red_offset,
8978 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8979 GEN_INT (frame.nsseregs * 16),
8982 else if (frame.to_allocate || frame.nsseregs)
8984 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8985 frame.to_allocate, red_offset,
8987 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8988 GEN_INT (frame.to_allocate
8989 + frame.nsseregs * 16
8990 + frame.padding0), style,
8991 !using_drap && !frame_pointer_needed);
8994 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
8996 red_offset -= offset;
8998 if (frame_pointer_needed)
9000 /* Leave results in shorter dependency chains on CPUs that are
9001 able to grok it fast.  */
9002 if (TARGET_USE_LEAVE)
9003 ix86_emit_leave (red_offset);
9006 /* For stack realigned really happens, recover stack
9007 pointer to hard frame pointer is a must, if not using
9009 if (stack_realign_fp)
9010 pro_epilogue_adjust_stack (stack_pointer_rtx,
9011 hard_frame_pointer_rtx,
9012 const0_rtx, style, !using_drap);
9013 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
/* DRAP epilogue: recover SP from the DRAP register and, if DRAP was a
   call-saved register, pop its saved value back.  */
9021 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
9022 ? 0 : UNITS_PER_WORD);
9025 gcc_assert (stack_realign_drap);
9027 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
9029 GEN_INT (-(UNITS_PER_WORD
9030 + param_ptr_offset))));
9032 ix86_cfa_state->reg = stack_pointer_rtx;
9033 ix86_cfa_state->offset = UNITS_PER_WORD + param_ptr_offset;
9035 add_reg_note (insn, REG_CFA_DEF_CFA,
9036 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9037 GEN_INT (ix86_cfa_state->offset)));
9038 RTX_FRAME_RELATED_P (insn) = 1;
9040 if (param_ptr_offset)
9041 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9044 /* Sibcall epilogues don't want a return instruction.  */
9047 *ix86_cfa_state = cfa_state_save;
/* "pascal"-style callee-pop return: pop POPS_ARGS bytes of arguments.  */
9051 if (crtl->args.pops_args && crtl->args.size)
9053 rtx popc = GEN_INT (crtl->args.pops_args);
9055 /* i386 can only pop 64K bytes.  If asked to pop more, pop return
9056 address, do explicit add, and jump indirectly to the caller.  */
9058 if (crtl->args.pops_args >= 65536)
9060 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9063 /* There is no "pascal" calling convention in any 64bit ABI.  */
9064 gcc_assert (!TARGET_64BIT);
9066 insn = emit_insn (gen_popsi1 (ecx));
9067 ix86_cfa_state->offset -= UNITS_PER_WORD;
9069 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9070 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9071 add_reg_note (insn, REG_CFA_REGISTER,
9072 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9073 RTX_FRAME_RELATED_P (insn) = 1;
9075 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9077 emit_jump_insn (gen_return_indirect_internal (ecx));
9080 emit_jump_insn (gen_return_pop_internal (popc));
9083 emit_jump_insn (gen_return_internal ());
9085 /* Restore the state back to the state from the prologue,
9086 so that it's correct for the next epilogue.  */
9087 *ix86_cfa_state = cfa_state_save;
9090 /* Reset from the function's potential modifications. */
/* Target hook run when assembler output for a function body is complete.
   Two visible jobs: (1) restore the PIC register's hard register number,
   which prologue/epilogue code may have renumbered for this function;
   (2) on Mach-O, emit a trailing NOP when the function may end in a
   deleted label, since Mach-O does not allow labels at the very end of
   an object.  NOTE(review): this listing is an elided excerpt (embedded
   line numbers are non-contiguous); enclosing braces and parts of the
   conditions are not visible here.  */
9093 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9094 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
/* Undo any per-function renumbering of the PIC base register.  */
9096 if (pic_offset_table_rtx)
9097 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9099 /* Mach-O doesn't support labels at the end of objects, so if
9100 it looks like we might want one, insert a NOP.  */
9102 rtx insn = get_last_insn ();
/* Walk backwards past trailing notes (condition partly elided) looking
   for a NOTE_INSN_DELETED_LABEL at the function's end.  */
9105 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9106 insn = PREV_INSN (insn);
9110 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9111 fputs ("\tnop\n", file);
9117 /* Extract the parts of an RTL expression that is a valid memory address
9118 for an instruction. Return 0 if the structure of the address is
9119 grossly off. Return -1 if the address contains ASHIFT, so it is not
9120 strictly valid, but still used for computing length of lea instruction. */
/* Decompose ADDR into the parts of an x86 effective address
   (base + index*scale + displacement, plus an optional segment) and
   store them in *OUT.  Per the comment above: returns 0 for a grossly
   malformed address and -1 when ADDR contains ASHIFT (valid only for
   computing lea length).  NOTE(review): elided excerpt — the addends[]
   declaration, several switch cases, the error returns and the final
   store into *OUT are not visible here.  */
9123 ix86_decompose_address (rtx addr, struct ix86_address *out)
9125 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9126 rtx base_reg, index_reg;
9127 HOST_WIDE_INT scale = 1;
9128 rtx scale_rtx = NULL_RTX;
9130 enum ix86_address_seg seg = SEG_DEFAULT;
9132 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
/* PLUS: flatten the addend chain into addends[], then classify each
   operand (loop body partly elided).  */
9134 else if (GET_CODE (addr) == PLUS)
9144 addends[n++] = XEXP (op, 1);
9147 while (GET_CODE (op) == PLUS);
9152 for (i = n; i >= 0; --i)
9155 switch (GET_CODE (op))
9160 index = XEXP (op, 0);
9161 scale_rtx = XEXP (op, 1);
/* An UNSPEC_TP addend selects the thread-pointer segment register
   (%fs on 64-bit, %gs on 32-bit) when direct TLS seg refs are on.  */
9165 if (XINT (op, 1) == UNSPEC_TP
9166 && TARGET_TLS_DIRECT_SEG_REFS
9167 && seg == SEG_DEFAULT)
9168 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9197 else if (GET_CODE (addr) == MULT)
9199 index = XEXP (addr, 0);	/* index*scale */
9200 scale_rtx = XEXP (addr, 1);
9202 else if (GET_CODE (addr) == ASHIFT)
9206 /* We're called for lea too, which implements ashift on occasion.  */
9207 index = XEXP (addr, 0);
9208 tmp = XEXP (addr, 1);
9209 if (!CONST_INT_P (tmp))
/* Shift count must be 0..3, i.e. scale 1/2/4/8 after 1 << count.  */
9211 scale = INTVAL (tmp);
9212 if ((unsigned HOST_WIDE_INT) scale > 3)
9218 disp = addr;			/* displacement */
9220 /* Extract the integral value of scale.  */
9223 if (!CONST_INT_P (scale_rtx))
9225 scale = INTVAL (scale_rtx);
9228 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9229 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9231 /* Avoid useless 0 displacement.  */
9232 if (disp == const0_rtx && (base || index))
9235 /* Allow arg pointer and stack pointer as index if there is not scaling.  */
9236 if (base_reg && index_reg && scale == 1
9237 && (index_reg == arg_pointer_rtx
9238 || index_reg == frame_pointer_rtx
9239 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
/* %esp/%rsp cannot be an index; swap base and index instead.  */
9242 tmp = base, base = index, index = tmp;
9243 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9246 /* Special case: %ebp cannot be encoded as a base without a displacement.
9250 && (base_reg == hard_frame_pointer_rtx
9251 || base_reg == frame_pointer_rtx
9252 || base_reg == arg_pointer_rtx
9253 || (REG_P (base_reg)
9254 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9255 || REGNO (base_reg) == R13_REG))))
9258 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9259 Avoid this by transforming to [%esi+0].
9260 Reload calls address legitimization without cfun defined, so we need
9261 to test cfun for being non-NULL.  */
9262 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9263 && base_reg && !index_reg && !disp
9265 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9268 /* Special case: encode reg+reg instead of reg*2.  */
9269 if (!base && index && scale == 2)
9270 base = index, base_reg = index_reg, scale = 1;
9272 /* Special case: scaling cannot be encoded without base or displacement.  */
9273 if (!base && !disp && index && scale != 1)
9285 /* Return cost of the memory address x.
9286 For i386, it is better to use a complex address than let gcc copy
9287 the address into a reg and make a new pseudo. But not if the address
9288 requires to two regs - that would mean more pseudos with longer
/* TARGET_ADDRESS_COST hook: estimate the cost of using X as a memory
   address, preferring addresses that tie up fewer (pseudo) registers
   and penalizing forms that decode slowly on AMD K6.  NOTE(review):
   elided excerpt — the `cost` accumulator, several condition heads and
   the final return are not visible here.  */
9291 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9293 struct ix86_address parts;
9295 int ok = ix86_decompose_address (x, &parts);
/* Look through SUBREGs so the pseudo-register tests below see the
   underlying registers.  */
9299 if (parts.base && GET_CODE (parts.base) == SUBREG)
9300 parts.base = SUBREG_REG (parts.base);
9301 if (parts.index && GET_CODE (parts.index) == SUBREG)
9302 parts.index = SUBREG_REG (parts.index);
9304 /* Attempt to minimize number of registers in the address.  */
9306 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9308 && (!REG_P (parts.index)
9309 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9313 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9315 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9316 && parts.base != parts.index)
9319 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b,
9320 since it's predecode logic can't detect the length of instructions
9321 and it degenerates to vector decoded.  Increase cost of such
9322 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
9323 to split such addresses or even refuse such addresses at all.
9325 Following addressing modes are affected:
9330 The first and last case may be avoidable by explicitly coding the zero in
9331 memory address, but I don't have AMD-K6 machine handy to check this
9335 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9336 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9337 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9343 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9344 this is used for to form addresses to local data when -fPIC is in
/* Return nonzero if DISP is a Mach-O pic-base-relative reference,
   i.e. an UNSPEC wrapping UNSPEC_MACHOPIC_OFFSET (see the comment
   above: {LABEL|SYMBOL}_REF minus the pic base symbol).  */
9348 darwin_local_data_pic (rtx disp)
9350 return (GET_CODE (disp) == UNSPEC
9351 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9354 /* Determine if a given RTX is a valid constant. We already know this
9355 satisfies CONSTANT_P. */
/* Return true if X (already known CONSTANT_P) is a constant the x86
   move patterns can materialize directly.  TLS and DLLIMPORT symbols
   are rejected; only a whitelisted set of UNSPECs qualify.
   NOTE(review): elided excerpt — the case labels of both switches and
   several true/false returns are not visible here.  */
9358 legitimate_constant_p (rtx x)
9360 switch (GET_CODE (x))
/* CONST: peel a (plus sym int) wrapper; a non-CONST_INT addend is
   handled by the (elided) code below.  */
9365 if (GET_CODE (x) == PLUS)
9367 if (!CONST_INT_P (XEXP (x, 1)))
9372 if (TARGET_MACHO && darwin_local_data_pic (x))
9375 /* Only some unspecs are valid as "constants".  */
9376 if (GET_CODE (x) == UNSPEC)
9377 switch (XINT (x, 1))
9382 return TARGET_64BIT;
/* An unspec wrapping a local-exec TLS symbol (case label elided).  */
9385 x = XVECEXP (x, 0, 0);
9386 return (GET_CODE (x) == SYMBOL_REF
9387 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
/* Likewise for local-dynamic TLS.  */
9389 x = XVECEXP (x, 0, 0);
9390 return (GET_CODE (x) == SYMBOL_REF
9391 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9396 /* We must have drilled down to a symbol.  */
9397 if (GET_CODE (x) == LABEL_REF)
9399 if (GET_CODE (x) != SYMBOL_REF)
9404 /* TLS symbols are never valid.  */
9405 if (SYMBOL_REF_TLS_MODEL (x))
9408 /* DLLIMPORT symbols are never valid.  */
9409 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9410 && SYMBOL_REF_DLLIMPORT_P (x))
/* TImode (and vector) constants: only ones with a standard SSE
   encoding are allowed (surrounding case labels elided).  */
9415 if (GET_MODE (x) == TImode
9416 && x != CONST0_RTX (TImode)
9422 if (!standard_sse_constant_p (x))
9429 /* Otherwise we handle everything else in the move patterns.  */
9433 /* Determine if it's legal to put X into the constant pool. This
9434 is not possible for the address of thread-local symbols, which
9435 is checked above. */
/* TARGET_CANNOT_FORCE_CONST_MEM hook: true if X must not be spilled to
   the constant pool.  Integral constants and vectors are always fine
   (case labels elided); everything else defers to
   legitimate_constant_p — e.g. TLS symbols, rejected there, cannot be
   forced to memory.  */
9438 ix86_cannot_force_const_mem (rtx x)
9440 /* We can always put integral constants and vectors in memory.  */
9441 switch (GET_CODE (x))
9451 return !legitimate_constant_p (x);
9455 /* Nonzero if the constant value X is a legitimate general operand
9456 when generating PIC code. It is given that flag_pic is on and
9457 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
/* Return nonzero if constant X is a legitimate general operand when
   generating PIC code (flag_pic is on; X satisfies CONSTANT_P or is a
   CONST_DOUBLE — see comment above).  NOTE(review): elided excerpt —
   the `inner` declaration, several case labels and the default path
   are not visible here.  */
9460 legitimate_pic_operand_p (rtx x)
9464 switch (GET_CODE (x))
/* CONST: strip an outer (plus inner int) to get at the real operand.  */
9467 inner = XEXP (x, 0);
9468 if (GET_CODE (inner) == PLUS
9469 && CONST_INT_P (XEXP (inner, 1)))
9470 inner = XEXP (inner, 0);
9472 /* Only some unspecs are valid as "constants".  */
9473 if (GET_CODE (inner) == UNSPEC)
9474 switch (XINT (inner, 1))
9479 return TARGET_64BIT;
/* Local-exec TLS reference (case label elided).  */
9481 x = XVECEXP (inner, 0, 0);
9482 return (GET_CODE (x) == SYMBOL_REF
9483 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9484 case UNSPEC_MACHOPIC_OFFSET:
9485 return legitimate_pic_address_disp_p (x);
/* SYMBOL_REF / LABEL_REF fall through to the displacement check.  */
9493 return legitimate_pic_address_disp_p (x);
9500 /* Determine if a given CONST RTX is a valid memory displacement
/* Return nonzero if DISP is a valid memory displacement under PIC
   (see comment above).  On 64-bit, plain local symbols within +/-16MB
   are allowed directly; otherwise DISP must be a CONST wrapping one of
   the recognized GOT/TLS unspecs.  NOTE(review): elided excerpt —
   several case labels, returns and the closing code are not visible.  */
9504 legitimate_pic_address_disp_p (rtx disp)
9508 /* In 64bit mode we can allow direct addresses of symbols and labels
9509 when they are not dynamic symbols.  */
9512 rtx op0 = disp, op1;
9514 switch (GET_CODE (disp))
/* CONST: must be sym+const with the offset inside +/-16MB so the
   RIP-relative disp32 can reach it.  */
9520 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9522 op0 = XEXP (XEXP (disp, 0), 0);
9523 op1 = XEXP (XEXP (disp, 0), 1);
9524 if (!CONST_INT_P (op1)
9525 || INTVAL (op1) >= 16*1024*1024
9526 || INTVAL (op1) < -16*1024*1024)
9528 if (GET_CODE (op0) == LABEL_REF)
9530 if (GET_CODE (op0) != SYMBOL_REF)
9535 /* TLS references should always be enclosed in UNSPEC.  */
9536 if (SYMBOL_REF_TLS_MODEL (op0))
/* Binds-locally, non-far symbols are directly addressable except in
   the large PIC model.  */
9538 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9539 && ix86_cmodel != CM_LARGE_PIC)
9547 if (GET_CODE (disp) != CONST)
9549 disp = XEXP (disp, 0);
/* 64-bit path (guard elided): only GOTPCREL/GOTOFF/PLTOFF unspecs on a
   symbol or label are acceptable.  */
9553 /* We are unsafe to allow PLUS expressions.  This limit allowed distance
9554 of GOT tables.  We should not need these anyway.  */
9555 if (GET_CODE (disp) != UNSPEC
9556 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9557 && XINT (disp, 1) != UNSPEC_GOTOFF
9558 && XINT (disp, 1) != UNSPEC_PLTOFF))
9561 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9562 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
/* 32-bit path: strip an optional constant addend then require one of
   the unspecs handled in the switch below.  */
9568 if (GET_CODE (disp) == PLUS)
9570 if (!CONST_INT_P (XEXP (disp, 1)))
9572 disp = XEXP (disp, 0);
9576 if (TARGET_MACHO && darwin_local_data_pic (disp))
9579 if (GET_CODE (disp) != UNSPEC)
9582 switch (XINT (disp, 1))
9587 /* We need to check for both symbols and labels because VxWorks loads
9588 text labels with @GOT rather than @GOTOFF.  See gotoff_operand for
9590 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9591 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9593 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9594 While ABI specify also 32bit relocation but we don't produce it in
9595 small PIC model at all.  */
9596 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9597 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9599 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9601 case UNSPEC_GOTTPOFF:
9602 case UNSPEC_GOTNTPOFF:
9603 case UNSPEC_INDNTPOFF:
/* Initial-exec TLS: the wrapped symbol's TLS model must match.  */
9606 disp = XVECEXP (disp, 0, 0);
9607 return (GET_CODE (disp) == SYMBOL_REF
9608 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
/* Local-exec TLS (case label elided).  */
9610 disp = XVECEXP (disp, 0, 0);
9611 return (GET_CODE (disp) == SYMBOL_REF
9612 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
/* Local-dynamic TLS (case label elided).  */
9614 disp = XVECEXP (disp, 0, 0);
9615 return (GET_CODE (disp) == SYMBOL_REF
9616 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9622 /* Recognizes RTL expressions that are valid memory addresses for an
9623 instruction. The MODE argument is the machine mode for the MEM
9624 expression that wants to use this address.
9626 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
9627 convert common non-canonical forms to canonical form so that they will
/* TARGET_LEGITIMATE_ADDRESS_P hook: decide whether ADDR is a valid x86
   memory address, validating each component produced by
   ix86_decompose_address in turn (base, index, scale, displacement).
   STRICT selects hard-register checks (after reload).  On rejection a
   human-readable `reason` is recorded (presumably for the elided debug
   output — confirm against the full source).  NOTE(review): elided
   excerpt — braces, the `reg` declarations, the error-return paths and
   several condition heads are not visible here.  */
9631 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9632 rtx addr, bool strict)
9634 struct ix86_address parts;
9635 rtx base, index, disp;
9636 HOST_WIDE_INT scale;
9637 const char *reason = NULL;
9638 rtx reason_rtx = NULL_RTX;
9640 if (ix86_decompose_address (addr, &parts) <= 0)
9642 reason = "decomposition failed";
9647 index = parts.index;
9649 scale = parts.scale;
9651 /* Validate base register.
9653 Don't allow SUBREG's that span more than a word here.  It can lead to spill
9654 failures when the base is one word out of a two word structure, which is
9655 represented internally as a DImode int.  */
9664 else if (GET_CODE (base) == SUBREG
9665 && REG_P (SUBREG_REG (base))
9666 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9668 reg = SUBREG_REG (base);
9671 reason = "base is not a register";
9675 if (GET_MODE (base) != Pmode)
9677 reason = "base is not in Pmode";
/* Strict mode requires a hard register valid as a base; non-strict
   also accepts pseudos.  */
9681 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9682 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9684 reason = "base is not valid";
9689 /* Validate index register.
9691 Don't allow SUBREG's that span more than a word here -- same as above.  */
9700 else if (GET_CODE (index) == SUBREG
9701 && REG_P (SUBREG_REG (index))
9702 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9704 reg = SUBREG_REG (index);
9707 reason = "index is not a register";
9711 if (GET_MODE (index) != Pmode)
9713 reason = "index is not in Pmode";
9717 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9718 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9720 reason = "index is not valid";
9725 /* Validate scale factor.  */
9728 reason_rtx = GEN_INT (scale);
9731 reason = "scale without index";
/* Hardware SIB encoding permits only scale 1, 2, 4 or 8.  */
9735 if (scale != 2 && scale != 4 && scale != 8)
9737 reason = "scale is not a valid multiplier";
9742 /* Validate displacement.  */
9747 if (GET_CODE (disp) == CONST
9748 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9749 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9750 switch (XINT (XEXP (disp, 0), 1))
9752 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9753 used.  While ABI specify also 32bit relocations, we don't produce
9754 them at all and use IP relative instead.  */
9757 gcc_assert (flag_pic);
9759 goto is_legitimate_pic;
9760 reason = "64bit address unspec";
9763 case UNSPEC_GOTPCREL:
9764 gcc_assert (flag_pic);
9765 goto is_legitimate_pic;
9767 case UNSPEC_GOTTPOFF:
9768 case UNSPEC_GOTNTPOFF:
9769 case UNSPEC_INDNTPOFF:
9775 reason = "invalid address unspec";
/* Mach-O PIC: symbolic displacement must already be in indirect form
   (condition partly elided).  */
9779 else if (SYMBOLIC_CONST (disp)
9783 && MACHOPIC_INDIRECT
9784 && !machopic_operand_p (disp)
/* is_legitimate_pic (label elided): PIC displacement checks.  */
9790 if (TARGET_64BIT && (index || base))
9792 /* foo@dtpoff(%rX) is ok.  */
9793 if (GET_CODE (disp) != CONST
9794 || GET_CODE (XEXP (disp, 0)) != PLUS
9795 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9796 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9797 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9798 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9800 reason = "non-constant pic memory reference";
9804 else if (! legitimate_pic_address_disp_p (disp))
9806 reason = "displacement is an invalid pic construct";
9810 /* This code used to verify that a symbolic pic displacement
9811 includes the pic_offset_table_rtx register.
9813 While this is good idea, unfortunately these constructs may
9814 be created by "adds using lea" optimization for incorrect
9823 This code is nonsensical, but results in addressing
9824 GOT table with pic_offset_table_rtx base.  We can't
9825 just refuse it easily, since it gets matched by
9826 "addsi3" pattern, that later gets split to lea in the
9827 case output register differs from input.  While this
9828 can be handled by separate addsi pattern for this case
9829 that never results in lea, this seems to be easier and
9830 correct fix for crash to disable this test.  */
/* Non-PIC displacement: must be a constant the patterns accept.  */
9832 else if (GET_CODE (disp) != LABEL_REF
9833 && !CONST_INT_P (disp)
9834 && (GET_CODE (disp) != CONST
9835 || !legitimate_constant_p (disp))
9836 && (GET_CODE (disp) != SYMBOL_REF
9837 || !legitimate_constant_p (disp)))
9839 reason = "displacement is not constant";
/* 64-bit addresses take only a sign-extended 32-bit immediate disp.  */
9842 else if (TARGET_64BIT
9843 && !x86_64_immediate_operand (disp, VOIDmode))
9845 reason = "displacement is out of range";
9850 /* Everything looks valid.  */
9857 /* Determine if a given RTX is a valid constant address. */
/* True if X is a constant that is also a valid (strict-checked)
   memory address by itself.  */
9860 constant_address_p (rtx x)
9862 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
9865 /* Return a unique alias set for the GOT. */
/* Return the unique alias set used for all GOT loads, creating it
   lazily on first use (the `set == -1` guard is elided in this
   excerpt).  Keeping GOT entries in their own alias set lets the
   optimizers know they don't conflict with user memory.  */
9867 static alias_set_type
9868 ix86_GOT_alias_set (void)
9870 static alias_set_type set = -1;
9872 set = new_alias_set ();
9876 /* Return a legitimate reference for ORIG (an address) using the
9877 register REG. If REG is 0, a new pseudo is generated.
9879 There are two types of references that must be handled:
9881 1. Global data references must load the address from the GOT, via
9882 the PIC reg. An insn is emitted to do this load, and the reg is
9885 2. Static data references, constant pool addresses, and code labels
9886 compute the address as an offset from the GOT, whose base is in
9887 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9888 differentiate them from global data objects. The returned
9889 address is the PIC reg + an unspec constant.
9891 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
9892 reg also appears in the address. */
/* Convert ORIG, a symbolic address, into a legitimate PIC reference
   using register REG (a fresh pseudo if REG is 0) — see the large
   comment above for the @GOT vs @GOTOFF strategy.  NOTE(review):
   elided excerpt — the `addr`/`new_rtx`/`base` declarations, several
   braces/else arms and the final return are not visible here.  */
9895 legitimize_pic_address (rtx orig, rtx reg)
/* 32-bit Mach-O has its own PIC machinery; delegate entirely.  */
9902 if (TARGET_MACHO && !TARGET_64BIT)
9905 reg = gen_reg_rtx (Pmode);
9906 /* Use the generic Mach-O PIC machinery.  */
9907 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
/* 64-bit: a displacement already valid under PIC needs no work.  */
9911 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9913 else if (TARGET_64BIT
9914 && ix86_cmodel != CM_SMALL_PIC
9915 && gotoff_operand (addr, Pmode))
9918 /* This symbol may be referenced via a displacement from the PIC
9919 base address (@GOTOFF).  */
9921 if (reload_in_progress)
9922 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9923 if (GET_CODE (addr) == CONST)
9924 addr = XEXP (addr, 0);
9925 if (GET_CODE (addr) == PLUS)
9927 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9929 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9932 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9933 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9935 tmpreg = gen_reg_rtx (Pmode);
9938 emit_move_insn (tmpreg, new_rtx);
9942 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9943 tmpreg, 1, OPTAB_DIRECT);
9946 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9948 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9950 /* This symbol may be referenced via a displacement from the PIC
9951 base address (@GOTOFF).  */
9953 if (reload_in_progress)
9954 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9955 if (GET_CODE (addr) == CONST)
9956 addr = XEXP (addr, 0);
9957 if (GET_CODE (addr) == PLUS)
9959 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9961 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9964 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9965 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9966 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9970 emit_move_insn (reg, new_rtx);
/* Non-TLS symbols (and VxWorks text labels) go through the GOT.  */
9974 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9975 /* We can't use @GOTOFF for text labels on VxWorks;
9976 see gotoff_operand.  */
9977 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9979 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9981 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9982 return legitimize_dllimport_symbol (addr, true);
9983 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9984 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9985 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9987 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9988 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
/* 64-bit small/medium PIC: RIP-relative @GOTPCREL load.  */
9992 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9994 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9995 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9996 new_rtx = gen_const_mem (Pmode, new_rtx);
9997 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10000 reg = gen_reg_rtx (Pmode);
10001 /* Use directly gen_movsi, otherwise the address is loaded
10002 into register for CSE.  We don't want to CSE this addresses,
10003 instead we CSE addresses from the GOT table, so skip this.  */
10004 emit_insn (gen_movsi (reg, new_rtx));
10009 /* This symbol must be referenced via a load from the
10010 Global Offset Table (@GOT).  */
10012 if (reload_in_progress)
10013 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10014 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10015 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10017 new_rtx = force_reg (Pmode, new_rtx);
10018 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10019 new_rtx = gen_const_mem (Pmode, new_rtx);
10020 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10023 reg = gen_reg_rtx (Pmode);
10024 emit_move_insn (reg, new_rtx);
/* Large 64-bit integer constants need a register.  */
10030 if (CONST_INT_P (addr)
10031 && !x86_64_immediate_operand (addr, VOIDmode))
10035 emit_move_insn (reg, addr);
10039 new_rtx = force_reg (Pmode, addr);
10041 else if (GET_CODE (addr) == CONST)
10043 addr = XEXP (addr, 0);
10045 /* We must match stuff we generate before.  Assume the only
10046 unspecs that can get here are ours.  Not that we could do
10047 anything with them anyway....  */
10048 if (GET_CODE (addr) == UNSPEC
10049 || (GET_CODE (addr) == PLUS
10050 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10052 gcc_assert (GET_CODE (addr) == PLUS);
10054 if (GET_CODE (addr) == PLUS)
10056 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10058 /* Check first to see if this is a constant offset from a @GOTOFF
10059 symbol reference.  */
10060 if (gotoff_operand (op0, Pmode)
10061 && CONST_INT_P (op1))
10065 if (reload_in_progress)
10066 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10067 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10069 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10070 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10071 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10075 emit_move_insn (reg, new_rtx);
/* 64-bit sym+const: offsets outside +/-16MB can't ride along in the
   relocation; force the pieces into registers.  */
10081 if (INTVAL (op1) < -16*1024*1024
10082 || INTVAL (op1) >= 16*1024*1024)
10084 if (!x86_64_immediate_operand (op1, Pmode))
10085 op1 = force_reg (Pmode, op1);
10086 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
/* General PLUS: legitimize both halves recursively, then re-associate
   any constant term to the outermost position.  */
10092 base = legitimize_pic_address (XEXP (addr, 0), reg);
10093 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10094 base == reg ? NULL_RTX : reg);
10096 if (CONST_INT_P (new_rtx))
10097 new_rtx = plus_constant (base, INTVAL (new_rtx));
10100 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10102 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10103 new_rtx = XEXP (new_rtx, 1);
10105 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10113 /* Load the thread pointer. If TO_REG is true, force it into a register. */
/* Build an rtx for the thread pointer (an UNSPEC_TP wrapper).  If
   TO_REG is true, emit a SET copying it into a fresh pseudo and return
   that register instead (early-return for !to_reg is elided in this
   excerpt).  */
10116 get_thread_pointer (int to_reg)
10120 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10124 reg = gen_reg_rtx (Pmode);
10125 insn = gen_rtx_SET (VOIDmode, reg, tp);
10126 insn = emit_insn (insn);
10131 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10132 false if we expect this to be used for a memory address and true if
10133 we expect to load the address into a register. */
/* Legitimize TLS symbol X according to MODEL (global-dynamic,
   local-dynamic, initial-exec or local-exec).  FOR_MOV is true when
   the result will be loaded into a register rather than used directly
   as a memory address (affects whether the thread pointer must be in
   a register — see the comment above).  NOTE(review): elided excerpt —
   braces, `type` declaration, some guards and several returns are not
   visible here.  */
10136 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10138 rtx dest, base, off, pic, tp;
10143 case TLS_MODEL_GLOBAL_DYNAMIC:
10144 dest = gen_reg_rtx (Pmode);
10145 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
/* Classic 64-bit GD: call __tls_get_addr; result lands in %rax and is
   wrapped in a libcall block with X as the REG_EQUAL value.  */
10147 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10149 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10152 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10153 insns = get_insns ();
10156 RTL_CONST_CALL_P (insns) = 1;
10157 emit_libcall_block (insns, dest, rax, x);
10159 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10160 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10162 emit_insn (gen_tls_global_dynamic_32 (dest, x));
/* GNU2 (TLSDESC) returns an offset; add the thread pointer.  */
10164 if (TARGET_GNU2_TLS)
10166 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10168 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10172 case TLS_MODEL_LOCAL_DYNAMIC:
10173 base = gen_reg_rtx (Pmode);
10174 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10176 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10178 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10181 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10182 insns = get_insns ();
10185 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10186 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10187 RTL_CONST_CALL_P (insns) = 1;
10188 emit_libcall_block (insns, base, rax, note);
10190 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10191 emit_insn (gen_tls_local_dynamic_base_64 (base));
10193 emit_insn (gen_tls_local_dynamic_base_32 (base));
10195 if (TARGET_GNU2_TLS)
10197 rtx x = ix86_tls_module_base ();
10199 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10200 gen_rtx_MINUS (Pmode, x, tp));
/* LD: variable address = module base + @DTPOFF offset.  */
10203 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10204 off = gen_rtx_CONST (Pmode, off);
10206 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10208 if (TARGET_GNU2_TLS)
10210 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10212 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10217 case TLS_MODEL_INITIAL_EXEC:
/* Select the GOT unspec for the TP offset: 64-bit uses RIP-relative
   GOTNTPOFF; 32-bit PIC uses the PIC register; 32-bit non-PIC
   non-GNU-TLS sets up a GOT pointer; otherwise INDNTPOFF.  */
10221 type = UNSPEC_GOTNTPOFF;
10225 if (reload_in_progress)
10226 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10227 pic = pic_offset_table_rtx;
10228 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10230 else if (!TARGET_ANY_GNU_TLS)
10232 pic = gen_reg_rtx (Pmode);
10233 emit_insn (gen_set_got (pic));
10234 type = UNSPEC_GOTTPOFF;
10239 type = UNSPEC_INDNTPOFF;
10242 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10243 off = gen_rtx_CONST (Pmode, off);
10245 off = gen_rtx_PLUS (Pmode, pic, off);
10246 off = gen_const_mem (Pmode, off);
10247 set_mem_alias_set (off, ix86_GOT_alias_set ());
10249 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10251 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS)
10252 off = force_reg (Pmode, off);
10253 return gen_rtx_PLUS (Pmode, base, off);
/* Sun-style TLS: address = TP - offset.  */
10257 base = get_thread_pointer (true);
10258 dest = gen_reg_rtx (Pmode);
10259 emit_insn (gen_subsi3 (dest, base, off));
10263 case TLS_MODEL_LOCAL_EXEC:
10264 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10265 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10266 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10267 off = gen_rtx_CONST (Pmode, off);
10269 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10271 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10272 return gen_rtx_PLUS (Pmode, base, off);
10276 base = get_thread_pointer (true);
10277 dest = gen_reg_rtx (Pmode);
10278 emit_insn (gen_subsi3 (dest, base, off));
10283 gcc_unreachable ();
10289 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10292 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10293 htab_t dllimport_map;
/* Return the artificial __imp_DECL VAR_DECL for dllimported DECL,
   creating and caching it in the GC-managed dllimport_map hash table
   on first request.  Its DECL_RTL is a const MEM loading through the
   "__imp_<name>" import-table symbol.  NOTE(review): elided excerpt —
   some declarations (`to`, `name`, `imp_name`, `rtl`, `loc`), braces
   and the final return are not visible here.  */
10296 get_dllimport_decl (tree decl)
10298 struct tree_map *h, in;
10301 const char *prefix;
10302 size_t namelen, prefixlen;
10307 if (!dllimport_map)
10308 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
/* Hash on DECL's pointer identity; an existing entry is returned
   (early-return path elided).  */
10310 in.hash = htab_hash_pointer (decl);
10311 in.base.from = decl;
10312 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10313 h = (struct tree_map *) *loc;
10317 *loc = h = GGC_NEW (struct tree_map);
10319 h->base.from = decl;
10320 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10321 VAR_DECL, NULL, ptr_type_node);
10322 DECL_ARTIFICIAL (to) = 1;
10323 DECL_IGNORED_P (to) = 1;
10324 DECL_EXTERNAL (to) = 1;
10325 TREE_READONLY (to) = 1;
/* Build "*__imp_<name>" (one underscore when the target adds no user
   label prefix or the name is fastcall-prefixed, else two).  */
10327 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10328 name = targetm.strip_name_encoding (name);
10329 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10330 ? "*__imp_" : "*__imp__";
10331 namelen = strlen (name);
10332 prefixlen = strlen (prefix);
10333 imp_name = (char *) alloca (namelen + prefixlen + 1);
10334 memcpy (imp_name, prefix, prefixlen);
10335 memcpy (imp_name + prefixlen, name, namelen + 1);
10337 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10338 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10339 SET_SYMBOL_REF_DECL (rtl, to);
10340 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
/* The import slot is read-only at runtime; share the GOT alias set.  */
10342 rtl = gen_const_mem (Pmode, rtl);
10343 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10345 SET_DECL_RTL (to, rtl);
10346 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10351 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10352 true if we require the result be a register. */
/* Expand SYMBOL into its __imp_ indirection (a MEM through the import
   table, via get_dllimport_decl).  If WANT_REG, the result is forced
   into a register (the guard and return are elided in this excerpt).
   SYMBOL must carry its originating decl.  */
10355 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10360 gcc_assert (SYMBOL_REF_DECL (symbol));
10361 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10363 x = DECL_RTL (imp_decl);
10365 x = force_reg (Pmode, x);
10369 /* Try machine-dependent ways of modifying an illegitimate address
10370 to be legitimate. If we find one, return the new, valid address.
10371 This macro is used in only one place: `memory_address' in explow.c.
10373 OLDX is the address as it was before break_out_memory_refs was called.
10374 In some cases it is useful to look at this to decide what needs to be done.
10376 It is always safe for this macro to do nothing. It exists to recognize
10377 opportunities to optimize the output.
10379 For the 80386, we handle X+REG by loading X into a register R and
10380 using R+REG. R will go in a general reg and indexing will be used.
10381 However, if REG is a broken-out memory address or multiplication,
10382 nothing needs to be done because REG can certainly go in a general reg.
10384 When -fpic is used, special handling is needed for symbolic references.
10385 See comments by legitimize_pic_address in i386.c for details. */
/* TARGET_LEGITIMIZE_ADDRESS hook (contract described in the comment
   above): rewrite X into a form the x86 address checker accepts —
   dispatching TLS and dllimport symbols to their helpers, PIC symbols
   to legitimize_pic_address, then canonicalizing shift→multiply and
   re-associating PLUS trees.  NOTE(review): elided excerpt — the
   `changed` flag, `log` declaration, braces and the final return are
   not visible here.  */
10388 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10389 enum machine_mode mode)
/* TLS symbols, bare or inside (const (plus sym int)), are handled by
   legitimize_tls_address.  */
10394 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10396 return legitimize_tls_address (x, (enum tls_model) log, false);
10397 if (GET_CODE (x) == CONST
10398 && GET_CODE (XEXP (x, 0)) == PLUS
10399 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10400 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10402 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10403 (enum tls_model) log, false);
10404 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10407 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10409 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10410 return legitimize_dllimport_symbol (x, true);
10411 if (GET_CODE (x) == CONST
10412 && GET_CODE (XEXP (x, 0)) == PLUS
10413 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10414 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10416 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10417 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10421 if (flag_pic && SYMBOLIC_CONST (x))
10422 return legitimize_pic_address (x, 0);
10424 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10425 if (GET_CODE (x) == ASHIFT
10426 && CONST_INT_P (XEXP (x, 1))
10427 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10430 log = INTVAL (XEXP (x, 1));
10431 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10432 GEN_INT (1 << log));
10435 if (GET_CODE (x) == PLUS)
10437 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10439 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10440 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10441 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10444 log = INTVAL (XEXP (XEXP (x, 0), 1));
10445 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10446 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10447 GEN_INT (1 << log));
10450 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10451 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10452 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10455 log = INTVAL (XEXP (XEXP (x, 1), 1));
10456 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10457 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10458 GEN_INT (1 << log));
10461 /* Put multiply first if it isn't already.  */
10462 if (GET_CODE (XEXP (x, 1)) == MULT)
10464 rtx tmp = XEXP (x, 0);
10465 XEXP (x, 0) = XEXP (x, 1);
10470 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10471 into (plus (plus (mult (reg) (const)) (reg)) (const)).  This can be
10472 created by virtual register instantiation, register elimination, and
10473 similar optimizations.  */
10474 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10477 x = gen_rtx_PLUS (Pmode,
10478 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10479 XEXP (XEXP (x, 1), 0)),
10480 XEXP (XEXP (x, 1), 1));
10484 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10485 into (plus (plus (mult (reg) (const)) (reg)) (const)).  */
10486 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10487 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10488 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10489 && CONSTANT_P (XEXP (x, 1)))
10492 rtx other = NULL_RTX;
/* Identify which of the two candidate constants is the CONST_INT and
   which is the non-constant remainder.  */
10494 if (CONST_INT_P (XEXP (x, 1)))
10496 constant = XEXP (x, 1);
10497 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10499 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10501 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10502 other = XEXP (x, 1);
10510 x = gen_rtx_PLUS (Pmode,
10511 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10512 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10513 plus_constant (other, INTVAL (constant)));
10517 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Force each MULT half into an operand register if still illegal.  */
10520 if (GET_CODE (XEXP (x, 0)) == MULT)
10523 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10526 if (GET_CODE (XEXP (x, 1)) == MULT)
10529 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10533 && REG_P (XEXP (x, 1))
10534 && REG_P (XEXP (x, 0)))
10537 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10540 x = legitimize_pic_address (x, 0);
10543 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
/* Last resort: move the non-register half into a fresh pseudo so the
   address becomes reg+reg.  */
10546 if (REG_P (XEXP (x, 0)))
10548 rtx temp = gen_reg_rtx (Pmode);
10549 rtx val = force_operand (XEXP (x, 1), temp);
10551 emit_move_insn (temp, val);
10553 XEXP (x, 1) = temp;
10557 else if (REG_P (XEXP (x, 1)))
10559 rtx temp = gen_reg_rtx (Pmode);
10560 rtx val = force_operand (XEXP (x, 0), temp);
10562 emit_move_insn (temp, val);
10564 XEXP (x, 0) = temp;
10572 /* Print an integer constant expression in assembler syntax. Addition
10573 and subtraction are the only arithmetic that may appear in these
10574 expressions. FILE is the stdio stream to write to, X is the rtx, and
10575 CODE is the operand print code from the output string. */
/* NOTE(review): this listing is elided -- the embedded line numbers jump,
   so the case labels and braces between visible lines are missing.
   Comments below describe only the visible code.  */
10578 output_pic_addr_const (FILE *file, rtx x, int code)
10582 switch (GET_CODE (x))
/* PC rtx is only valid while emitting position-independent code.  */
10585 gcc_assert (flag_pic);
10590 if (! TARGET_MACHO || TARGET_64BIT)
10591 output_addr_const (file, x);
10594 const char *name = XSTR (x, 0);
10596 /* Mark the decl as referenced so that cgraph will
10597 output the function. */
10598 if (SYMBOL_REF_DECL (x))
10599 mark_decl_referenced (SYMBOL_REF_DECL (x));
/* Darwin indirect calls to undefined functions go through a stub.  */
10602 if (MACHOPIC_INDIRECT
10603 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10604 name = machopic_indirection_name (x, /*stub_p=*/true);
10606 assemble_name (file, name);
/* Operand code 'P' requests a @PLT suffix for non-local symbols
   (not on Mach-O and not under the 64-bit MS ABI).  */
10608 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10609 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10610 fputs ("@PLT", file);
10617 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10618 assemble_name (asm_out_file, buf);
10622 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10626 /* This used to output parentheses around the expression,
10627 but that does not work on the 386 (either ATT or BSD assembler). */
10628 output_pic_addr_const (file, XEXP (x, 0), code);
/* CONST_DOUBLE with VOIDmode is a 2-word integer constant.  */
10632 if (GET_MODE (x) == VOIDmode)
10634 /* We can use %d if the number is <32 bits and positive. */
10635 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10636 fprintf (file, "0x%lx%08lx",
10637 (unsigned long) CONST_DOUBLE_HIGH (x),
10638 (unsigned long) CONST_DOUBLE_LOW (x));
10640 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10643 /* We can't handle floating point constants;
10644 PRINT_OPERAND must handle them. */
10645 output_operand_lossage ("floating constant misused");
10649 /* Some assemblers need integer constants to appear first. */
10650 if (CONST_INT_P (XEXP (x, 0)))
10652 output_pic_addr_const (file, XEXP (x, 0), code);
10654 output_pic_addr_const (file, XEXP (x, 1), code);
/* MINUS: the subtrahend must be a CONST_INT.  */
10658 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10659 output_pic_addr_const (file, XEXP (x, 1), code);
10661 output_pic_addr_const (file, XEXP (x, 0), code);
/* AT&T uses brackets, Intel parentheses, around the difference.  */
10667 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10668 output_pic_addr_const (file, XEXP (x, 0), code);
10670 output_pic_addr_const (file, XEXP (x, 1), code);
10672 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
/* UNSPEC: print the wrapped symbol, then the relocation suffix
   selected by the unspec number.  */
10676 gcc_assert (XVECLEN (x, 0) == 1);
10677 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10678 switch (XINT (x, 1))
10681 fputs ("@GOT", file);
10683 case UNSPEC_GOTOFF:
10684 fputs ("@GOTOFF", file);
10686 case UNSPEC_PLTOFF:
10687 fputs ("@PLTOFF", file);
10689 case UNSPEC_GOTPCREL:
10690 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10691 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10693 case UNSPEC_GOTTPOFF:
10694 /* FIXME: This might be @TPOFF in Sun ld too. */
10695 fputs ("@GOTTPOFF", file);
10698 fputs ("@TPOFF", file);
10700 case UNSPEC_NTPOFF:
10702 fputs ("@TPOFF", file);
10704 fputs ("@NTPOFF", file);
10706 case UNSPEC_DTPOFF:
10707 fputs ("@DTPOFF", file);
10709 case UNSPEC_GOTNTPOFF:
10711 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10712 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10714 fputs ("@GOTNTPOFF", file);
10716 case UNSPEC_INDNTPOFF:
10717 fputs ("@INDNTPOFF", file);
10720 case UNSPEC_MACHOPIC_OFFSET:
10722 machopic_output_function_base_name (file);
10726 output_operand_lossage ("invalid UNSPEC as operand");
10732 output_operand_lossage ("invalid expression as operand");
10736 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10737 We need to emit DTP-relative relocations. */
/* Emits the symbol with a @DTPOFF relocation; the elided lines
   presumably switch on SIZE (4 vs. 8 bytes) -- TODO confirm against
   the unelided source.  */
10739 static void ATTRIBUTE_UNUSED
10740 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10742 fputs (ASM_LONG, file);
10743 output_addr_const (file, x);
10744 fputs ("@DTPOFF", file);
/* Pad with a zero word (visible in one branch of the elided switch).  */
10750 fputs (", 0", file);
10753 gcc_unreachable ();
10757 /* Return true if X is a representation of the PIC register. This copes
10758 with calls from ix86_find_base_term, where the register might have
10759 been replaced by a cselib value. */
10762 ix86_pic_register_p (rtx x)
/* A cselib VALUE stands for the PIC register if cselib says it is
   equal to pic_offset_table_rtx.  */
10764 if (GET_CODE (x) == VALUE)
10765 return (pic_offset_table_rtx
10766 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx))
10768 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10771 /* In the name of slightly smaller debug output, and to cater to
10772 general assembler lossage, recognize PIC+GOTOFF and turn it back
10773 into a direct symbol reference.
10775 On Darwin, this is necessary to avoid a crash, because Darwin
10776 has a different PIC label for each routine but the DWARF debugging
10777 information is not associated with any particular routine, so it's
10778 necessary to remove references to the PIC label from RTL stored by
10779 the DWARF output code. */
/* NOTE(review): elided listing -- the assignment of X from ORIG_X and
   several early returns are among the missing lines.  */
10782 ix86_delegitimize_address (rtx orig_x)
10785 /* reg_addend is NULL or a multiple of some register. */
10786 rtx reg_addend = NULL_RTX;
10787 /* const_addend is NULL or a const_int. */
10788 rtx const_addend = NULL_RTX;
10789 /* This is the result, or NULL. */
10790 rtx result = NULL_RTX;
/* 64-bit case: a GOTPCREL load is (mem (const (unspec [sym]))).  */
10797 if (GET_CODE (x) != CONST
10798 || GET_CODE (XEXP (x, 0)) != UNSPEC
10799 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10800 || !MEM_P (orig_x))
10802 return XVECEXP (XEXP (x, 0), 0, 0);
/* 32-bit case: expect (plus <pic-ish base> (const ...)).  */
10805 if (GET_CODE (x) != PLUS
10806 || GET_CODE (XEXP (x, 1)) != CONST)
10809 if (ix86_pic_register_p (XEXP (x, 0)))
10810 /* %ebx + GOT/GOTOFF */
10812 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10814 /* %ebx + %reg * scale + GOT/GOTOFF */
10815 reg_addend = XEXP (x, 0);
10816 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10817 reg_addend = XEXP (reg_addend, 1);
10818 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10819 reg_addend = XEXP (reg_addend, 0);
10822 if (!REG_P (reg_addend)
10823 && GET_CODE (reg_addend) != MULT
10824 && GET_CODE (reg_addend) != ASHIFT)
/* Strip the CONST wrapper and peel off a trailing integer offset.  */
10830 x = XEXP (XEXP (x, 1), 0);
10831 if (GET_CODE (x) == PLUS
10832 && CONST_INT_P (XEXP (x, 1)))
10834 const_addend = XEXP (x, 1);
/* GOT references only make sense as memory; GOTOFF only as address.  */
10838 if (GET_CODE (x) == UNSPEC
10839 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10840 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10841 result = XVECEXP (x, 0, 0);
10843 if (TARGET_MACHO && darwin_local_data_pic (x)
10844 && !MEM_P (orig_x))
10845 result = XVECEXP (x, 0, 0);
/* Reassemble symbol + constant offset + register addend.  */
10851 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10853 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10857 /* If X is a machine specific address (i.e. a symbol or label being
10858 referenced as a displacement from the GOT implemented using an
10859 UNSPEC), then return the base term. Otherwise return X. */
10862 ix86_find_base_term (rtx x)
/* On 64-bit targets, unwrap (const (plus (unspec GOTPCREL [sym]) off))
   and hand back the symbol itself; the TARGET_64BIT guard is among the
   elided lines -- TODO confirm.  */
10868 if (GET_CODE (x) != CONST)
10870 term = XEXP (x, 0);
10871 if (GET_CODE (term) == PLUS
10872 && (CONST_INT_P (XEXP (term, 1))
10873 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10874 term = XEXP (term, 0);
10875 if (GET_CODE (term) != UNSPEC
10876 || XINT (term, 1) != UNSPEC_GOTPCREL)
10879 return XVECEXP (term, 0, 0);
/* Everything else goes through the generic delegitimizer.  */
10882 return ix86_delegitimize_address (x);
/* Emit the condition-code suffix (e.g. "e", "ne", "a", "p") for CODE in
   flags mode MODE.  REVERSE inverts the condition; FP selects the
   fcmov-style spelling.  NOTE(review): elided listing -- most case
   labels of the switch on CODE are missing between visible lines.  */
10886 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10887 int fp, FILE *file)
10889 const char *suffix;
/* FP compares are first mapped onto integer condition codes.  */
10891 if (mode == CCFPmode || mode == CCFPUmode)
10893 code = ix86_fp_compare_code_to_integer (code);
10897 code = reverse_condition (code);
10948 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10952 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10953 Those same assemblers have the same but opposite lossage on cmov. */
10954 if (mode == CCmode)
10955 suffix = fp ? "nbe" : "a";
10956 else if (mode == CCCmode)
10959 gcc_unreachable ();
10975 gcc_unreachable ();
10979 gcc_assert (mode == CCmode || mode == CCCmode);
10996 gcc_unreachable ();
11000 /* ??? As above. */
11001 gcc_assert (mode == CCmode || mode == CCCmode);
11002 suffix = fp ? "nb" : "ae";
11005 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11009 /* ??? As above. */
11010 if (mode == CCmode)
11012 else if (mode == CCCmode)
11013 suffix = fp ? "nb" : "ae";
11015 gcc_unreachable ();
/* Parity flag: UNORDERED -> "p"/"u", ORDERED -> "np"/"nu".  */
11018 suffix = fp ? "u" : "p";
11021 suffix = fp ? "nu" : "np";
11024 gcc_unreachable ();
11026 fputs (suffix, file);
11029 /* Print the name of register X to FILE based on its machine mode and number.
11030 If CODE is 'w', pretend the mode is HImode.
11031 If CODE is 'b', pretend the mode is QImode.
11032 If CODE is 'k', pretend the mode is SImode.
11033 If CODE is 'q', pretend the mode is DImode.
11034 If CODE is 'x', pretend the mode is V4SFmode.
11035 If CODE is 't', pretend the mode is V8SFmode.
11036 If CODE is 'h', pretend the reg is the 'high' byte register.
11037 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
11038 If CODE is 'd', duplicate the operand for AVX instruction.
/* NOTE(review): elided listing -- the size assignments for each code
   letter and several case labels are among the missing lines.  */
11042 print_reg (rtx x, int code, FILE *file)
11045 bool duplicated = code == 'd' && TARGET_AVX;
/* Refuse registers that must have been eliminated by now.  */
11047 gcc_assert (x == pc_rtx
11048 || (REGNO (x) != ARG_POINTER_REGNUM
11049 && REGNO (x) != FRAME_POINTER_REGNUM
11050 && REGNO (x) != FLAGS_REG
11051 && REGNO (x) != FPSR_REG
11052 && REGNO (x) != FPCR_REG));
11054 if (ASSEMBLER_DIALECT == ASM_ATT)
/* pc_rtx prints as "rip"; only meaningful in 64-bit mode.  */
11059 gcc_assert (TARGET_64BIT);
11060 fputs ("rip", file);
/* Translate the code letter into an operand size (bytes).  */
11064 if (code == 'w' || MMX_REG_P (x))
11066 else if (code == 'b')
11068 else if (code == 'k')
11070 else if (code == 'q')
11072 else if (code == 'y')
11074 else if (code == 'h')
11076 else if (code == 'x')
11078 else if (code == 't')
11081 code = GET_MODE_SIZE (GET_MODE (x));
11083 /* Irritatingly, AMD extended registers use different naming convention
11084 from the normal registers. */
11085 if (REX_INT_REG_P (x))
11087 gcc_assert (TARGET_64BIT);
11091 error ("extended registers have no high halves");
11094 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11097 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11100 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11103 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11106 error ("unsupported operand size for extended register");
11116 if (STACK_TOP_P (x))
/* 4- and 8-byte integer regs get an 'e'/'r' prefix (eax/rax).  */
11125 if (! ANY_FP_REG_P (x))
11126 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11131 reg = hi_reg_name[REGNO (x)];
11134 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11136 reg = qi_reg_name[REGNO (x)];
11139 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11141 reg = qi_high_reg_name[REGNO (x)];
11146 gcc_assert (!duplicated);
/* Skip the leading size letter of the canonical name.  */
11148 fputs (hi_reg_name[REGNO (x)] + 1, file);
11153 gcc_unreachable ();
/* AVX 'd': print the register a second time, "x, x" style.  */
11159 if (ASSEMBLER_DIALECT == ASM_ATT)
11160 fprintf (file, ", %%%s", reg);
11162 fprintf (file, ", %s", reg);
11166 /* Locate some local-dynamic symbol still in use by this function
11167 so that we can print its name in some tls_local_dynamic_base
/* for_each_rtx callback: remembers the first local-dynamic TLS symbol
   it sees in cfun->machine->some_ld_name.  */
11171 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11175 if (GET_CODE (x) == SYMBOL_REF
11176 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11178 cfun->machine->some_ld_name = XSTR (x, 0);
/* Return (and cache) the name of some local-dynamic TLS symbol used in
   the current function, scanning its insns on first call.  Aborts if
   none exists -- callers only ask when one must be present.  */
11185 static const char *
11186 get_some_local_dynamic_name (void)
11190 if (cfun->machine->some_ld_name)
11191 return cfun->machine->some_ld_name;
11193 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11195 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11196 return cfun->machine->some_ld_name;
11198 gcc_unreachable ();
11201 /* Meaning of CODE:
11202 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11203 C -- print opcode suffix for set/cmov insn.
11204 c -- like C, but print reversed condition
11205 E,e -- likewise, but for compare-and-branch fused insn.
11206 F,f -- likewise, but for floating-point.
11207 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
11209 R -- print the prefix for register names.
11210 z -- print the opcode suffix for the size of the current operand.
11211 Z -- likewise, with special suffixes for x87 instructions.
11212 * -- print a star (in certain assembler syntax)
11213 A -- print an absolute memory reference.
11214 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11215 s -- print a shift double count, followed by the assemblers argument
11217 b -- print the QImode name of the register for the indicated operand.
11218 %b0 would print %al if operands[0] is reg 0.
11219 w -- likewise, print the HImode name of the register.
11220 k -- likewise, print the SImode name of the register.
11221 q -- likewise, print the DImode name of the register.
11222 x -- likewise, print the V4SFmode name of the register.
11223 t -- likewise, print the V8SFmode name of the register.
11224 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11225 y -- print "st(0)" instead of "st" as a register.
11226 d -- print duplicated register operand for AVX instruction.
11227 D -- print condition for SSE cmp instruction.
11228 P -- if PIC, print an @PLT suffix.
11229 X -- don't print any sort of PIC '@' suffix for a symbol.
11230 & -- print some in-use local-dynamic symbol name.
11231 H -- print a memory address offset by 8; used for sse high-parts
11232 Y -- print condition for SSE5 com* instruction.
11233 + -- print a branch hint as 'cs' or 'ds' prefix
11234 ; -- print a semicolon (after prefixes due to bug in older gas).
/* NOTE(review): elided listing -- the big switch's case labels, break
   statements and several putc calls are missing between visible lines.
   Annotations below only cover what is visible.  */
11238 print_operand (FILE *file, rtx x, int code)
/* '*': AT&T indirect-call star.  */
11245 if (ASSEMBLER_DIALECT == ASM_ATT)
/* '&': name of an in-use local-dynamic TLS symbol.  */
11250 assemble_name (file, get_some_local_dynamic_name ())
/* 'A': absolute memory reference, dialect-dependent.  */
11254 switch (ASSEMBLER_DIALECT)
11261 /* Intel syntax. For absolute addresses, registers should not
11262 be surrounded by braces. */
11266 PRINT_OPERAND (file, x, 0);
11273 gcc_unreachable ();
11276 PRINT_OPERAND (file, x, 0);
/* 'L','W','B','Q','S','T': explicit size suffixes (AT&T only).  */
11281 if (ASSEMBLER_DIALECT == ASM_ATT)
11286 if (ASSEMBLER_DIALECT == ASM_ATT)
11291 if (ASSEMBLER_DIALECT == ASM_ATT)
11296 if (ASSEMBLER_DIALECT == ASM_ATT)
11301 if (ASSEMBLER_DIALECT == ASM_ATT)
11306 if (ASSEMBLER_DIALECT == ASM_ATT)
/* 'z': suffix derived from the operand's own mode.  */
11311 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11313 /* Opcodes don't get size suffixes if using Intel opcodes. */
11314 if (ASSEMBLER_DIALECT == ASM_INTEL)
11317 switch (GET_MODE_SIZE (GET_MODE (x)))
11336 output_operand_lossage
11337 ("invalid operand size for operand code '%c'", code);
11342 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11344 (0, "non-integer operand used with operand code '%c'", code);
11348 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11349 if (ASSEMBLER_DIALECT == ASM_INTEL)
/* 'Z': x87 integer/float load-store suffixes (s/l/ll etc.).  */
11352 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11354 switch (GET_MODE_SIZE (GET_MODE (x)))
11357 #ifdef HAVE_AS_IX86_FILDS
11367 #ifdef HAVE_AS_IX86_FILDQ
11370 fputs ("ll", file);
11378 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11380 /* 387 opcodes don't get size suffixes
11381 if the operands are registers. */
11382 if (STACK_REG_P (x))
11385 switch (GET_MODE_SIZE (GET_MODE (x)))
11406 output_operand_lossage
11407 ("invalid operand type used with operand code '%c'", code);
11411 output_operand_lossage
11412 ("invalid operand size for operand code '%c'", code);
/* 's': shift-double count followed by separator, unless omitted.  */
11429 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11431 PRINT_OPERAND (file, x, 0);
11432 fputs (", ", file);
11437 /* Little bit of braindamage here. The SSE compare instructions
11438 does use completely different names for the comparisons that the
11439 fp conditional moves. */
/* 'D': SSE cmp predicate name; the AVX variant has extra spellings.  */
11442 switch (GET_CODE (x))
11445 fputs ("eq", file);
11448 fputs ("eq_us", file);
11451 fputs ("lt", file);
11454 fputs ("nge", file);
11457 fputs ("le", file);
11460 fputs ("ngt", file);
11463 fputs ("unord", file);
11466 fputs ("neq", file);
11469 fputs ("neq_oq", file);
11472 fputs ("ge", file);
11475 fputs ("nlt", file);
11478 fputs ("gt", file);
11481 fputs ("nle", file);
11484 fputs ("ord", file);
11487 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11493 switch (GET_CODE (x))
11497 fputs ("eq", file);
11501 fputs ("lt", file);
11505 fputs ("le", file);
11508 fputs ("unord", file);
11512 fputs ("neq", file);
11516 fputs ("nlt", file);
11520 fputs ("nle", file);
11523 fputs ("ord", file);
11526 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* 'O': Sun-as cmov size qualifier (w./l./q.), AT&T only.  */
11532 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11533 if (ASSEMBLER_DIALECT == ASM_ATT)
11535 switch (GET_MODE (x))
11537 case HImode: putc ('w', file); break;
11539 case SFmode: putc ('l', file); break;
11541 case DFmode: putc ('q', file); break;
11542 default: gcc_unreachable ();
/* 'C': set/cmov condition suffix.  */
11549 if (!COMPARISON_P (x))
11551 output_operand_lossage ("operand is neither a constant nor a "
11552 "condition code, invalid operand code "
11556 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
/* 'F': like 'C' but fcmov spelling.  */
11559 if (!COMPARISON_P (x))
11561 output_operand_lossage ("operand is neither a constant nor a "
11562 "condition code, invalid operand code "
11566 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11567 if (ASSEMBLER_DIALECT == ASM_ATT)
11570 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11573 /* Like above, but reverse condition */
11575 /* Check to see if argument to %c is really a constant
11576 and not a condition code which needs to be reversed. */
11577 if (!COMPARISON_P (x))
11579 output_operand_lossage ("operand is neither a constant nor a "
11580 "condition code, invalid operand "
11584 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11587 if (!COMPARISON_P (x))
11589 output_operand_lossage ("operand is neither a constant nor a "
11590 "condition code, invalid operand "
11594 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11595 if (ASSEMBLER_DIALECT == ASM_ATT)
11598 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
/* 'E'/'e': compare-and-branch fused insn conditions (CCmode).  */
11602 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11606 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
/* 'H': address of the high 8 bytes of the operand.  */
11610 /* It doesn't actually matter what mode we use here, as we're
11611 only going to use this for printing. */
11612 x = adjust_address_nv (x, DImode, 8);
/* '+': branch hint prefix from REG_BR_PROB, only when the static
   prediction disagrees with the CPU's default.  */
11620 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11623 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11626 int pred_val = INTVAL (XEXP (x, 0));
11628 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11629 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11631 int taken = pred_val > REG_BR_PROB_BASE / 2;
11632 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11634 /* Emit hints only in the case default branch prediction
11635 heuristics would fail. */
11636 if (taken != cputaken)
11638 /* We use 3e (DS) prefix for taken branches and
11639 2e (CS) prefix for not taken branches. */
11641 fputs ("ds ; ", file);
11643 fputs ("cs ; ", file);
/* 'Y': SSE5 com* predicate names.  */
11651 switch (GET_CODE (x))
11654 fputs ("neq", file);
11657 fputs ("eq", file);
11661 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11665 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11669 fputs ("le", file);
11673 fputs ("lt", file);
11676 fputs ("unord", file);
11679 fputs ("ord", file);
11682 fputs ("ueq", file);
11685 fputs ("nlt", file);
11688 fputs ("nle", file);
11691 fputs ("ule", file);
11694 fputs ("ult", file);
11697 fputs ("une", file);
11700 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
/* ';': separator needed by old gas after prefixes.  */
11707 fputs (" ; ", file);
11714 output_operand_lossage ("invalid operand code '%c'", code);
/* No code letter (or after the switch): print the operand itself.  */
11719 print_reg (x, code, file);
11721 else if (MEM_P (x))
11723 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11724 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11725 && GET_MODE (x) != BLKmode)
11728 switch (GET_MODE_SIZE (GET_MODE (x)))
11730 case 1: size = "BYTE"; break;
11731 case 2: size = "WORD"; break;
11732 case 4: size = "DWORD"; break;
11733 case 8: size = "QWORD"; break;
11734 case 12: size = "XWORD"; break;
11736 if (GET_MODE (x) == XFmode)
11742 gcc_unreachable ();
11745 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11748 else if (code == 'w')
11750 else if (code == 'k')
11753 fputs (size, file);
11754 fputs (" PTR ", file);
11758 /* Avoid (%rip) for call operands. */
11759 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11760 && !CONST_INT_P (x))
11761 output_addr_const (file, x);
11762 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11763 output_operand_lossage ("invalid constraints for operand");
11765 output_address (x);
/* SFmode immediates are printed as their 32-bit image in hex.  */
11768 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11773 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11774 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11776 if (ASSEMBLER_DIALECT == ASM_ATT)
11778 fprintf (file, "0x%08lx", (long unsigned int) l);
11781 /* These float cases don't actually occur as immediate operands. */
11782 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11786 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11787 fputs (dstr, file);
11790 else if (GET_CODE (x) == CONST_DOUBLE
11791 && GET_MODE (x) == XFmode)
11795 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11796 fputs (dstr, file);
11801 /* We have patterns that allow zero sets of memory, for instance.
11802 In 64-bit mode, we should probably support all 8-byte vectors,
11803 since we can in fact encode that into an immediate. */
11804 if (GET_CODE (x) == CONST_VECTOR)
11806 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
/* Immediates get a '$' in AT&T syntax; symbolic constants may get
   "OFFSET FLAT:" in Intel syntax.  */
11812 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11814 if (ASSEMBLER_DIALECT == ASM_ATT)
11817 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11818 || GET_CODE (x) == LABEL_REF)
11820 if (ASSEMBLER_DIALECT == ASM_ATT)
11823 fputs ("OFFSET FLAT:", file);
11826 if (CONST_INT_P (x))
11827 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11829 output_pic_addr_const (file, x, code);
11831 output_addr_const (file, x);
11835 /* Print a memory operand whose address is ADDR. */
/* NOTE(review): elided listing -- the gcc_assert on OK, punctuation
   putc calls and some dialect branches are among the missing lines.  */
11838 print_operand_address (FILE *file, rtx addr)
11840 struct ix86_address parts;
11841 rtx base, index, disp;
11843 int ok = ix86_decompose_address (addr, &parts);
11848 index = parts.index;
11850 scale = parts.scale;
/* Segment override (%fs:/%gs:) comes first.  */
11858 if (ASSEMBLER_DIALECT == ASM_ATT)
11860 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11863 gcc_unreachable ();
11866 /* Use one byte shorter RIP relative addressing for 64bit mode. */
11867 if (TARGET_64BIT && !base && !index)
11871 if (GET_CODE (disp) == CONST
11872 && GET_CODE (XEXP (disp, 0)) == PLUS
11873 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11874 symbol = XEXP (XEXP (disp, 0), 0);
/* RIP addressing is valid for labels and non-TLS symbols.  */
11876 if (GET_CODE (symbol) == LABEL_REF
11877 || (GET_CODE (symbol) == SYMBOL_REF
11878 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11881 if (!base && !index)
11883 /* Displacement only requires special attention. */
11885 if (CONST_INT_P (disp))
11887 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11888 fputs ("ds:", file);
11889 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11892 output_pic_addr_const (file, disp, 0);
11894 output_addr_const (file, disp);
/* AT&T form: disp(base,index,scale).  */
11898 if (ASSEMBLER_DIALECT == ASM_ATT)
11903 output_pic_addr_const (file, disp, 0);
11904 else if (GET_CODE (disp) == LABEL_REF)
11905 output_asm_label (disp);
11907 output_addr_const (file, disp);
11912 print_reg (base, 0, file);
11916 print_reg (index, 0, file);
11918 fprintf (file, ",%d", scale);
/* Intel form: [base+index*scale+disp].  */
11924 rtx offset = NULL_RTX;
11928 /* Pull out the offset of a symbol; print any symbol itself. */
11929 if (GET_CODE (disp) == CONST
11930 && GET_CODE (XEXP (disp, 0)) == PLUS
11931 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11933 offset = XEXP (XEXP (disp, 0), 1);
11934 disp = gen_rtx_CONST (VOIDmode,
11935 XEXP (XEXP (disp, 0), 0));
11939 output_pic_addr_const (file, disp, 0);
11940 else if (GET_CODE (disp) == LABEL_REF)
11941 output_asm_label (disp);
11942 else if (CONST_INT_P (disp))
11945 output_addr_const (file, disp);
11951 print_reg (base, 0, file);
11954 if (INTVAL (offset) >= 0)
11956 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11960 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11967 print_reg (index, 0, file);
11969 fprintf (file, "*%d", scale);
/* TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA hook: print the x86 UNSPEC
   relocations (TLS and Mach-O PIC offsets) that generic
   output_addr_const cannot handle.  Returns false for anything
   unrecognized (return statements are among the elided lines).  */
11977 output_addr_const_extra (FILE *file, rtx x)
11981 if (GET_CODE (x) != UNSPEC)
11984 op = XVECEXP (x, 0, 0);
11985 switch (XINT (x, 1))
11987 case UNSPEC_GOTTPOFF:
11988 output_addr_const (file, op);
11989 /* FIXME: This might be @TPOFF in Sun ld. */
11990 fputs ("@GOTTPOFF", file);
11993 output_addr_const (file, op);
11994 fputs ("@TPOFF", file);
11996 case UNSPEC_NTPOFF:
11997 output_addr_const (file, op);
11999 fputs ("@TPOFF", file);
12001 fputs ("@NTPOFF", file);
12003 case UNSPEC_DTPOFF:
12004 output_addr_const (file, op);
12005 fputs ("@DTPOFF", file);
12007 case UNSPEC_GOTNTPOFF:
12008 output_addr_const (file, op);
12010 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12011 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
12013 fputs ("@GOTNTPOFF", file);
12015 case UNSPEC_INDNTPOFF:
12016 output_addr_const (file, op);
12017 fputs ("@INDNTPOFF", file);
12020 case UNSPEC_MACHOPIC_OFFSET:
12021 output_addr_const (file, op);
12023 machopic_output_function_base_name (file);
12034 /* Split one or more DImode RTL references into pairs of SImode
12035 references. The RTL can be REG, offsettable MEM, integer constant, or
12036 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12037 split and "num" is its length. lo_half and hi_half are output arrays
12038 that parallel "operands". */
/* The loop over NUM is among the elided lines; each iteration splits
   operands[num] into a low and a high SImode half.  */
12041 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12045 rtx op = operands[num];
12047 /* simplify_subreg refuse to split volatile memory addresses,
12048 but we still have to handle it. */
12051 lo_half[num] = adjust_address (op, SImode, 0);
12052 hi_half[num] = adjust_address (op, SImode, 4);
/* VOIDmode here means an integer constant; treat it as DImode.  */
12056 lo_half[num] = simplify_gen_subreg (SImode, op,
12057 GET_MODE (op) == VOIDmode
12058 ? DImode : GET_MODE (op), 0);
12059 hi_half[num] = simplify_gen_subreg (SImode, op,
12060 GET_MODE (op) == VOIDmode
12061 ? DImode : GET_MODE (op), 4);
12065 /* Split one or more TImode RTL references into pairs of DImode
12066 references. The RTL can be REG, offsettable MEM, integer constant, or
12067 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12068 split and "num" is its length. lo_half and hi_half are output arrays
12069 that parallel "operands". */
/* TImode analogue of split_di: halves are DImode, high half at byte 8.
   The loop over NUM is among the elided lines.  */
12072 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12076 rtx op = operands[num];
12078 /* simplify_subreg refuse to split volatile memory addresses, but we
12079 still have to handle it. */
12082 lo_half[num] = adjust_address (op, DImode, 0);
12083 hi_half[num] = adjust_address (op, DImode, 8);
12087 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12088 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12093 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12094 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12095 is the expression of the binary operation. The output may either be
12096 emitted here, or returned to the caller, like all output_* functions.
12098 There is no guarantee that the operands are the same mode, as they
12099 might be within FLOAT or FLOAT_EXTEND expressions. */
12101 #ifndef SYSV386_COMPAT
12102 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12103 wants to fix the assemblers because that causes incompatibility
12104 with gcc. No-one wants to fix gcc because that causes
12105 incompatibility with assemblers... You can use the option of
12106 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12107 #define SYSV386_COMPAT 1
/* NOTE(review): elided listing -- the opcode-string assignments (p/ssep)
   per rtx code and several braces/labels are among the missing lines.  */
12111 output_387_binary_op (rtx insn, rtx *operands)
12113 static char buf[40];
12116 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12118 #ifdef ENABLE_CHECKING
12119 /* Even if we do not want to check the inputs, this documents input
12120 constraints. Which helps in understanding the following code. */
12121 if (STACK_REG_P (operands[0])
12122 && ((REG_P (operands[1])
12123 && REGNO (operands[0]) == REGNO (operands[1])
12124 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12125 || (REG_P (operands[2])
12126 && REGNO (operands[0]) == REGNO (operands[2])
12127 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12128 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12131 gcc_assert (is_sse);
/* Pick the x87 mnemonic stem; integer-mode inputs select the fi* form.  */
12134 switch (GET_CODE (operands[3]))
12137 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12138 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12146 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12147 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12155 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12156 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12164 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12165 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12173 gcc_unreachable ();
/* SSE path: ss/sd suffix by mode; the 3-operand form is AVX-style.  */
12180 strcpy (buf, ssep);
12181 if (GET_MODE (operands[0]) == SFmode)
12182 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12184 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12188 strcpy (buf, ssep + 1);
12189 if (GET_MODE (operands[0]) == SFmode)
12190 strcat (buf, "ss\t{%2, %0|%0, %2}");
12192 strcat (buf, "sd\t{%2, %0|%0, %2}");
/* x87 path: choose the operand/suffix form per rtx code.  */
12198 switch (GET_CODE (operands[3]))
12202 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
/* Commutative ops: swap so operands[0] == operands[1].  */
12204 rtx temp = operands[2];
12205 operands[2] = operands[1];
12206 operands[1] = temp;
12209 /* know operands[0] == operands[1]. */
12211 if (MEM_P (operands[2]))
12217 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12219 if (STACK_TOP_P (operands[0]))
12220 /* How is it that we are storing to a dead operand[2]?
12221 Well, presumably operands[1] is dead too. We can't
12222 store the result to st(0) as st(0) gets popped on this
12223 instruction. Instead store to operands[2] (which I
12224 think has to be st(1)). st(1) will be popped later.
12225 gcc <= 2.8.1 didn't have this check and generated
12226 assembly code that the Unixware assembler rejected. */
12227 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12229 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12233 if (STACK_TOP_P (operands[0]))
12234 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12236 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
/* Non-commutative ops (MINUS/DIV): operand order matters.  */
12241 if (MEM_P (operands[1]))
12247 if (MEM_P (operands[2]))
12253 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12256 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12257 derived assemblers, confusingly reverse the direction of
12258 the operation for fsub{r} and fdiv{r} when the
12259 destination register is not st(0). The Intel assembler
12260 doesn't have this brain damage. Read !SYSV386_COMPAT to
12261 figure out what the hardware really does. */
12262 if (STACK_TOP_P (operands[0]))
12263 p = "{p\t%0, %2|rp\t%2, %0}";
12265 p = "{rp\t%2, %0|p\t%0, %2}";
12267 if (STACK_TOP_P (operands[0]))
12268 /* As above for fmul/fadd, we can't store to st(0). */
12269 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12271 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12276 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12279 if (STACK_TOP_P (operands[0]))
12280 p = "{rp\t%0, %1|p\t%1, %0}";
12282 p = "{p\t%1, %0|rp\t%0, %1}";
12284 if (STACK_TOP_P (operands[0]))
12285 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12287 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12292 if (STACK_TOP_P (operands[0]))
12294 if (STACK_TOP_P (operands[1]))
12295 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12297 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12300 else if (STACK_TOP_P (operands[1]))
12303 p = "{\t%1, %0|r\t%0, %1}";
12305 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12311 p = "{r\t%2, %0|\t%0, %2}";
12313 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12319 gcc_unreachable ();
12326 /* Return needed mode for entity in optimize_mode_switching pass. */
/* ENTITY selects the mode-switching entity (here the i387 control word);
   INSN is the instruction being classified.  Returns an I387_CW_* value:
   UNINITIALIZED after calls/asm, ANY when the insn has no requirement,
   otherwise a specific rounding/masking mode taken from the insn's
   i387_cw attribute.  NOTE(review): interior lines of this function are
   elided in this extract; the dispatch on the attribute value below is
   visibly incomplete.  */
12329 ix86_mode_needed (int entity, rtx insn)
12331 enum attr_i387_cw mode;
12333 /* The mode UNINITIALIZED is used to store control word after a
12334 function call or ASM pattern. The mode ANY specify that function
12335 has no requirements on the control word and make no changes in the
12336 bits we are interested in. */
12339 || (NONJUMP_INSN_P (insn)
12340 && (asm_noperands (PATTERN (insn)) >= 0
12341 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12342 return I387_CW_UNINITIALIZED;
/* Unrecognizable insns cannot carry the i387_cw attribute.  */
12344 if (recog_memoized (insn) < 0)
12345 return I387_CW_ANY;
12347 mode = get_attr_i387_cw (insn);
12352 if (mode == I387_CW_TRUNC)
12357 if (mode == I387_CW_FLOOR)
12362 if (mode == I387_CW_CEIL)
12367 if (mode == I387_CW_MASK_PM)
12372 gcc_unreachable ();
12375 return I387_CW_ANY;
12378 /* Output code to initialize control word copies used by trunc?f?i and
12379 rounding patterns. CURRENT_MODE is set to current control word,
12380 while NEW_MODE is set to new control word. */
/* MODE is an I387_CW_* constant selecting the rounding/masking behavior
   to encode into a stack-local copy of the x87 control word.  The current
   control word is saved with fnstcw, modified in a pseudo, and stored to
   the stack slot matching MODE.  */
12383 emit_i387_cw_initialization (int mode)
12385 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12388 enum ix86_stack_slot slot;
12390 rtx reg = gen_reg_rtx (HImode);
/* Save the live control word, then work on a register copy.  */
12392 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12393 emit_move_insn (reg, copy_rtx (stored_mode));
/* Two encodings of the RC/PM bit updates follow: plain 16-bit AND/OR
   arithmetic when HImode ops are acceptable, else a bitfield insert
   (movsi_insv_1) to avoid partial-register operations.  */
12395 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12396 || optimize_function_for_size_p (cfun))
12400 case I387_CW_TRUNC:
12401 /* round toward zero (truncate) */
12402 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12403 slot = SLOT_CW_TRUNC;
12406 case I387_CW_FLOOR:
12407 /* round down toward -oo */
12408 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12409 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12410 slot = SLOT_CW_FLOOR;
12414 /* round up toward +oo */
12415 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12416 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12417 slot = SLOT_CW_CEIL;
12420 case I387_CW_MASK_PM:
12421 /* mask precision exception for nearbyint() */
12422 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12423 slot = SLOT_CW_MASK_PM;
12427 gcc_unreachable ();
12434 case I387_CW_TRUNC:
12435 /* round toward zero (truncate) */
12436 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12437 slot = SLOT_CW_TRUNC;
12440 case I387_CW_FLOOR:
12441 /* round down toward -oo */
12442 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12443 slot = SLOT_CW_FLOOR;
12447 /* round up toward +oo */
12448 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12449 slot = SLOT_CW_CEIL;
12452 case I387_CW_MASK_PM:
12453 /* mask precision exception for nearbyint() */
12454 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12455 slot = SLOT_CW_MASK_PM;
12459 gcc_unreachable ();
/* Store the adjusted control word into its dedicated stack slot.  */
12463 gcc_assert (slot < MAX_386_STACK_LOCALS);
12465 new_mode = assign_386_stack_local (HImode, slot);
12466 emit_move_insn (new_mode, reg);
12469 /* Output code for INSN to convert a float to a signed int. OPERANDS
12470 are the insn operands. The output may be [HSD]Imode and the input
12471 operand may be [SDX]Fmode. */
/* FISTTP selects the SSE3 fisttp instruction (truncating store-and-pop)
   instead of the fist/fistp + control-word dance.  Returns the
   assembler template via output_asm_insn side effects.
   NOTE(review): operands[2]/[3] presumably hold the saved/new control
   words loaded by the fldcw pair below — confirm against the insn
   pattern; not visible in this extract.  */
12474 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12476 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12477 int dimode_p = GET_MODE (operands[0]) == DImode;
12478 int round_mode = get_attr_i387_cw (insn);
12480 /* Jump through a hoop or two for DImode, since the hardware has no
12481 non-popping instruction. We used to do this a different way, but
12482 that was somewhat fragile and broke with post-reload splitters. */
12483 if ((dimode_p || fisttp) && !stack_top_dies)
12484 output_asm_insn ("fld\t%y1", operands);
12486 gcc_assert (STACK_TOP_P (operands[1]));
12487 gcc_assert (MEM_P (operands[0]));
12488 gcc_assert (GET_MODE (operands[1]) != TFmode);
12491 output_asm_insn ("fisttp%Z0\t%0", operands);
/* Switch rounding mode around the store when the insn requires it.  */
12494 if (round_mode != I387_CW_ANY)
12495 output_asm_insn ("fldcw\t%3", operands);
12496 if (stack_top_dies || dimode_p)
12497 output_asm_insn ("fistp%Z0\t%0", operands);
12499 output_asm_insn ("fist%Z0\t%0", operands);
12500 if (round_mode != I387_CW_ANY)
12501 output_asm_insn ("fldcw\t%2", operands);
12507 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12508 have the values zero or one, indicates the ffreep insn's operand
12509 from the OPERANDS array. */
/* Returns the assembler template.  When the assembler understands
   ffreep the mnemonic is emitted directly; otherwise the instruction
   is hand-encoded as raw bytes (ASM_SHORT below).  Falls back to fstp
   when TARGET_USE_FFREEP is off.  */
12511 static const char *
12512 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12514 if (TARGET_USE_FFREEP)
12515 #ifdef HAVE_AS_IX86_FFREEP
12516 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12519 static char retval[32];
12520 int regno = REGNO (operands[opno]);
12522 gcc_assert (FP_REGNO_P (regno));
12524 regno -= FIRST_STACK_REG;
/* Emit the raw opcode bytes for "ffreep %st(regno)" since the
   assembler lacks the mnemonic.  */
12526 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
12531 return opno ? "fstp\t%y1" : "fstp\t%y0";
12535 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12536 should be used. UNORDERED_P is true when fucom should be used. */
/* Returns the assembler template for an x87 or SSE scalar FP compare.
   SSE compares use (v)(u)comiss/(v)(u)comisd; x87 compares pick a
   template from the ALT table below, indexed by the MASK bits computed
   at the bottom.  */
12539 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12541 int stack_top_dies;
12542 rtx cmp_op0, cmp_op1;
12543 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12547 cmp_op0 = operands[0];
12548 cmp_op1 = operands[1];
12552 cmp_op0 = operands[1];
12553 cmp_op1 = operands[2];
/* SSE path: the "v" prefix is skipped (template + 1) on non-AVX.  */
12558 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12559 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12560 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12561 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12563 if (GET_MODE (operands[0]) == SFmode)
12565 return &ucomiss[TARGET_AVX ? 0 : 1];
12567 return &comiss[TARGET_AVX ? 0 : 1];
12570 return &ucomisd[TARGET_AVX ? 0 : 1];
12572 return &comisd[TARGET_AVX ? 0 : 1];
12575 gcc_assert (STACK_TOP_P (cmp_op0));
12577 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
/* Compare against zero uses ftst, popping st(0) if it dies.  */
12579 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12581 if (stack_top_dies)
12583 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12584 return output_387_ffreep (operands, 1);
12587 return "ftst\n\tfnstsw\t%0";
12590 if (STACK_REG_P (cmp_op1)
12592 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12593 && REGNO (cmp_op1) != FIRST_STACK_REG)
12595 /* If both the top of the 387 stack dies, and the other operand
12596 is also a stack register that dies, then this must be a
12597 `fcompp' float compare */
12601 /* There is no double popping fcomi variant. Fortunately,
12602 eflags is immune from the fstp's cc clobbering. */
12604 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12606 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12607 return output_387_ffreep (operands, 0);
12612 return "fucompp\n\tfnstsw\t%0";
12614 return "fcompp\n\tfnstsw\t%0";
12619 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12621 static const char * const alt[16] =
12623 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12624 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12625 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12626 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12628 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12629 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12633 "fcomi\t{%y1, %0|%0, %y1}",
12634 "fcomip\t{%y1, %0|%0, %y1}",
12635 "fucomi\t{%y1, %0|%0, %y1}",
12636 "fucomip\t{%y1, %0|%0, %y1}",
/* Build the 4-bit index into ALT per the encoding comment above.  */
12647 mask = eflags_p << 3;
12648 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12649 mask |= unordered_p << 1;
12650 mask |= stack_top_dies;
12652 gcc_assert (mask < 16);
/* Print a jump-table (addr_vec) element: the address of local label
   VALUE, as a long (or quad where 64-bit pointers require it).  */
12661 ix86_output_addr_vec_elt (FILE *file, int value)
12663 const char *directive = ASM_LONG;
12667 directive = ASM_QUAD;
12669 gcc_assert (!TARGET_64BIT);
12672 fprintf (file, "%s" LPREFIX "%d\n", directive, value);
/* Print an addr_diff_vec (pc-relative jump-table) element: the offset
   of label VALUE relative to label REL, with the PIC-appropriate
   relocation form per target (plain difference, @GOTOFF, Mach-O
   function base, or _GLOBAL_OFFSET_TABLE_-relative).  */
12676 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12678 const char *directive = ASM_LONG;
12681 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12682 directive = ASM_QUAD;
12684 gcc_assert (!TARGET_64BIT);
12686 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12687 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12688 fprintf (file, "%s" LPREFIX "%d-" LPREFIX "%d\n",
12689 directive, value, rel);
12690 else if (HAVE_AS_GOTOFF_IN_DATA)
12691 fprintf (file, ASM_LONG LPREFIX "%d@GOTOFF\n", value);
12693 else if (TARGET_MACHO)
12695 fprintf (file, ASM_LONG LPREFIX "%d-", value);
12696 machopic_output_function_base_name (file);
12701 asm_fprintf (file, ASM_LONG "%U%s+[.-" LPREFIX "%d]\n",
12702 GOT_SYMBOL_NAME, value);
12705 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
/* DEST is the register to zero.  Post-reload only: narrow registers are
   widened to SImode so the xor form avoids partial-register writes, and
   a FLAGS clobber is attached when the xor idiom is used.  */
12709 ix86_expand_clear (rtx dest)
12713 /* We play register width games, which are only valid after reload. */
12714 gcc_assert (reload_completed);
12716 /* Avoid HImode and its attendant prefix byte. */
12717 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12718 dest = gen_rtx_REG (SImode, REGNO (dest));
12719 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12721 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12722 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12724 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12725 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12731 /* X is an unchanging MEM. If it is a constant pool reference, return
12732 the constant pool rtx, else NULL. */
/* The address is delegitimized first so PIC-decorated pool references
   are recognized too.  */
12735 maybe_get_pool_constant (rtx x)
12737 x = ix86_delegitimize_address (XEXP (x, 0));
12739 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12740 return get_pool_constant (x);
/* Expand a scalar move of MODE from operands[1] to operands[0],
   legitimizing TLS symbols, dllimport references, PIC addresses and
   awkward immediates along the way, then emitting the final SET.
   NOTE(review): this function is heavily elided in this extract — the
   operand setup and several conditions are not visible; comments below
   describe only the visible fragments.  */
12746 ix86_expand_move (enum machine_mode mode, rtx operands[])
12749 enum tls_model model;
/* Bare SYMBOL_REF source: handle TLS and dllimport symbols.  */
12754 if (GET_CODE (op1) == SYMBOL_REF)
12756 model = SYMBOL_REF_TLS_MODEL (op1);
12759 op1 = legitimize_tls_address (op1, model, true);
12760 op1 = force_operand (op1, op0);
12764 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12765 && SYMBOL_REF_DLLIMPORT_P (op1))
12766 op1 = legitimize_dllimport_symbol (op1, false);
/* (const (plus (symbol_ref) (const_int))): legitimize the symbol part
   and re-add the addend.  */
12768 else if (GET_CODE (op1) == CONST
12769 && GET_CODE (XEXP (op1, 0)) == PLUS
12770 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12772 rtx addend = XEXP (XEXP (op1, 0), 1);
12773 rtx symbol = XEXP (XEXP (op1, 0), 0);
12776 model = SYMBOL_REF_TLS_MODEL (symbol);
12778 tmp = legitimize_tls_address (symbol, model, true);
12779 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12780 && SYMBOL_REF_DLLIMPORT_P (symbol))
12781 tmp = legitimize_dllimport_symbol (symbol, true);
12785 tmp = force_operand (tmp, NULL);
12786 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12787 op0, 1, OPTAB_DIRECT);
/* PIC: symbolic operands must go through the PIC machinery.  */
12793 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12795 if (TARGET_MACHO && !TARGET_64BIT)
12800 rtx temp = ((reload_in_progress
12801 || ((op0 && REG_P (op0))
12803 ? op0 : gen_reg_rtx (Pmode));
12804 op1 = machopic_indirect_data_reference (op1, temp);
12805 op1 = machopic_legitimize_pic_address (op1, mode,
12806 temp == op1 ? 0 : temp);
12808 else if (MACHOPIC_INDIRECT)
12809 op1 = machopic_indirect_data_reference (op1, 0);
12817 op1 = force_reg (Pmode, op1);
12818 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12820 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
12821 op1 = legitimize_pic_address (op1, reg);
12830 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12831 || !push_operand (op0, mode))
12833 op1 = force_reg (mode, op1);
12835 if (push_operand (op0, mode)
12836 && ! general_no_elim_operand (op1, mode))
12837 op1 = copy_to_mode_reg (mode, op1);
12839 /* Force large constants in 64bit compilation into register
12840 to get them CSEed. */
12841 if (can_create_pseudo_p ()
12842 && (mode == DImode) && TARGET_64BIT
12843 && immediate_operand (op1, mode)
12844 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12845 && !register_operand (op0, mode)
12847 op1 = copy_to_mode_reg (mode, op1);
12849 if (can_create_pseudo_p ()
12850 && FLOAT_MODE_P (mode)
12851 && GET_CODE (op1) == CONST_DOUBLE)
12853 /* If we are loading a floating point constant to a register,
12854 force the value to memory now, since we'll get better code
12855 out the back end. */
12857 op1 = validize_mem (force_const_mem (mode, op1));
12858 if (!register_operand (op0, mode))
12860 rtx temp = gen_reg_rtx (mode);
12861 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12862 emit_move_insn (op0, temp);
12868 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
/* Expand a vector-mode move from operands[1] to operands[0],
   forcing non-zero constants into the constant pool and routing
   insufficiently aligned SSE operands through the misaligned-move
   expander.  */
12872 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12874 rtx op0 = operands[0], op1 = operands[1];
12875 unsigned int align = GET_MODE_ALIGNMENT (mode);
12877 /* Force constants other than zero into memory. We do not know how
12878 the instructions used to build constants modify the upper 64 bits
12879 of the register, once we have that information we may be able
12880 to handle some of them more efficiently. */
12881 if (can_create_pseudo_p ()
12882 && register_operand (op0, mode)
12883 && (CONSTANT_P (op1)
12884 || (GET_CODE (op1) == SUBREG
12885 && CONSTANT_P (SUBREG_REG (op1))))
12886 && !standard_sse_constant_p (op1))
12887 op1 = validize_mem (force_const_mem (mode, op1));
12889 /* We need to check memory alignment for SSE mode since attribute
12890 can make operands unaligned. */
12891 if (can_create_pseudo_p ()
12892 && SSE_REG_MODE_P (mode)
12893 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12894 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12898 /* ix86_expand_vector_move_misalign() does not like constants ... */
12899 if (CONSTANT_P (op1)
12900 || (GET_CODE (op1) == SUBREG
12901 && CONSTANT_P (SUBREG_REG (op1))))
12902 op1 = validize_mem (force_const_mem (mode, op1));
12904 /* ... nor both arguments in memory. */
12905 if (!register_operand (op0, mode)
12906 && !register_operand (op1, mode))
12907 op1 = force_reg (mode, op1);
12909 tmp[0] = op0; tmp[1] = op1;
12910 ix86_expand_vector_move_misalign (mode, tmp);
12914 /* Make operand1 a register if it isn't already. */
12915 if (can_create_pseudo_p ()
12916 && !register_operand (op0, mode)
12917 && !register_operand (op1, mode))
12919 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12923 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12926 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12927 straight to ix86_expand_vector_move. */
12928 /* Code generation for scalar reg-reg moves of single and double precision data:
12929 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12933 if (x86_sse_partial_reg_dependency == true)
12938 Code generation for scalar loads of double precision data:
12939 if (x86_sse_split_regs == true)
12940 movlpd mem, reg (gas syntax)
12944 Code generation for unaligned packed loads of single precision data
12945 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12946 if (x86_sse_unaligned_move_optimal)
12949 if (x86_sse_partial_reg_dependency == true)
12961 Code generation for unaligned packed loads of double precision data
12962 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12963 if (x86_sse_unaligned_move_optimal)
12966 if (x86_sse_split_regs == true)
/* NOTE(review): several alternatives in the strategy table above and a
   number of branch/else lines in the body are elided in this extract;
   the AVX switch below appears to handle the TARGET_AVX case before the
   legacy-SSE load/store paths.  */
12979 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12988 switch (GET_MODE_CLASS (mode))
12990 case MODE_VECTOR_INT:
/* AVX integer vectors: unaligned vmovdqu of the appropriate width.  */
12992 switch (GET_MODE_SIZE (mode))
12995 op0 = gen_lowpart (V16QImode, op0);
12996 op1 = gen_lowpart (V16QImode, op1);
12997 emit_insn (gen_avx_movdqu (op0, op1));
13000 op0 = gen_lowpart (V32QImode, op0);
13001 op1 = gen_lowpart (V32QImode, op1);
13002 emit_insn (gen_avx_movdqu256 (op0, op1));
13005 gcc_unreachable ();
13008 case MODE_VECTOR_FLOAT:
13009 op0 = gen_lowpart (mode, op0);
13010 op1 = gen_lowpart (mode, op1);
13015 emit_insn (gen_avx_movups (op0, op1));
13018 emit_insn (gen_avx_movups256 (op0, op1));
13021 emit_insn (gen_avx_movupd (op0, op1));
13024 emit_insn (gen_avx_movupd256 (op0, op1));
13027 gcc_unreachable ();
13032 gcc_unreachable ();
/* Legacy SSE, unaligned load path (op1 in memory).  */
13040 /* If we're optimizing for size, movups is the smallest. */
13041 if (optimize_insn_for_size_p ())
13043 op0 = gen_lowpart (V4SFmode, op0);
13044 op1 = gen_lowpart (V4SFmode, op1);
13045 emit_insn (gen_sse_movups (op0, op1));
13049 /* ??? If we have typed data, then it would appear that using
13050 movdqu is the only way to get unaligned data loaded with
13052 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13054 op0 = gen_lowpart (V16QImode, op0);
13055 op1 = gen_lowpart (V16QImode, op1);
13056 emit_insn (gen_sse2_movdqu (op0, op1));
13060 if (TARGET_SSE2 && mode == V2DFmode)
13064 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13066 op0 = gen_lowpart (V2DFmode, op0);
13067 op1 = gen_lowpart (V2DFmode, op1);
13068 emit_insn (gen_sse2_movupd (op0, op1));
13072 /* When SSE registers are split into halves, we can avoid
13073 writing to the top half twice. */
13074 if (TARGET_SSE_SPLIT_REGS)
13076 emit_clobber (op0);
13081 /* ??? Not sure about the best option for the Intel chips.
13082 The following would seem to satisfy; the register is
13083 entirely cleared, breaking the dependency chain. We
13084 then store to the upper half, with a dependency depth
13085 of one. A rumor has it that Intel recommends two movsd
13086 followed by an unpacklpd, but this is unconfirmed. And
13087 given that the dependency depth of the unpacklpd would
13088 still be one, I'm not sure why this would be better. */
13089 zero = CONST0_RTX (V2DFmode);
/* Load the two DF halves separately via movlpd/movhpd.  */
13092 m = adjust_address (op1, DFmode, 0);
13093 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13094 m = adjust_address (op1, DFmode, 8);
13095 emit_insn (gen_sse2_loadhpd (op0, op0, m));
13099 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13101 op0 = gen_lowpart (V4SFmode, op0);
13102 op1 = gen_lowpart (V4SFmode, op1);
13103 emit_insn (gen_sse_movups (op0, op1));
13107 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13108 emit_move_insn (op0, CONST0_RTX (mode));
13110 emit_clobber (op0);
/* Load the two SF-pair halves separately via movlps/movhps.  */
13112 if (mode != V4SFmode)
13113 op0 = gen_lowpart (V4SFmode, op0);
13114 m = adjust_address (op1, V2SFmode, 0);
13115 emit_insn (gen_sse_loadlps (op0, op0, m));
13116 m = adjust_address (op1, V2SFmode, 8);
13117 emit_insn (gen_sse_loadhps (op0, op0, m));
/* Legacy SSE, unaligned store path (op0 in memory).  */
13120 else if (MEM_P (op0))
13122 /* If we're optimizing for size, movups is the smallest. */
13123 if (optimize_insn_for_size_p ())
13125 op0 = gen_lowpart (V4SFmode, op0);
13126 op1 = gen_lowpart (V4SFmode, op1);
13127 emit_insn (gen_sse_movups (op0, op1));
13131 /* ??? Similar to above, only less clear because of quote
13132 typeless stores unquote. */
13133 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13134 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13136 op0 = gen_lowpart (V16QImode, op0);
13137 op1 = gen_lowpart (V16QImode, op1);
13138 emit_insn (gen_sse2_movdqu (op0, op1));
13142 if (TARGET_SSE2 && mode == V2DFmode)
13144 m = adjust_address (op0, DFmode, 0);
13145 emit_insn (gen_sse2_storelpd (m, op1));
13146 m = adjust_address (op0, DFmode, 8);
13147 emit_insn (gen_sse2_storehpd (m, op1));
13151 if (mode != V4SFmode)
13152 op1 = gen_lowpart (V4SFmode, op1);
13153 m = adjust_address (op0, V2SFmode, 0);
13154 emit_insn (gen_sse_storelps (m, op1));
13155 m = adjust_address (op0, V2SFmode, 8);
13156 emit_insn (gen_sse_storehps (m, op1));
13160 gcc_unreachable ();
13163 /* Expand a push in MODE. This is some mode for which we do not support
13164 proper push instructions, at least from the registers that we expect
13165 the value to live in. */
/* X is the value to push.  Decrements the stack pointer explicitly and
   stores X through it.  */
13168 ix86_expand_push (enum machine_mode mode, rtx x)
13172 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13173 GEN_INT (-GET_MODE_SIZE (mode)),
13174 stack_pointer_rtx, 1, OPTAB_DIRECT);
13175 if (tmp != stack_pointer_rtx)
13176 emit_move_insn (stack_pointer_rtx, tmp);
13178 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13180 /* When we push an operand onto stack, it has to be aligned at least
13181 at the function argument boundary. However since we don't have
13182 the argument type, we can't determine the actual argument
13184 emit_move_insn (tmp, x);
13187 /* Helper function of ix86_fixup_binary_operands to canonicalize
13188 operand order. Returns true if the operands should be swapped. */
/* Only commutative CODEs may swap.  Priority order: src1 matching dst,
   then immediates second, then memory references second.
   NOTE(review): the return statements for several priority cases are
   elided in this extract.  */
13191 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13194 rtx dst = operands[0];
13195 rtx src1 = operands[1];
13196 rtx src2 = operands[2];
13198 /* If the operation is not commutative, we can't do anything. */
13199 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13202 /* Highest priority is that src1 should match dst. */
13203 if (rtx_equal_p (dst, src1))
13205 if (rtx_equal_p (dst, src2))
13208 /* Next highest priority is that immediate constants come second. */
13209 if (immediate_operand (src2, mode))
13211 if (immediate_operand (src1, mode))
13214 /* Lowest priority is that memory references should come second. */
13224 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13225 destination to use for the operation. If different from the true
13226 destination in operands[0], a copy operation will be required. */
/* CODE/MODE describe the binary operation; OPERANDS is { dst, src1,
   src2 }.  Canonicalizes operand order, forces operands out of memory
   or constants into registers as needed, writes the fixed sources back
   into OPERANDS, and returns the dst to compute into.  */
13229 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13232 rtx dst = operands[0];
13233 rtx src1 = operands[1];
13234 rtx src2 = operands[2];
13236 /* Canonicalize operand order. */
13237 if (ix86_swap_binary_operands_p (code, mode, operands))
13241 /* It is invalid to swap operands of different modes. */
13242 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13249 /* Both source operands cannot be in memory. */
13250 if (MEM_P (src1) && MEM_P (src2))
13252 /* Optimization: Only read from memory once. */
13253 if (rtx_equal_p (src1, src2))
13255 src2 = force_reg (mode, src2);
13259 src2 = force_reg (mode, src2);
13262 /* If the destination is memory, and we do not have matching source
13263 operands, do things in registers. */
13264 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13265 dst = gen_reg_rtx (mode);
13267 /* Source 1 cannot be a constant. */
13268 if (CONSTANT_P (src1))
13269 src1 = force_reg (mode, src1);
13271 /* Source 1 cannot be a non-matching memory. */
13272 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13273 src1 = force_reg (mode, src1);
13275 operands[1] = src1;
13276 operands[2] = src2;
13280 /* Similarly, but assume that the destination has already been
13281 set up properly. */
/* Asserts that no extra destination copy is required, i.e. the fixup
   left operands[0] usable directly.  */
13284 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13285 enum machine_mode mode, rtx operands[])
13287 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13288 gcc_assert (dst == operands[0]);
13291 /* Attempt to expand a binary operator. Make the expansion closer to the
13292 actual machine, then just general_operand, which will allow 3 separate
13293 memory references (one output, two input) in a single insn. */
/* Emits (set dst (code src1 src2)), attaching a FLAGS clobber except
   during reload (where only PLUS, which has a no-clobber pattern, is
   allowed), then copies to operands[0] if fixup chose a scratch dst.  */
13296 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13299 rtx src1, src2, dst, op, clob;
13301 dst = ix86_fixup_binary_operands (code, mode, operands);
13302 src1 = operands[1];
13303 src2 = operands[2];
13305 /* Emit the instruction. */
13307 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13308 if (reload_in_progress)
13310 /* Reload doesn't know about the flags register, and doesn't know that
13311 it doesn't want to clobber it. We can only do this with PLUS. */
13312 gcc_assert (code == PLUS);
13317 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13318 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13321 /* Fix up the destination if needed. */
13322 if (dst != operands[0])
13323 emit_move_insn (operands[0], dst);
13326 /* Return TRUE or FALSE depending on whether the binary operator meets the
13327 appropriate constraints. */
/* Mirrors the checks applied by ix86_fixup_binary_operands, but as a
   predicate on already-canonicalized operands.  NOTE(review): the
   return-value lines for the failing checks are elided in this
   extract.  */
13330 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13333 rtx dst = operands[0];
13334 rtx src1 = operands[1];
13335 rtx src2 = operands[2];
13337 /* Both source operands cannot be in memory. */
13338 if (MEM_P (src1) && MEM_P (src2))
13341 /* Canonicalize operand order for commutative operators. */
13342 if (ix86_swap_binary_operands_p (code, mode, operands))
13349 /* If the destination is memory, we must have a matching source operand. */
13350 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13353 /* Source 1 cannot be a constant. */
13354 if (CONSTANT_P (src1))
13357 /* Source 1 cannot be a non-matching memory. */
13358 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13364 /* Attempt to expand a unary operator. Make the expansion closer to the
13365 actual machine, then just general_operand, which will allow 2 separate
13366 memory references (one output, one input) in a single insn. */
/* Emits (set dst (code src)), attaching a FLAGS clobber except for NOT
   (the one unary op whose pattern does not clobber flags — see the
   reload comment below), then copies to operands[0] if a scratch dst
   was used.  */
13369 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13372 int matching_memory;
13373 rtx src, dst, op, clob;
13378 /* If the destination is memory, and we do not have matching source
13379 operands, do things in registers. */
13380 matching_memory = 0;
13383 if (rtx_equal_p (dst, src))
13384 matching_memory = 1;
13386 dst = gen_reg_rtx (mode);
13389 /* When source operand is memory, destination must match. */
13390 if (MEM_P (src) && !matching_memory)
13391 src = force_reg (mode, src);
13393 /* Emit the instruction. */
13395 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13396 if (reload_in_progress || code == NOT)
13398 /* Reload doesn't know about the flags register, and doesn't know that
13399 it doesn't want to clobber it. */
13400 gcc_assert (code == NOT);
13405 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13406 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13409 /* Fix up the destination if needed. */
13410 if (dst != operands[0])
13411 emit_move_insn (operands[0], dst);
13414 #define LEA_SEARCH_THRESHOLD 12
13416 /* Search backward for non-agu definition of register number REGNO1
13417 or register number REGNO2 in INSN's basic block until
13418 1. Pass LEA_SEARCH_THRESHOLD instructions, or
13419 2. Reach BB boundary, or
13420 3. Reach agu definition.
13421 Returns the distance between the non-agu definition point and INSN.
13422 If no definition point, returns -1. */
/* Scans PREV_INSN chains within the block; if the block head is reached
   without exhausting the threshold and a single-predecessor back edge
   exists (simple loop), the search continues from the predecessor's
   end.  Uses DF def records to detect definitions of REGNO1/REGNO2 and
   the insn type attribute to distinguish LEA (AGU) definitions.  */
13425 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13428 basic_block bb = BLOCK_FOR_INSN (insn);
13431 enum attr_type insn_type;
13433 if (insn != BB_HEAD (bb))
13435 rtx prev = PREV_INSN (insn);
13436 while (prev && distance < LEA_SEARCH_THRESHOLD)
13441 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13442 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13443 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13444 && (regno1 == DF_REF_REGNO (*def_rec)
13445 || regno2 == DF_REF_REGNO (*def_rec)))
13447 insn_type = get_attr_type (prev);
13448 if (insn_type != TYPE_LEA)
13452 if (prev == BB_HEAD (bb))
13454 prev = PREV_INSN (prev);
/* Didn't find a definition in this block; try a simple-loop
   predecessor (the block looping back to itself).  */
13458 if (distance < LEA_SEARCH_THRESHOLD)
13462 bool simple_loop = false;
13464 FOR_EACH_EDGE (e, ei, bb->preds)
13467 simple_loop = true;
13473 rtx prev = BB_END (bb);
13476 && distance < LEA_SEARCH_THRESHOLD)
13481 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13482 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13483 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13484 && (regno1 == DF_REF_REGNO (*def_rec)
13485 || regno2 == DF_REF_REGNO (*def_rec)))
13487 insn_type = get_attr_type (prev);
13488 if (insn_type != TYPE_LEA)
13492 prev = PREV_INSN (prev);
13500 /* get_attr_type may modify recog data. We want to make sure
13501 that recog data is valid for instruction INSN, on which
13502 distance_non_agu_define is called. INSN is unchanged here. */
13503 extract_insn_cached (insn);
13507 /* Return the distance between INSN and the next insn that uses
13508 register number REGNO0 in memory address. Return -1 if no such
13509 a use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set. */
/* Forward counterpart of distance_non_agu_define: walks NEXT_INSN
   within the block, then (if the threshold is not exhausted) continues
   from the head of a simple-loop successor.  DF memory-use records
   signal an address use; any redefinition of REGNO0 terminates the
   search with -1.  */
13512 distance_agu_use (unsigned int regno0, rtx insn)
13514 basic_block bb = BLOCK_FOR_INSN (insn);
13519 if (insn != BB_END (bb))
13521 rtx next = NEXT_INSN (insn);
13522 while (next && distance < LEA_SEARCH_THRESHOLD)
13528 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13529 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13530 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13531 && regno0 == DF_REF_REGNO (*use_rec))
13533 /* Return DISTANCE if OP0 is used in memory
13534 address in NEXT. */
13538 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13539 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13540 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13541 && regno0 == DF_REF_REGNO (*def_rec))
13543 /* Return -1 if OP0 is set in NEXT. */
13547 if (next == BB_END (bb))
13549 next = NEXT_INSN (next);
/* Continue the search into a simple-loop successor block.  */
13553 if (distance < LEA_SEARCH_THRESHOLD)
13557 bool simple_loop = false;
13559 FOR_EACH_EDGE (e, ei, bb->succs)
13562 simple_loop = true;
13568 rtx next = BB_HEAD (bb);
13571 && distance < LEA_SEARCH_THRESHOLD)
13577 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13578 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13579 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13580 && regno0 == DF_REF_REGNO (*use_rec))
13582 /* Return DISTANCE if OP0 is used in memory
13583 address in NEXT. */
13587 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13588 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13589 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13590 && regno0 == DF_REF_REGNO (*def_rec))
13592 /* Return -1 if OP0 is set in NEXT. */
13597 next = NEXT_INSN (next);
13605 /* Define this macro to tune LEA priority vs ADD, it take effect when
13606 there is a dilemma of choicing LEA or ADD
13607 Negative value: ADD is more preferred than LEA
13609 Positive value: LEA is more preferred than ADD*/
13610 #define IX86_LEA_PRIORITY 2
13612 /* Return true if it is ok to optimize an ADD operation to LEA
13613 operation to avoid flag register consumation. For the processors
13614 like ATOM, if the destination register of LEA holds an actual
13615 address which will be used soon, LEA is better and otherwise ADD
/* operands = { dest, src1, src2 } of the ADD.  Non-AGU-tuned targets
   only need LEA for the 3-operand form (dest differs from both
   sources).  On AGU targets, compares the backward distance to a
   non-AGU definition against the forward distance to an address use,
   biased by IX86_LEA_PRIORITY.  */
13619 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13620 rtx insn, rtx operands[])
13622 unsigned int regno0 = true_regnum (operands[0]);
13623 unsigned int regno1 = true_regnum (operands[1]);
13624 unsigned int regno2;
13626 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13627 return regno0 != regno1;
13629 regno2 = true_regnum (operands[2]);
13631 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13632 if (regno0 != regno1 && regno0 != regno2)
13636 int dist_define, dist_use;
13637 dist_define = distance_non_agu_define (regno1, regno2, insn);
13638 if (dist_define <= 0)
13641 /* If this insn has both backward non-agu dependence and forward
13642 agu dependence, the one with short distance take effect. */
13643 dist_use = distance_agu_use (regno0, insn);
13645 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13652 /* Return true if destination reg of SET_BODY is shift count of
/* Recursive pattern-body walker: both SET_BODY and USE_BODY may be
   PARALLELs, in which case each element is tried in turn.  Returns
   true when the register set by SET_BODY appears as the (register)
   shift/rotate count in USE_BODY.  */
13656 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13662 /* Retrieve destination of SET_BODY. */
13663 switch (GET_CODE (set_body))
13666 set_dest = SET_DEST (set_body);
13667 if (!set_dest || !REG_P (set_dest))
13671 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13672 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13680 /* Retrieve shift count of USE_BODY. */
13681 switch (GET_CODE (use_body))
13684 shift_rtx = XEXP (use_body, 1);
13687 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13688 if (ix86_dep_by_shift_count_body (set_body,
13689 XVECEXP (use_body, 0, i)))
/* Only shift/rotate operations carry a count operand.  */
13697 && (GET_CODE (shift_rtx) == ASHIFT
13698 || GET_CODE (shift_rtx) == LSHIFTRT
13699 || GET_CODE (shift_rtx) == ASHIFTRT
13700 || GET_CODE (shift_rtx) == ROTATE
13701 || GET_CODE (shift_rtx) == ROTATERT))
13703 rtx shift_count = XEXP (shift_rtx, 1);
13705 /* Return true if shift count is dest of SET_BODY. */
13706 if (REG_P (shift_count)
13707 && true_regnum (set_dest) == true_regnum (shift_count))
13714 /* Return true if destination reg of SET_INSN is shift count of
/* Thin wrapper: delegate to the _body variant on the insns' PATTERNs.  */
13718 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13720 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13721 PATTERN (use_insn));
13724 /* Return TRUE or FALSE depending on whether the unary operator meets the
13725 appropriate constraints. */
13728 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13729 enum machine_mode mode ATTRIBUTE_UNUSED,
13730 rtx operands[2] ATTRIBUTE_UNUSED)
13732 /* If one of operands is memory, source and destination must match. */
13733 if ((MEM_P (operands[0])
13734 || MEM_P (operands[1]))
13735 && ! rtx_equal_p (operands[0], operands[1]))
/* (elided in this listing: presumably "return false;" here and
   "return true;" at the end -- confirm against the full source.)  */
13740 /* Post-reload splitter for converting an SF or DFmode value in an
13741 SSE register into an unsigned SImode. */
13744 ix86_split_convert_uns_si_sse (rtx operands[])
13746 enum machine_mode vecmode;
13747 rtx value, large, zero_or_two31, input, two31, x;
13749 large = operands[1];
13750 zero_or_two31 = operands[2];
13751 input = operands[3];
13752 two31 = operands[4];
13753 vecmode = GET_MODE (large);
13754 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13756 /* Load up the value into the low element. We must ensure that the other
13757 elements are valid floats -- zero is the easiest such value. */
13760 if (vecmode == V4SFmode)
13761 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13763 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13767 input = gen_rtx_REG (vecmode, REGNO (input));
13768 emit_move_insn (value, CONST0_RTX (vecmode));
13769 if (vecmode == V4SFmode)
13770 emit_insn (gen_sse_movss (value, value, input));
13772 emit_insn (gen_sse2_movsd (value, value, input));
13775 emit_move_insn (large, two31);
13776 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
/* large = (2**31 <= value) mask; zero_or_two31 = mask ? 2**31 : 0;
   value -= zero_or_two31.  This is the classic unsigned-range fixup.  */
13778 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13779 emit_insn (gen_rtx_SET (VOIDmode, large, x))
13781 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13782 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13784 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13785 emit_insn (gen_rtx_SET (VOIDmode, value, x));
/* Shift the 0/-1 mask into the integer sign-bit position, convert,
   then XOR the sign bit back in for the "large" lanes.  */
13787 large = gen_rtx_REG (V4SImode, REGNO (large));
13788 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13790 x = gen_rtx_REG (V4SImode, REGNO (value));
13791 if (vecmode == V4SFmode)
13792 emit_insn (gen_sse2_cvttps2dq (x, value));
13794 emit_insn (gen_sse2_cvttpd2dq (x, value));
13797 emit_insn (gen_xorv4si3 (value, value, large));
13800 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13801 Expects the 64-bit DImode to be supplied in a pair of integral
13802 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13803 -mfpmath=sse, !optimize_size only. */
13806 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13808 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13809 rtx int_xmm, fp_xmm;
13810 rtx biases, exponents;
/* Get the 64-bit payload into an XMM register via whichever path the
   target supports best (direct move, split move, or vector init).  */
13813 int_xmm = gen_reg_rtx (V4SImode);
13814 if (TARGET_INTER_UNIT_MOVES)
13815 emit_insn (gen_movdi_to_sse (int_xmm, input));
13816 else if (TARGET_SSE_SPLIT_REGS)
13818 emit_clobber (int_xmm);
13819 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13823 x = gen_reg_rtx (V2DImode);
13824 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13825 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13828 x = gen_rtx_CONST_VECTOR (V4SImode,
13829 gen_rtvec (4, GEN_INT (0x43300000UL),
13830 GEN_INT (0x45300000UL),
13831 const0_rtx, const0_rtx));
13832 exponents = validize_mem (force_const_mem (V4SImode, x));
13834 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13835 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13837 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13838 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13839 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13840 (0x1.0p84 + double(fp_value_hi_xmm)).
13841 Note these exponents differ by 32. */
13843 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13845 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13846 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13847 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13848 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13849 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13850 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13851 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13852 biases = validize_mem (force_const_mem (V2DFmode, biases));
13853 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13855 /* Add the upper and lower DFmode values together. */
13857 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
/* (non-SSE3 path: unpack the high half and add the two doubles.)  */
13860 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13861 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13862 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13865 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13868 /* Not used, but eases macroization of patterns. */
/* Stub: must never be called; exists only so the expander name set is
   complete for pattern macroization.  */
13870 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13871 rtx input ATTRIBUTE_UNUSED)
13873 gcc_unreachable ();
13876 /* Convert an unsigned SImode value into a DFmode. Only currently used
13877 for SSE, but applicable anywhere. */
13880 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13882 REAL_VALUE_TYPE TWO31r;
/* Bias by -2**31 so the value fits signed SImode, convert to DFmode,
   then add 2.0**31 back -- DFmode holds all 32-bit values exactly.  */
13885 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13886 NULL, 1, OPTAB_DIRECT);
13888 fp = gen_reg_rtx (DFmode);
13889 emit_insn (gen_floatsidf2 (fp, x));
13891 real_ldexp (&TWO31r, &dconst1, 31);
13892 x = const_double_from_real_value (TWO31r, DFmode);
13894 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13896 emit_move_insn (target, x);
13899 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13900 32-bit mode; otherwise we have a direct convert instruction. */
13903 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13905 REAL_VALUE_TYPE TWO32r;
13906 rtx fp_lo, fp_hi, x;
13908 fp_lo = gen_reg_rtx (DFmode);
13909 fp_hi = gen_reg_rtx (DFmode);
/* result = (double)(signed)hi * 2**32 + (double)(unsigned)lo.  */
13911 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13913 real_ldexp (&TWO32r, &dconst1, 32);
13914 x = const_double_from_real_value (TWO32r, DFmode);
13915 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13917 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13919 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13922 emit_move_insn (target, x);
13925 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13926 For x86_32, -mfpmath=sse, !optimize_size only. */
13928 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13930 REAL_VALUE_TYPE ONE16r;
13931 rtx fp_hi, fp_lo, int_hi, int_lo, x;
/* Split into 16-bit halves; each half converts to SFmode exactly,
   then result = hi * 2**16 + lo.  */
13933 real_ldexp (&ONE16r, &dconst1, 16);
13934 x = const_double_from_real_value (ONE16r, SFmode);
13935 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13936 NULL, 0, OPTAB_DIRECT);
13937 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13938 NULL, 0, OPTAB_DIRECT);
13939 fp_hi = gen_reg_rtx (SFmode);
13940 fp_lo = gen_reg_rtx (SFmode);
13941 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13942 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13943 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13945 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13947 if (!rtx_equal_p (target, fp_hi))
13948 emit_move_insn (target, fp_hi);
13951 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13952 then replicate the value for all elements of the vector
/* With !VECT, FP modes fill only the low element and zero the rest;
   the integer modes visible here always replicate.  */
13956 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13963 v = gen_rtvec (4, value, value, value, value);
13964 return gen_rtx_CONST_VECTOR (V4SImode, v);
13968 v = gen_rtvec (2, value, value);
13969 return gen_rtx_CONST_VECTOR (V2DImode, v);
13973 v = gen_rtvec (4, value, value, value, value);
13975 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13976 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13977 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13981 v = gen_rtvec (2, value, value);
13983 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13984 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13987 gcc_unreachable ();
13991 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13992 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13993 for an SSE register. If VECT is true, then replicate the mask for
13994 all elements of the vector register. If INVERT is true, then create
13995 a mask excluding the sign bit. */
13998 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
14000 enum machine_mode vec_mode, imode;
14001 HOST_WIDE_INT hi, lo;
14006 /* Find the sign bit, sign extended to 2*HWI. */
14012 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
14013 lo = 0x80000000, hi = lo < 0;
14019 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
14020 if (HOST_BITS_PER_WIDE_INT >= 64)
14021 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
14023 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
/* NOTE(review): this arm sets vec_mode = VOIDmode -- presumably the
   TFmode case, where the 128-bit mask is built by hand; confirm
   against the full source.  */
14028 vec_mode = VOIDmode;
14029 if (HOST_BITS_PER_WIDE_INT >= 64)
14032 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
14039 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14043 lo = ~lo, hi = ~hi;
14049 mask = immed_double_const (lo, hi, imode);
14051 vec = gen_rtvec (2, v, mask);
14052 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
14053 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14060 gcc_unreachable ();
/* INVERT selects the complement: every bit except the sign bit.  */
14064 lo = ~lo, hi = ~hi;
14066 /* Force this value into the low part of a fp vector constant. */
14067 mask = immed_double_const (lo, hi, imode);
14068 mask = gen_lowpart (mode, mask);
14070 if (vec_mode == VOIDmode)
14071 return force_reg (mode, mask);
14073 v = ix86_build_const_vector (mode, vect, mask);
14074 return force_reg (vec_mode, v);
14077 /* Generate code for floating point ABS or NEG. */
14080 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14083 rtx mask, set, use, clob, dst, src;
14084 bool use_sse = false;
14085 bool vector_mode = VECTOR_MODE_P (mode);
14086 enum machine_mode elt_mode = mode;
14090 elt_mode = GET_MODE_INNER (mode);
14093 else if (mode == TFmode)
14095 else if (TARGET_SSE_MATH)
14096 use_sse = SSE_FLOAT_MODE_P (mode);
14098 /* NEG and ABS performed with SSE use bitwise mask operations.
14099 Create the appropriate mask now. */
14101 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS)
/* NEG = XOR with the sign-bit mask; ABS = AND with its complement.  */
14110 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14111 set = gen_rtx_SET (VOIDmode, dst, set);
/* (non-SSE path: emit the plain NEG/ABS rtx.)  */
14116 set = gen_rtx_fmt_e (code, mode, src);
14117 set = gen_rtx_SET (VOIDmode, dst, set);
/* Keep the mask live via USE; x87 absneg patterns clobber the flags.  */
14120 use = gen_rtx_USE (VOIDmode, mask);
14121 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14122 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14123 gen_rtvec (3, set, use, clob)));
14130 /* Expand a copysign operation. Special case operand 0 being a constant. */
14133 ix86_expand_copysign (rtx operands[])
14135 enum machine_mode mode;
14136 rtx dest, op0, op1, mask, nmask;
14138 dest = operands[0];
14142 mode = GET_MODE (dest);
/* Constant magnitude: strip its sign up front, then use the _const
   pattern which needs only one mask.  */
14144 if (GET_CODE (op0) == CONST_DOUBLE)
14146 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14148 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14149 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14151 if (mode == SFmode || mode == DFmode)
14153 enum machine_mode vmode;
14155 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14157 if (op0 == CONST0_RTX (mode))
14158 op0 = CONST0_RTX (vmode);
14161 rtx v = ix86_build_const_vector (mode, false, op0);
14163 op0 = force_reg (vmode, v);
14166 else if (op0 != CONST0_RTX (mode))
14167 op0 = force_reg (mode, op0);
14169 mask = ix86_build_signbit_mask (mode, 0, 0);
14171 if (mode == SFmode)
14172 copysign_insn = gen_copysignsf3_const;
14173 else if (mode == DFmode)
14174 copysign_insn = gen_copysigndf3_const;
14176 copysign_insn = gen_copysigntf3_const;
14178 emit_insn (copysign_insn (dest, op0, op1, mask));
/* Variable magnitude: the _var pattern needs both the sign-bit mask
   and its complement.  */
14182 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14184 nmask = ix86_build_signbit_mask (mode, 0, 1);
14185 mask = ix86_build_signbit_mask (mode, 0, 0);
14187 if (mode == SFmode)
14188 copysign_insn = gen_copysignsf3_var;
14189 else if (mode == DFmode)
14190 copysign_insn = gen_copysigndf3_var;
14192 copysign_insn = gen_copysigntf3_var;
14194 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14198 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14199 be a constant, and so has already been expanded into a vector constant. */
14202 ix86_split_copysign_const (rtx operands[])
14204 enum machine_mode mode, vmode;
14205 rtx dest, op0, op1, mask, x;
14207 dest = operands[0];
14210 mask = operands[3];
14212 mode = GET_MODE (dest);
14213 vmode = GET_MODE (mask);
/* dest = (op1 & sign-mask) | |op0| ; the IOR is skipped when the
   magnitude is the zero vector.  */
14215 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14216 x = gen_rtx_AND (vmode, dest, mask);
14217 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14219 if (op0 != CONST0_RTX (vmode))
14221 x = gen_rtx_IOR (vmode, dest, op0);
14222 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14226 /* Deconstruct a copysign operation. Operand 0 is variable,
14227 so we have to do two masks. */
14230 ix86_split_copysign_var (rtx operands[])
14232 enum machine_mode mode, vmode;
14233 rtx dest, scratch, op0, op1, mask, nmask, x;
14235 dest = operands[0];
14236 scratch = operands[1];
14239 nmask = operands[4];
14240 mask = operands[5];
14242 mode = GET_MODE (dest);
14243 vmode = GET_MODE (mask);
14245 if (rtx_equal_p (op0, op1))
14247 /* Shouldn't happen often (it's useless, obviously), but when it does
14248 we'd generate incorrect code if we continue below. */
14249 emit_move_insn (dest, op0);
/* The "alternative N" comments refer to the constraint alternatives of
   the copysign*_var insn pattern; register assignments differ per
   alternative, hence the REGNO dispatching below.  */
14253 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14255 gcc_assert (REGNO (op1) == REGNO (scratch));
14257 x = gen_rtx_AND (vmode, scratch, mask);
14258 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14261 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14262 x = gen_rtx_NOT (vmode, dest);
14263 x = gen_rtx_AND (vmode, x, op0);
14264 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14268 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14270 x = gen_rtx_AND (vmode, scratch, mask);
14272 else /* alternative 2,4 */
14274 gcc_assert (REGNO (mask) == REGNO (scratch));
14275 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14276 x = gen_rtx_AND (vmode, scratch, op1);
14278 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14280 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14282 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14283 x = gen_rtx_AND (vmode, dest, nmask);
14285 else /* alternative 3,4 */
14287 gcc_assert (REGNO (nmask) == REGNO (dest));
14289 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14290 x = gen_rtx_AND (vmode, dest, op0);
14292 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Combine: dest = (op0 & ~signmask) | (op1 & signmask).  */
14295 x = gen_rtx_IOR (vmode, dest, scratch);
14296 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14299 /* Return TRUE or FALSE depending on whether the first SET in INSN
14300 has source and destination with matching CC modes, and that the
14301 CC mode is at least as constrained as REQ_MODE. */
14304 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14307 enum machine_mode set_mode;
14309 set = PATTERN (insn);
14310 if (GET_CODE (set) == PARALLEL)
14311 set = XVECEXP (set, 0, 0);
14312 gcc_assert (GET_CODE (set) == SET);
14313 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14315 set_mode = GET_MODE (SET_DEST (set));
/* Each case below accepts REQ_MODE only when SET_MODE provides at
   least as much flag information (case labels elided in listing).  */
14319 if (req_mode != CCNOmode
14320 && (req_mode != CCmode
14321 || XEXP (SET_SRC (set), 1) != const0_rtx))
14325 if (req_mode == CCGCmode)
14329 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14333 if (req_mode == CCZmode)
14344 gcc_unreachable ();
14347 return (GET_MODE (SET_SRC (set)) == set_mode);
14350 /* Generate insn patterns to do an integer compare of OPERANDS. */
14353 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14355 enum machine_mode cmpmode;
/* Pick the least constrained CC mode that still implements CODE.  */
14358 cmpmode = SELECT_CC_MODE (code, op0, op1);
14359 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14361 /* This is very simple, but making the interface the same as in the
14362 FP case makes the rest of the code easier. */
14363 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14364 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14366 /* Return the test that should be put into the flags user, i.e.
14367 the bcc, scc, or cmov instruction. */
14368 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14371 /* Figure out whether to use ordered or unordered fp comparisons.
14372 Return the appropriate mode to use. */
/* CODE is currently ignored: the mode depends only on TARGET_IEEE_FP.  */
14375 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14377 /* ??? In order to make all comparisons reversible, we do all comparisons
14378 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14379 all forms trapping and nontrapping comparisons, we can make inequality
14380 comparisons trapping again, since it results in better code when using
14381 FCOM based compares. */
14382 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
/* Select the CC mode needed to represent comparison CODE of OP0/OP1.
   (The per-case return statements are elided in this listing.)  */
14386 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14388 enum machine_mode mode = GET_MODE (op0);
14390 if (SCALAR_FLOAT_MODE_P (mode))
14392 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14393 return ix86_fp_compare_mode (code);
14398 /* Only zero flag is needed. */
14399 case EQ: /* ZF=0 */
14400 case NE: /* ZF!=0 */
14402 /* Codes needing carry flag. */
14403 case GEU: /* CF=0 */
14404 case LTU: /* CF=1 */
14405 /* Detect overflow checks. They need just the carry flag. */
14406 if (GET_CODE (op0) == PLUS
14407 && rtx_equal_p (op1, XEXP (op0, 0)))
14411 case GTU: /* CF=0 & ZF=0 */
14412 case LEU: /* CF=1 | ZF=1 */
14413 /* Detect overflow checks. They need just the carry flag. */
14414 if (GET_CODE (op0) == MINUS
14415 && rtx_equal_p (op1, XEXP (op0, 0)))
14419 /* Codes possibly doable only with sign flag when
14420 comparing against zero. */
14421 case GE: /* SF=OF or SF=0 */
14422 case LT: /* SF<>OF or SF=1 */
14423 if (op1 == const0_rtx)
14426 /* For other cases Carry flag is not required. */
14428 /* Codes doable only with sign flag when comparing
14429 against zero, but we miss jump instruction for it
14430 so we need to use relational tests against overflow
14431 that thus needs to be zero. */
14432 case GT: /* ZF=0 & SF=OF */
14433 case LE: /* ZF=1 | SF<>OF */
14434 if (op1 == const0_rtx)
14438 /* strcmp pattern do (use flags) and combine may ask us for proper
14443 gcc_unreachable ();
14447 /* Return the fixed registers used for condition codes. */
14450 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
/* NOTE(review): body elided in this listing; presumably stores the
   flags register number(s) through P1/P2 -- confirm in full source.  */
14457 /* If two condition code modes are compatible, return a condition code
14458 mode which is compatible with both. Otherwise, return
14461 static enum machine_mode
14462 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
/* Identical modes and non-CC modes are handled first; the CCGC/CCGOC
   pair merges to the more constrained mode (return elided here).  */
14467 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14470 if ((m1 == CCGCmode && m2 == CCGOCmode)
14471 || (m1 == CCGOCmode && m2 == CCGCmode))
14477 gcc_unreachable ();
14507 /* These are only compatible with themselves, which we already
14514 /* Return a comparison we can do and that it is equivalent to
14515 swap_condition (code) apart possibly from orderedness.
14516 But, never change orderedness if TARGET_IEEE_FP, returning
14517 UNKNOWN in that case if necessary. */
14519 static enum rtx_code
14520 ix86_fp_swap_condition (enum rtx_code code)
/* The four cases below would flip ordered <-> unordered; that is only
   legal when not honoring IEEE NaN semantics.  */
14524 case GT: /* GTU - CF=0 & ZF=0 */
14525 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14526 case GE: /* GEU - CF=0 */
14527 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14528 case UNLT: /* LTU - CF=1 */
14529 return TARGET_IEEE_FP ? UNKNOWN : GT;
14530 case UNLE: /* LEU - CF=1 | ZF=1 */
14531 return TARGET_IEEE_FP ? UNKNOWN : GE;
14533 return swap_condition (code);
14537 /* Return cost of comparison CODE using the best strategy for performance.
14538 All following functions do use number of instructions as a cost metric.
14539 In future this should be tweaked to compute bytes for optimize_size and
14540 take into account performance of various instructions on various CPUs. */
14543 ix86_fp_comparison_cost (enum rtx_code code)
14547 /* The cost of code using bit-twiddling on %ah. */
14564 arith_cost = TARGET_IEEE_FP ? 5 : 4;
14568 arith_cost = TARGET_IEEE_FP ? 6 : 4;
14571 gcc_unreachable ();
/* COMI/SAHF strategies are cheaper than the %ah bit-twiddling; the
   exact figure depends on how expensive the arithmetic form was.  */
14574 switch (ix86_fp_comparison_strategy (code))
14576 case IX86_FPCMP_COMI:
14577 return arith_cost > 4 ? 3 : 2;
14578 case IX86_FPCMP_SAHF:
14579 return arith_cost > 4 ? 4 : 3;
14585 /* Return strategy to use for floating-point. We assume that fcomi is always
14586 preferable where available, since that is also true when looking at size
14587 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
14589 enum ix86_fpcmp_strategy
14590 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14592 /* Do fcomi/sahf based test when profitable. */
14595 return IX86_FPCMP_COMI;
14597 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
14598 return IX86_FPCMP_SAHF;
/* Fall back to fnstsw + integer bit tests.  */
14600 return IX86_FPCMP_ARITH;
14603 /* Swap, force into registers, or otherwise massage the two operands
14604 to a fp comparison. The operands are updated in place; the new
14605 comparison code is returned. */
14607 static enum rtx_code
14608 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14610 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14611 rtx op0 = *pop0, op1 = *pop1;
14612 enum machine_mode op_mode = GET_MODE (op0);
14613 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14615 /* All of the unordered compare instructions only work on registers.
14616 The same is true of the fcomi compare instructions. The XFmode
14617 compare instructions require registers except when comparing
14618 against zero or when converting operand 1 from fixed point to
14622 && (fpcmp_mode == CCFPUmode
14623 || (op_mode == XFmode
14624 && ! (standard_80387_constant_p (op0) == 1
14625 || standard_80387_constant_p (op1) == 1)
14626 && GET_CODE (op1) != FLOAT)
14627 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14629 op0 = force_reg (op_mode, op0);
14630 op1 = force_reg (op_mode, op1);
14634 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14635 things around if they appear profitable, otherwise force op0
14636 into a register. */
14638 if (standard_80387_constant_p (op0) == 0
14640 && ! (standard_80387_constant_p (op1) == 0
/* Swapping is only valid when an equivalent swapped condition exists
   (ix86_fp_swap_condition refuses IEEE-unsafe swaps).  */
14643 enum rtx_code new_code = ix86_fp_swap_condition (code);
14644 if (new_code != UNKNOWN)
14647 tmp = op0, op0 = op1, op1 = tmp;
14653 op0 = force_reg (op_mode, op0);
14655 if (CONSTANT_P (op1))
14657 int tmp = standard_80387_constant_p (op1);
14659 op1 = validize_mem (force_const_mem (op_mode, op1));
14663 op1 = force_reg (op_mode, op1);
14666 op1 = force_reg (op_mode, op1);
14670 /* Try to rearrange the comparison to make it cheaper. */
14671 if (ix86_fp_comparison_cost (code)
14672 > ix86_fp_comparison_cost (swap_condition (code))
14673 && (REG_P (op1) || can_create_pseudo_p ()))
14676 tmp = op0, op0 = op1, op1 = tmp;
14677 code = swap_condition (code);
14679 op0 = force_reg (op_mode, op0);
14687 /* Convert comparison codes we use to represent FP comparison to integer
14688 code that will result in proper branch. Return UNKNOWN if no such code
14692 ix86_fp_compare_code_to_integer (enum rtx_code code)
/* NOTE(review): body (a switch over CODE) is elided in this listing.  */
14721 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14724 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
14726 enum machine_mode fpcmp_mode, intcmp_mode;
14729 fpcmp_mode = ix86_fp_compare_mode (code);
14730 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14732 /* Do fcomi/sahf based test when profitable. */
14733 switch (ix86_fp_comparison_strategy (code))
14735 case IX86_FPCMP_COMI:
14736 intcmp_mode = fpcmp_mode;
14737 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14738 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14743 case IX86_FPCMP_SAHF:
14744 intcmp_mode = fpcmp_mode;
14745 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14746 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14750 scratch = gen_reg_rtx (HImode);
14751 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14752 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14755 case IX86_FPCMP_ARITH:
14756 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14757 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14758 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14760 scratch = gen_reg_rtx (HImode);
14761 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14763 /* In the unordered case, we have to check C2 for NaN's, which
14764 doesn't happen to work out to anything nice combination-wise.
14765 So do some bit twiddling on the value we've got in AH to come
14766 up with an appropriate set of condition codes. */
/* NOTE(review): in AH after fnstsw, the visible constants match the
   x87 status-word layout: 0x01 = C0, 0x04 = C2, 0x40 = C3, and
   0x45 = C0|C2|C3 -- confirm against the Intel SDM.  */
14768 intcmp_mode = CCNOmode;
14773 if (code == GT || !TARGET_IEEE_FP)
14775 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14780 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14781 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14782 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14783 intcmp_mode = CCmode;
14789 if (code == LT && TARGET_IEEE_FP)
14791 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14792 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
14793 intcmp_mode = CCmode;
14798 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
14804 if (code == GE || !TARGET_IEEE_FP)
14806 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14811 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14812 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
14818 if (code == LE && TARGET_IEEE_FP)
14820 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14821 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14822 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14823 intcmp_mode = CCmode;
14828 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14834 if (code == EQ && TARGET_IEEE_FP)
14836 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14837 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14838 intcmp_mode = CCmode;
14843 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14849 if (code == NE && TARGET_IEEE_FP)
14851 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14852 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14858 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14864 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14868 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14873 gcc_unreachable ();
14881 /* Return the test that should be put into the flags user, i.e.
14882 the bcc, scc, or cmov instruction. */
14883 return gen_rtx_fmt_ee (code, VOIDmode,
14884 gen_rtx_REG (intcmp_mode, FLAGS_REG),
/* Build the comparison rtx for CODE using the global ix86_compare_op0/1,
   dispatching on operand mode: already-emitted CC, scalar float, or
   integer.  */
14889 ix86_expand_compare (enum rtx_code code)
14892 op0 = ix86_compare_op0;
14893 op1 = ix86_compare_op1;
14895 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
14896 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
14898 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14900 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14901 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
14904 ret = ix86_expand_int_compare (code, op0, op1);
/* Emit a conditional branch to LABEL for comparison CODE of the global
   ix86_compare_op0/1, splitting double-word compares as needed.  */
14910 ix86_expand_branch (enum rtx_code code, rtx label)
14914 switch (GET_MODE (ix86_compare_op0))
14923 tmp = ix86_expand_compare (code);
14924 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14925 gen_rtx_LABEL_REF (VOIDmode, label),
14927 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14934 /* Expand DImode branch into multiple compare+branch. */
14936 rtx lo[2], hi[2], label2;
14937 enum rtx_code code1, code2, code3;
14938 enum machine_mode submode;
/* Canonicalize: constant operand goes on the right.  */
14940 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14942 tmp = ix86_compare_op0;
14943 ix86_compare_op0 = ix86_compare_op1;
14944 ix86_compare_op1 = tmp;
14945 code = swap_condition (code);
14947 if (GET_MODE (ix86_compare_op0) == DImode)
14949 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14950 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14955 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14956 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14960 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14961 avoid two branches. This costs one extra insn, so disable when
14962 optimizing for size. */
14964 if ((code == EQ || code == NE)
14965 && (!optimize_insn_for_size_p ()
14966 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14971 if (hi[1] != const0_rtx)
14972 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14973 NULL_RTX, 0, OPTAB_WIDEN);
14976 if (lo[1] != const0_rtx)
14977 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14978 NULL_RTX, 0, OPTAB_WIDEN);
14980 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14981 NULL_RTX, 0, OPTAB_WIDEN);
/* Recurse: compare the combined word against zero.  */
14983 ix86_compare_op0 = tmp;
14984 ix86_compare_op1 = const0_rtx;
14985 ix86_expand_branch (code, label);
14989 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14990 op1 is a constant and the low word is zero, then we can just
14991 examine the high word. Similarly for low word -1 and
14992 less-or-equal-than or greater-than. */
14994 if (CONST_INT_P (hi[1]))
14997 case LT: case LTU: case GE: case GEU:
14998 if (lo[1] == const0_rtx)
15000 ix86_compare_op0 = hi[0];
15001 ix86_compare_op1 = hi[1];
15002 ix86_expand_branch (code, label);
15006 case LE: case LEU: case GT: case GTU:
15007 if (lo[1] == constm1_rtx)
15009 ix86_compare_op0 = hi[0];
15010 ix86_compare_op1 = hi[1];
15011 ix86_expand_branch (code, label);
15019 /* Otherwise, we need two or three jumps. */
15021 label2 = gen_label_rtx ();
15024 code2 = swap_condition (code);
15025 code3 = unsigned_condition (code);
15029 case LT: case GT: case LTU: case GTU:
15032 case LE: code1 = LT; code2 = GT; break;
15033 case GE: code1 = GT; code2 = LT; break;
15034 case LEU: code1 = LTU; code2 = GTU; break;
15035 case GEU: code1 = GTU; code2 = LTU; break;
15037 case EQ: code1 = UNKNOWN; code2 = NE; break;
15038 case NE: code2 = UNKNOWN; break;
15041 gcc_unreachable ();
15046 * if (hi(a) < hi(b)) goto true;
15047 * if (hi(a) > hi(b)) goto false;
15048 * if (lo(a) < lo(b)) goto true;
15052 ix86_compare_op0 = hi[0];
15053 ix86_compare_op1 = hi[1];
15055 if (code1 != UNKNOWN)
15056 ix86_expand_branch (code1, label);
15057 if (code2 != UNKNOWN)
15058 ix86_expand_branch (code2, label2);
/* Low words compare unsigned regardless of the original signedness.  */
15060 ix86_compare_op0 = lo[0];
15061 ix86_compare_op1 = lo[1];
15062 ix86_expand_branch (code3, label);
15064 if (code2 != UNKNOWN)
15065 emit_label (label2);
15070 /* If we have already emitted a compare insn, go straight to simple.
15071 ix86_expand_compare won't emit anything if ix86_compare_emitted
15073 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15078 /* Split branch based on floating point condition. */
/* Emit a conditional jump on the FP comparison of OP1 and OP2.  TARGET1 and
   TARGET2 are the taken / fall-through destinations; when TARGET2 is not
   pc_rtx the condition is reversed so the "else" arm falls through.  PUSHED,
   when non-null, is an operand that was spilled to memory for the compare
   and is released here.  NOTE(review): intervening lines are elided in this
   excerpt; see the full file for the complete body.  */
15080 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15081 rtx target1, rtx target2, rtx tmp, rtx pushed)
15086 if (target2 != pc_rtx)
/* Reverse so the second target becomes the fall-through arm; the
   maybe_unordered variant keeps NaN (unordered) semantics intact.  */
15089 code = reverse_condition_maybe_unordered (code);
15094 condition = ix86_expand_fp_compare (code, op1, op2,
15097 /* Remove pushed operand from stack. */
15099 ix86_free_from_memory (GET_MODE (pushed));
15101 i = emit_jump_insn (gen_rtx_SET
15103 gen_rtx_IF_THEN_ELSE (VOIDmode,
15104 condition, target1, target2)));
/* Propagate the branch probability onto the new jump insn, if known.  */
15105 if (split_branch_probability >= 0)
15106 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
/* Store the result of comparison CODE (using the globals ix86_compare_op0/1,
   set by the caller) into the QImode register DEST via a setcc pattern.  */
15110 ix86_expand_setcc (enum rtx_code code, rtx dest)
15114 gcc_assert (GET_MODE (dest) == QImode);
15116 ret = ix86_expand_compare (code);
/* Force the comparison rtx to QImode so the SET below is a setcc.  */
15117 PUT_MODE (ret, QImode);
15118 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15121 /* Expand comparison setting or clearing carry flag. Return true when
15122 successful and set pop for the operation. */
/* On success *POP receives an LTU/GEU comparison rtx that reads only the
   carry flag, which lets callers use adc/sbb-style sequences.  Integer
   comparisons are rewritten into carry-producing unsigned forms; FP
   comparisons are accepted only when ix86_expand_fp_compare happens to
   yield a carry-based result.  NOTE(review): intervening lines are elided
   in this excerpt.  */
15124 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15126 enum machine_mode mode =
15127 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15129 /* Do not handle DImode compares that go through special path. */
15130 if (mode == (TARGET_64BIT ? TImode : DImode))
15133 if (SCALAR_FLOAT_MODE_P (mode))
15135 rtx compare_op, compare_seq;
15137 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15139 /* Shortcut: following common codes never translate
15140 into carry flag compares. */
15141 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15142 || code == ORDERED || code == UNORDERED)
15145 /* These comparisons require zero flag; swap operands so they won't. */
15146 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15147 && !TARGET_IEEE_FP)
15152 code = swap_condition (code);
15155 /* Try to expand the comparison and verify that we end up with
15156 carry flag based comparison. This fails to be true only when
15157 we decide to expand comparison using arithmetic that is not
15158 too common scenario. */
15160 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15161 compare_seq = get_insns ();
15164 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15165 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15166 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15168 code = GET_CODE (compare_op);
/* Only LTU/GEU read exactly the carry flag; anything else fails.  */
15170 if (code != LTU && code != GEU)
15173 emit_insn (compare_seq);
15178 if (!INTEGRAL_MODE_P (mode))
15187 /* Convert a==0 into (unsigned)a<1. */
15190 if (op1 != const0_rtx)
15193 code = (code == EQ ? LTU : GEU);
15196 /* Convert a>b into b<a or a>=b-1. */
15199 if (CONST_INT_P (op1))
15201 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15202 /* Bail out on overflow. We still can swap operands but that
15203 would force loading of the constant into register. */
15204 if (op1 == const0_rtx
15205 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15207 code = (code == GTU ? GEU : LTU);
15214 code = (code == GTU ? LTU : GEU);
15218 /* Convert a>=0 into (unsigned)a<0x80000000. */
15221 if (mode == DImode || op1 != const0_rtx)
15223 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15224 code = (code == LT ? GEU : LTU);
15228 if (mode == DImode || op1 != constm1_rtx)
15230 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15231 code = (code == LE ? GEU : LTU);
15237 /* Swapping operands may cause constant to appear as first operand. */
15238 if (!nonimmediate_operand (op0, VOIDmode))
15240 if (!can_create_pseudo_p ())
15242 op0 = force_reg (mode, op0);
15244 ix86_compare_op0 = op0;
15245 ix86_compare_op1 = op1;
15246 *pop = ix86_expand_compare (code);
/* Contract check: the rewrite above must have produced a pure
   carry-flag comparison.  */
15247 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
15252 ix86_expand_int_movcc (rtx operands[])
15254 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15255 rtx compare_seq, compare_op;
15256 enum machine_mode mode = GET_MODE (operands[0]);
15257 bool sign_bit_compare_p = false;;
15260 ix86_compare_op0 = XEXP (operands[1], 0);
15261 ix86_compare_op1 = XEXP (operands[1], 1);
15262 compare_op = ix86_expand_compare (code);
15263 compare_seq = get_insns ();
15266 compare_code = GET_CODE (compare_op);
15268 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15269 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15270 sign_bit_compare_p = true;
15272 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15273 HImode insns, we'd be swallowed in word prefix ops. */
15275 if ((mode != HImode || TARGET_FAST_PREFIX)
15276 && (mode != (TARGET_64BIT ? TImode : DImode))
15277 && CONST_INT_P (operands[2])
15278 && CONST_INT_P (operands[3]))
15280 rtx out = operands[0];
15281 HOST_WIDE_INT ct = INTVAL (operands[2]);
15282 HOST_WIDE_INT cf = INTVAL (operands[3]);
15283 HOST_WIDE_INT diff;
15286 /* Sign bit compares are better done using shifts than we do by using
15288 if (sign_bit_compare_p
15289 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15290 ix86_compare_op1, &compare_op))
15292 /* Detect overlap between destination and compare sources. */
15295 if (!sign_bit_compare_p)
15297 bool fpcmp = false;
15299 compare_code = GET_CODE (compare_op);
15301 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15302 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15305 compare_code = ix86_fp_compare_code_to_integer (compare_code);
15308 /* To simplify rest of code, restrict to the GEU case. */
15309 if (compare_code == LTU)
15311 HOST_WIDE_INT tmp = ct;
15314 compare_code = reverse_condition (compare_code);
15315 code = reverse_condition (code);
15320 PUT_CODE (compare_op,
15321 reverse_condition_maybe_unordered
15322 (GET_CODE (compare_op)));
15324 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
15328 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15329 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15330 tmp = gen_reg_rtx (mode);
15332 if (mode == DImode)
15333 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
15335 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
15339 if (code == GT || code == GE)
15340 code = reverse_condition (code);
15343 HOST_WIDE_INT tmp = ct;
15348 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15349 ix86_compare_op1, VOIDmode, 0, -1);
15362 tmp = expand_simple_binop (mode, PLUS,
15364 copy_rtx (tmp), 1, OPTAB_DIRECT);
15375 tmp = expand_simple_binop (mode, IOR,
15377 copy_rtx (tmp), 1, OPTAB_DIRECT);
15379 else if (diff == -1 && ct)
15389 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15391 tmp = expand_simple_binop (mode, PLUS,
15392 copy_rtx (tmp), GEN_INT (cf),
15393 copy_rtx (tmp), 1, OPTAB_DIRECT);
15401 * andl cf - ct, dest
15411 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15414 tmp = expand_simple_binop (mode, AND,
15416 gen_int_mode (cf - ct, mode),
15417 copy_rtx (tmp), 1, OPTAB_DIRECT);
15419 tmp = expand_simple_binop (mode, PLUS,
15420 copy_rtx (tmp), GEN_INT (ct),
15421 copy_rtx (tmp), 1, OPTAB_DIRECT);
15424 if (!rtx_equal_p (tmp, out))
15425 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15427 return 1; /* DONE */
15432 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15435 tmp = ct, ct = cf, cf = tmp;
15438 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15440 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15442 /* We may be reversing unordered compare to normal compare, that
15443 is not valid in general (we may convert non-trapping condition
15444 to trapping one), however on i386 we currently emit all
15445 comparisons unordered. */
15446 compare_code = reverse_condition_maybe_unordered (compare_code);
15447 code = reverse_condition_maybe_unordered (code);
15451 compare_code = reverse_condition (compare_code);
15452 code = reverse_condition (code);
15456 compare_code = UNKNOWN;
15457 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15458 && CONST_INT_P (ix86_compare_op1))
15460 if (ix86_compare_op1 == const0_rtx
15461 && (code == LT || code == GE))
15462 compare_code = code;
15463 else if (ix86_compare_op1 == constm1_rtx)
15467 else if (code == GT)
15472 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15473 if (compare_code != UNKNOWN
15474 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15475 && (cf == -1 || ct == -1))
15477 /* If lea code below could be used, only optimize
15478 if it results in a 2 insn sequence. */
15480 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15481 || diff == 3 || diff == 5 || diff == 9)
15482 || (compare_code == LT && ct == -1)
15483 || (compare_code == GE && cf == -1))
15486 * notl op1 (if necessary)
15494 code = reverse_condition (code);
15497 out = emit_store_flag (out, code, ix86_compare_op0,
15498 ix86_compare_op1, VOIDmode, 0, -1);
15500 out = expand_simple_binop (mode, IOR,
15502 out, 1, OPTAB_DIRECT);
15503 if (out != operands[0])
15504 emit_move_insn (operands[0], out);
15506 return 1; /* DONE */
15511 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15512 || diff == 3 || diff == 5 || diff == 9)
15513 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15515 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15521 * lea cf(dest*(ct-cf)),dest
15525 * This also catches the degenerate setcc-only case.
15531 out = emit_store_flag (out, code, ix86_compare_op0,
15532 ix86_compare_op1, VOIDmode, 0, 1);
15535 /* On x86_64 the lea instruction operates on Pmode, so we need
15536 to get arithmetics done in proper mode to match. */
15538 tmp = copy_rtx (out);
15542 out1 = copy_rtx (out);
15543 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15547 tmp = gen_rtx_PLUS (mode, tmp, out1);
15553 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15556 if (!rtx_equal_p (tmp, out))
15559 out = force_operand (tmp, copy_rtx (out));
15561 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15563 if (!rtx_equal_p (out, operands[0]))
15564 emit_move_insn (operands[0], copy_rtx (out));
15566 return 1; /* DONE */
15570 * General case: Jumpful:
15571 * xorl dest,dest cmpl op1, op2
15572 * cmpl op1, op2 movl ct, dest
15573 * setcc dest jcc 1f
15574 * decl dest movl cf, dest
15575 * andl (cf-ct),dest 1:
15578 * Size 20. Size 14.
15580 * This is reasonably steep, but branch mispredict costs are
15581 * high on modern cpus, so consider failing only if optimizing
15585 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15586 && BRANCH_COST (optimize_insn_for_speed_p (),
15591 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15596 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15598 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15600 /* We may be reversing unordered compare to normal compare,
15601 that is not valid in general (we may convert non-trapping
15602 condition to trapping one), however on i386 we currently
15603 emit all comparisons unordered. */
15604 code = reverse_condition_maybe_unordered (code);
15608 code = reverse_condition (code);
15609 if (compare_code != UNKNOWN)
15610 compare_code = reverse_condition (compare_code);
15614 if (compare_code != UNKNOWN)
15616 /* notl op1 (if needed)
15621 For x < 0 (resp. x <= -1) there will be no notl,
15622 so if possible swap the constants to get rid of the
15624 True/false will be -1/0 while code below (store flag
15625 followed by decrement) is 0/-1, so the constants need
15626 to be exchanged once more. */
15628 if (compare_code == GE || !cf)
15630 code = reverse_condition (code);
15635 HOST_WIDE_INT tmp = cf;
15640 out = emit_store_flag (out, code, ix86_compare_op0,
15641 ix86_compare_op1, VOIDmode, 0, -1);
15645 out = emit_store_flag (out, code, ix86_compare_op0,
15646 ix86_compare_op1, VOIDmode, 0, 1);
15648 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15649 copy_rtx (out), 1, OPTAB_DIRECT);
15652 out = expand_simple_binop (mode, AND, copy_rtx (out),
15653 gen_int_mode (cf - ct, mode),
15654 copy_rtx (out), 1, OPTAB_DIRECT);
15656 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15657 copy_rtx (out), 1, OPTAB_DIRECT);
15658 if (!rtx_equal_p (out, operands[0]))
15659 emit_move_insn (operands[0], copy_rtx (out));
15661 return 1; /* DONE */
15665 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15667 /* Try a few things more with specific constants and a variable. */
15670 rtx var, orig_out, out, tmp;
15672 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15673 return 0; /* FAIL */
15675 /* If one of the two operands is an interesting constant, load a
15676 constant with the above and mask it in with a logical operation. */
15678 if (CONST_INT_P (operands[2]))
15681 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15682 operands[3] = constm1_rtx, op = and_optab;
15683 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15684 operands[3] = const0_rtx, op = ior_optab;
15686 return 0; /* FAIL */
15688 else if (CONST_INT_P (operands[3]))
15691 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15692 operands[2] = constm1_rtx, op = and_optab;
15693 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
15694 operands[2] = const0_rtx, op = ior_optab;
15696 return 0; /* FAIL */
15699 return 0; /* FAIL */
15701 orig_out = operands[0];
15702 tmp = gen_reg_rtx (mode);
15705 /* Recurse to get the constant loaded. */
15706 if (ix86_expand_int_movcc (operands) == 0)
15707 return 0; /* FAIL */
15709 /* Mask in the interesting variable. */
15710 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15712 if (!rtx_equal_p (out, orig_out))
15713 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15715 return 1; /* DONE */
15719 * For comparison with above,
15729 if (! nonimmediate_operand (operands[2], mode))
15730 operands[2] = force_reg (mode, operands[2]);
15731 if (! nonimmediate_operand (operands[3], mode))
15732 operands[3] = force_reg (mode, operands[3]);
15734 if (! register_operand (operands[2], VOIDmode)
15736 || ! register_operand (operands[3], VOIDmode)))
15737 operands[2] = force_reg (mode, operands[2]);
15740 && ! register_operand (operands[3], VOIDmode))
15741 operands[3] = force_reg (mode, operands[3]);
15743 emit_insn (compare_seq);
15744 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15745 gen_rtx_IF_THEN_ELSE (mode,
15746 compare_op, operands[2],
15749 return 1; /* DONE */
15752 /* Swap, force into registers, or otherwise massage the two operands
15753 to an sse comparison with a mask result. Thus we differ a bit from
15754 ix86_prepare_fp_compare_args which expects to produce a flags result.
15756 The DEST operand exists to help determine whether to commute commutative
15757 operators. The POP0/POP1 operands are updated in place. The new
15758 comparison code is returned, or UNKNOWN if not implementable. */
15760 static enum rtx_code
15761 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15762 rtx *pop0, rtx *pop1)
/* NOTE(review): the dispatching switch over CODE is elided in this
   excerpt; only representative case bodies remain visible.  */
15770 /* We have no LTGT as an operator. We could implement it with
15771 NE & ORDERED, but this requires an extra temporary. It's
15772 not clear that it's worth it. */
15779 /* These are supported directly. */
15786 /* For commutative operators, try to canonicalize the destination
15787 operand to be first in the comparison - this helps reload to
15788 avoid extra moves. */
15789 if (!dest || !rtx_equal_p (dest, *pop1))
15797 /* These are not supported directly. Swap the comparison operands
15798 to transform into something that is supported. */
15802 code = swap_condition (code);
15806 gcc_unreachable ();
15812 /* Detect conditional moves that exactly match min/max operational
15813 semantics. Note that this is IEEE safe, as long as we don't
15814 interchange the operands.
15816 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15817 and TRUE if the operation is successful and instructions are emitted. */
15820 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15821 rtx cmp_op1, rtx if_true, rtx if_false)
15823 enum machine_mode mode;
/* Canonicalize UNGE-style codes by swapping the arms so the compare
   operands line up with if_true/if_false below.  */
15829 else if (code == UNGE)
15832 if_true = if_false;
/* The cmove matches a min/max only when the selected arms are exactly
   the compared operands (in either order).  */
15838 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15840 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15845 mode = GET_MODE (dest);
15847 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15848 but MODE may be a vector mode and thus not appropriate. */
15849 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
/* IEEE-careful path: wrap the operation in an UNSPEC so the optimizers
   cannot commute the operands (minss/maxss are not commutative wrt
   NaN and signed zero).  */
15851 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15854 if_true = force_reg (mode, if_true);
15855 v = gen_rtvec (2, if_true, if_false);
15856 tmp = gen_rtx_UNSPEC (mode, v, u);
/* Fast-math path: a plain SMIN/SMAX rtx is fine.  */
15860 code = is_min ? SMIN : SMAX;
15861 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15864 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15868 /* Expand an sse vector comparison. Return the register with the result. */
/* Emits DEST = CODE (CMP_OP0, CMP_OP1) as an SSE mask-producing compare.
   OP_TRUE/OP_FALSE are only inspected for overlap with DEST; when DEST
   overlaps either arm a fresh register is used and returned instead.  */
15871 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15872 rtx op_true, rtx op_false)
15874 enum machine_mode mode = GET_MODE (dest);
15877 cmp_op0 = force_reg (mode, cmp_op0);
15878 if (!nonimmediate_operand (cmp_op1, mode))
15879 cmp_op1 = force_reg (mode, cmp_op1);
15882 || reg_overlap_mentioned_p (dest, op_true)
15883 || reg_overlap_mentioned_p (dest, op_false))
15884 dest = gen_reg_rtx (mode);
15886 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15887 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15892 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15893 operations. This is used for both scalar and vector conditional moves. */
15896 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15898 enum machine_mode mode = GET_MODE (dest);
/* Special case: false arm is zero -> a single AND with the mask.  */
15901 if (op_false == CONST0_RTX (mode))
15903 op_true = force_reg (mode, op_true);
15904 x = gen_rtx_AND (mode, cmp, op_true);
15905 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* Special case: true arm is zero -> ANDN with the mask.  */
15907 else if (op_true == CONST0_RTX (mode))
15909 op_false = force_reg (mode, op_false);
15910 x = gen_rtx_NOT (mode, cmp);
15911 x = gen_rtx_AND (mode, x, op_false);
15912 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
/* SSE5 has a real vector cmov (pcmov); emit it directly.  */
15914 else if (TARGET_SSE5)
15916 rtx pcmov = gen_rtx_SET (mode, dest,
15917 gen_rtx_IF_THEN_ELSE (mode, cmp,
/* General case: (op_true & cmp) | (op_false & ~cmp).  */
15924 op_true = force_reg (mode, op_true);
15925 op_false = force_reg (mode, op_false);
15927 t2 = gen_reg_rtx (mode);
15929 t3 = gen_reg_rtx (mode);
15933 x = gen_rtx_AND (mode, op_true, cmp);
15934 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15936 x = gen_rtx_NOT (mode, cmp);
15937 x = gen_rtx_AND (mode, x, op_false);
15938 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15940 x = gen_rtx_IOR (mode, t3, t2);
15941 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15945 /* Expand a floating-point conditional move. Return true if successful. */
/* operands[0] = operands[1] (comparison) ? operands[2] : operands[3].
   Uses SSE compare+mask sequences when the mode is handled by SSE math,
   otherwise falls back to x87 fcmov, synthesizing an fcmov-compatible
   condition via setcc when necessary.  */
15948 ix86_expand_fp_movcc (rtx operands[])
15950 enum machine_mode mode = GET_MODE (operands[0]);
15951 enum rtx_code code = GET_CODE (operands[1]);
15952 rtx tmp, compare_op;
15954 ix86_compare_op0 = XEXP (operands[1], 0);
15955 ix86_compare_op1 = XEXP (operands[1], 1);
15956 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15958 enum machine_mode cmode;
15960 /* Since we've no cmove for sse registers, don't force bad register
15961 allocation just to gain access to it. Deny movcc when the
15962 comparison mode doesn't match the move mode. */
15963 cmode = GET_MODE (ix86_compare_op0);
15964 if (cmode == VOIDmode)
15965 cmode = GET_MODE (ix86_compare_op1);
15969 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15971 &ix86_compare_op1);
15972 if (code == UNKNOWN)
/* Prefer a direct min/max instruction when the cmove matches one.  */
15975 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15976 ix86_compare_op1, operands[2],
15980 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15981 ix86_compare_op1, operands[2], operands[3]);
15982 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15986 /* The floating point conditional move instructions don't directly
15987 support conditions resulting from a signed integer comparison. */
15989 compare_op = ix86_expand_compare (code);
15990 if (!fcmov_comparison_operator (compare_op, VOIDmode))
/* Reduce the unsupported condition to a setcc byte, then compare
   that byte against zero — which fcmov does support.  */
15992 tmp = gen_reg_rtx (QImode);
15993 ix86_expand_setcc (code, tmp);
15995 ix86_compare_op0 = tmp;
15996 ix86_compare_op1 = const0_rtx;
15997 compare_op = ix86_expand_compare (code);
16000 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16001 gen_rtx_IF_THEN_ELSE (mode, compare_op,
16002 operands[2], operands[3])));
16007 /* Expand a floating-point vector conditional move; a vcond operation
16008 rather than a movcc operation. */
/* operands[0] = (operands[4] CODE operands[5]) ? operands[1] : operands[2],
   where CODE comes from operands[3].  Follows the same strategy as
   ix86_expand_fp_movcc: min/max shortcut first, then compare + mask.  */
16011 ix86_expand_fp_vcond (rtx operands[])
16013 enum rtx_code code = GET_CODE (operands[3]);
16016 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16017 &operands[4], &operands[5]);
16018 if (code == UNKNOWN)
16021 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16022 operands[5], operands[1], operands[2]))
16025 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16026 operands[1], operands[2]);
16027 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16031 /* Expand a signed/unsigned integral vector conditional move. */
/* operands[0] = (operands[4] CODE operands[5]) ? operands[1] : operands[2].
   Hardware only provides EQ/GT (signed) compares, so other codes are
   canonicalized by reversing/swapping (possibly negating the selection
   via the NEGATE flag), and unsigned compares are rewritten in terms of
   signed ones.  NOTE(review): intervening lines are elided.  */
16034 ix86_expand_int_vcond (rtx operands[])
16036 enum machine_mode mode = GET_MODE (operands[0]);
16037 enum rtx_code code = GET_CODE (operands[3]);
16038 bool negate = false;
16041 cop0 = operands[4];
16042 cop1 = operands[5];
16044 /* SSE5 supports all of the comparisons on all vector int types. */
16047 /* Canonicalize the comparison to EQ, GT, GTU. */
16058 code = reverse_condition (code);
16064 code = reverse_condition (code);
16070 code = swap_condition (code);
16071 x = cop0, cop0 = cop1, cop1 = x;
16075 gcc_unreachable ();
16078 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16079 if (mode == V2DImode)
16084 /* SSE4.1 supports EQ. */
16085 if (!TARGET_SSE4_1)
16091 /* SSE4.2 supports GT/GTU. */
16092 if (!TARGET_SSE4_2)
16097 gcc_unreachable ();
16101 /* Unsigned parallel compare is not supported by the hardware. Play some
16102 tricks to turn this into a signed comparison against 0. */
16105 cop0 = force_reg (mode, cop0);
16114 /* Perform a parallel modulo subtraction. */
16115 t1 = gen_reg_rtx (mode);
16116 emit_insn ((mode == V4SImode
16118 : gen_subv2di3) (t1, cop0, cop1));
16120 /* Extract the original sign bit of op0. */
16121 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
16123 t2 = gen_reg_rtx (mode);
16124 emit_insn ((mode == V4SImode
16126 : gen_andv2di3) (t2, cop0, mask));
16128 /* XOR it back into the result of the subtraction. This results
16129 in the sign bit set iff we saw unsigned underflow. */
16130 x = gen_reg_rtx (mode);
16131 emit_insn ((mode == V4SImode
16133 : gen_xorv2di3) (x, t1, t2));
16141 /* Perform a parallel unsigned saturating subtraction. */
16142 x = gen_reg_rtx (mode);
16143 emit_insn (gen_rtx_SET (VOIDmode, x,
16144 gen_rtx_US_MINUS (mode, cop0, cop1)));
16151 gcc_unreachable ();
16155 cop1 = CONST0_RTX (mode);
/* NEGATE swaps which arm is selected, compensating for the code
   reversals performed during canonicalization above.  */
16159 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16160 operands[1+negate], operands[2-negate]);
16162 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16163 operands[2-negate]);
16167 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16168 true if we should do zero extension, else sign extension. HIGH_P is
16169 true if we want the N/2 high elements, else the low elements. */
/* SSE2 path: extension is done by interleaving the source with either a
   zero vector (unsigned) or a computed sign mask (signed).  */
16172 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16174 enum machine_mode imode = GET_MODE (operands[1]);
16175 rtx (*unpack)(rtx, rtx, rtx);
16182 unpack = gen_vec_interleave_highv16qi;
16184 unpack = gen_vec_interleave_lowv16qi;
16188 unpack = gen_vec_interleave_highv8hi;
16190 unpack = gen_vec_interleave_lowv8hi;
16194 unpack = gen_vec_interleave_highv4si;
16196 unpack = gen_vec_interleave_lowv4si;
16199 gcc_unreachable ();
16202 dest = gen_lowpart (imode, operands[0]);
/* Unsigned: interleave with zeros.  */
16205 se = force_reg (imode, CONST0_RTX (imode));
/* Signed: interleave with (0 > op1), i.e. a vector of sign masks.  */
16207 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16208 operands[1], pc_rtx, pc_rtx);
16210 emit_insn (unpack (dest, operands[1], se));
16213 /* This function performs the same task as ix86_expand_sse_unpack,
16214 but with SSE4.1 instructions. */
/* SSE4.1 has direct pmovzx/pmovsx extension insns, which only read the
   low half of the source; the high half is first shifted down.  */
16217 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16219 enum machine_mode imode = GET_MODE (operands[1]);
16220 rtx (*unpack)(rtx, rtx);
16227 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16229 unpack = gen_sse4_1_extendv8qiv8hi2;
16233 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16235 unpack = gen_sse4_1_extendv4hiv4si2;
16239 unpack = gen_sse4_1_zero_extendv2siv2di2;
16241 unpack = gen_sse4_1_extendv2siv2di2;
16244 gcc_unreachable ();
16247 dest = operands[0];
16250 /* Shift higher 8 bytes to lower 8 bytes. */
16251 src = gen_reg_rtx (imode);
16252 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
16253 gen_lowpart (TImode, operands[1]),
16259 emit_insn (unpack (dest, src));
16262 /* This function performs the same task as ix86_expand_sse_unpack,
16263 but with sse5 instructions. */
/* SSE5 path: builds a 16-byte pperm selector in PPERM_BYTES, where each
   data byte selects a source byte (PPERM_SRC2 | index) and each extension
   byte is either a zero byte (unsigned) or a replicated sign byte
   (PPERM_SIGN of the source's top byte).  H offsets the selection to the
   high half when HIGH_P.  One arm per source element width (QI/HI/SI).
   NOTE(review): the switch dispatching on the vector mode is elided in
   this excerpt.  */
16266 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16268 enum machine_mode imode = GET_MODE (operands[1]);
16269 int pperm_bytes[16];
16271 int h = (high_p) ? 8 : 0;
16274 rtvec v = rtvec_alloc (16);
16277 rtx op0 = operands[0], op1 = operands[1];
/* V16QI -> V8HI: one data byte + one extension byte per element.  */
16282 vs = rtvec_alloc (8);
16283 h2 = (high_p) ? 8 : 0;
16284 for (i = 0; i < 8; i++)
16286 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
16287 pperm_bytes[2*i+1] = ((unsigned_p)
16289 : PPERM_SIGN | PPERM_SRC2 | i | h);
16292 for (i = 0; i < 16; i++)
16293 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16295 for (i = 0; i < 8; i++)
16296 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16298 p = gen_rtx_PARALLEL (VOIDmode, vs);
16299 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16301 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
16303 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
/* V8HI -> V4SI: two data bytes + two extension bytes per element.  */
16307 vs = rtvec_alloc (4);
16308 h2 = (high_p) ? 4 : 0;
16309 for (i = 0; i < 4; i++)
16311 sign_extend = ((unsigned_p)
16313 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
16314 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
16315 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
16316 pperm_bytes[4*i+2] = sign_extend;
16317 pperm_bytes[4*i+3] = sign_extend;
16320 for (i = 0; i < 16; i++)
16321 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16323 for (i = 0; i < 4; i++)
16324 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16326 p = gen_rtx_PARALLEL (VOIDmode, vs);
16327 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16329 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
16331 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
/* V4SI -> V2DI: four data bytes + four extension bytes per element.  */
16335 vs = rtvec_alloc (2);
16336 h2 = (high_p) ? 2 : 0;
16337 for (i = 0; i < 2; i++)
16339 sign_extend = ((unsigned_p)
16341 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16342 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16343 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16344 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16345 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16346 pperm_bytes[8*i+4] = sign_extend;
16347 pperm_bytes[8*i+5] = sign_extend;
16348 pperm_bytes[8*i+6] = sign_extend;
16349 pperm_bytes[8*i+7] = sign_extend;
16352 for (i = 0; i < 16; i++)
16353 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16355 for (i = 0; i < 2; i++)
16356 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16358 p = gen_rtx_PARALLEL (VOIDmode, vs);
16359 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16361 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16363 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16367 gcc_unreachable ();
16373 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16374 next narrower integer vector type */
/* SSE5 pperm-based pack: the selector vector picks the low bytes of each
   wide element, taking the first half of the result from SRC1
   (operands[1]) and the second half from SRC2 (operands[2]).  One arm per
   destination element width; the dispatching switch is elided in this
   excerpt.  */
16376 ix86_expand_sse5_pack (rtx operands[3])
16378 enum machine_mode imode = GET_MODE (operands[0]);
16379 int pperm_bytes[16];
16381 rtvec v = rtvec_alloc (16);
16383 rtx op0 = operands[0];
16384 rtx op1 = operands[1];
16385 rtx op2 = operands[2];
/* V8HI sources -> V16QI result: keep byte 0 of every 16-bit element.  */
16390 for (i = 0; i < 8; i++)
16392 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16393 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16396 for (i = 0; i < 16; i++)
16397 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16399 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16400 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
/* V4SI sources -> V8HI result: keep bytes 0-1 of every 32-bit element.  */
16404 for (i = 0; i < 4; i++)
16406 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16407 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16408 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16409 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16412 for (i = 0; i < 16; i++)
16413 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16415 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16416 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
/* V2DI sources -> V4SI result: keep bytes 0-3 of every 64-bit element.  */
16420 for (i = 0; i < 2; i++)
16422 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16423 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16424 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16425 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16426 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16427 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16428 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16429 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16432 for (i = 0; i < 16; i++)
16433 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16435 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16436 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16440 gcc_unreachable ();
16446 /* Expand conditional increment or decrement using adc/sbb instructions.
16447 The default case using setcc followed by the conditional move can be
16448 done by generic code. */
/* operands[0] = operands[2] +/- (condition operands[1]), where operands[3]
   is const1_rtx or constm1_rtx.  Only succeeds when the condition can be
   expressed as a carry-flag compare (see ix86_expand_carry_flag_compare);
   the carry then feeds an adc or sbb directly.  Returns 1 on success,
   0 on failure.  */
16450 ix86_expand_int_addcc (rtx operands[])
16452 enum rtx_code code = GET_CODE (operands[1]);
16454 rtx val = const0_rtx;
16455 bool fpcmp = false;
16456 enum machine_mode mode = GET_MODE (operands[0]);
16458 ix86_compare_op0 = XEXP (operands[1], 0);
16459 ix86_compare_op1 = XEXP (operands[1], 1);
/* Only +1 / -1 adjustments map onto adc/sbb with a zero addend.  */
16460 if (operands[3] != const1_rtx
16461 && operands[3] != constm1_rtx)
16463 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16464 ix86_compare_op1, &compare_op))
16466 code = GET_CODE (compare_op);
16468 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16469 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16472 code = ix86_fp_compare_code_to_integer (code);
16479 PUT_CODE (compare_op,
16480 reverse_condition_maybe_unordered
16481 (GET_CODE (compare_op)));
16483 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16485 PUT_MODE (compare_op, mode);
16487 /* Construct either adc or sbb insn. */
/* LTU (carry set) with a -1 adjustment, or GEU with +1, selects the
   subtract-with-borrow form; the mirror cases use add-with-carry.  */
16488 if ((code == LTU) == (operands[3] == constm1_rtx))
16490 switch (GET_MODE (operands[0]))
16493 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16496 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16499 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16502 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16505 gcc_unreachable ();
16510 switch (GET_MODE (operands[0]))
16513 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16516 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16519 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16522 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16525 gcc_unreachable ();
16528 return 1; /* DONE */
16532 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16533 works for floating pointer parameters and nonoffsetable memories.
16534 For pushes, it returns just stack offsets; the values will be saved
16535 in the right order. Maximally three parts are generated. */
/* NOTE(review): the embedded numbering skips (e.g. 16538->16543), so the
   return-type line, braces, case labels and the final `return size;` were
   dropped by the extraction.  All surviving tokens are kept byte-identical.
   OPERAND is the value to split, PARTS receives up to four word-sized
   pieces, MODE is the full mode of OPERAND.  Presumably returns the part
   count -- TODO confirm against the unabridged source.  */
16538 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
/* Part count: 3 SImode words for XFmode on 32-bit, else byte size / 4;
   the 16545 line is presumably the 64-bit recomputation in 8-byte words
   (guarded by a dropped TARGET_64BIT test -- confirm).  */
16543 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16545 size = (GET_MODE_SIZE (mode) + 4) / 8;
16547 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16548 gcc_assert (size >= 2 && size <= 4);
16550 /* Optimize constant pool reference to immediates. This is used by fp
16551 moves, that force all constants to memory to allow combining. */
16552 if (MEM_P (operand) && MEM_READONLY_P (operand))
16554 rtx tmp = maybe_get_pool_constant (operand);
/* Non-offsettable memory: only pushes are supported; all parts alias the
   (auto-decrementing) push destination.  */
16559 if (MEM_P (operand) && !offsettable_memref_p (operand))
16561 /* The only non-offsetable memories we handle are pushes. */
16562 int ok = push_operand (operand, VOIDmode);
16566 operand = copy_rtx (operand);
16567 PUT_MODE (operand, Pmode);
16568 parts[0] = parts[1] = parts[2] = parts[3] = operand;
/* CONST_VECTOR is punned to the equally-sized integer mode so the
   integer splitters below can handle it.  */
16572 if (GET_CODE (operand) == CONST_VECTOR)
16574 enum machine_mode imode = int_mode_for_mode (mode);
16575 /* Caution: if we looked through a constant pool memory above,
16576 the operand may actually have a different mode now. That's
16577 ok, since we want to pun this all the way back to an integer. */
16578 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16579 gcc_assert (operand != NULL);
/* 32-bit path: split by register pair, offsettable memory, or
   CONST_DOUBLE image via real_to_target.  */
16585 if (mode == DImode)
16586 split_di (&operand, 1, &parts[0], &parts[1]);
16591 if (REG_P (operand))
16593 gcc_assert (reload_completed);
16594 for (i = 0; i < size; i++)
16595 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16597 else if (offsettable_memref_p (operand))
16599 operand = adjust_address (operand, SImode, 0);
16600 parts[0] = operand;
16601 for (i = 1; i < size; i++)
16602 parts[i] = adjust_address (operand, SImode, 4 * i);
16604 else if (GET_CODE (operand) == CONST_DOUBLE)
16609 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
/* Dropped case labels here presumably dispatch on mode (TFmode /
   XFmode / DFmode) -- confirm.  */
16613 real_to_target (l, &r, mode);
16614 parts[3] = gen_int_mode (l[3], SImode);
16615 parts[2] = gen_int_mode (l[2], SImode);
16618 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16619 parts[2] = gen_int_mode (l[2], SImode);
16622 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16625 gcc_unreachable ();
16627 parts[1] = gen_int_mode (l[1], SImode);
16628 parts[0] = gen_int_mode (l[0], SImode);
16631 gcc_unreachable ();
/* 64-bit path: TImode splits into two DImode words; XFmode/TFmode into
   a DImode low part and an SImode/DImode upper part.  */
16636 if (mode == TImode)
16637 split_ti (&operand, 1, &parts[0], &parts[1]);
16638 if (mode == XFmode || mode == TFmode)
16640 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16641 if (REG_P (operand))
16643 gcc_assert (reload_completed);
16644 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16645 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16647 else if (offsettable_memref_p (operand))
16649 operand = adjust_address (operand, DImode, 0);
16650 parts[0] = operand;
16651 parts[1] = adjust_address (operand, upper_mode, 8);
16653 else if (GET_CODE (operand) == CONST_DOUBLE)
16658 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16659 real_to_target (l, &r, mode);
16661 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16662 if (HOST_BITS_PER_WIDE_INT >= 64)
/* Reassemble two 32-bit target words into one HOST_WIDE_INT; the
   (2 << 31) - 1 mask and double shift-by-31 avoid shifting by 32.  */
16665 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16666 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16669 parts[0] = immed_double_const (l[0], l[1], DImode);
16671 if (upper_mode == SImode)
16672 parts[1] = gen_int_mode (l[2], SImode);
16673 else if (HOST_BITS_PER_WIDE_INT >= 64)
16676 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16677 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16680 parts[1] = immed_double_const (l[2], l[3], DImode);
16683 gcc_unreachable ();
16690 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16691 Return false when normal moves are needed; true when all required
16692 insns have been emitted. Operands 2-4 contain the input values
16693 int the correct order; operands 5-7 contain the output values. */
/* NOTE(review): lines were dropped by the extraction (numbering gaps such
   as 16696->16701), including braces, local declarations (part[][], push,
   nparts, i, j, tmp) and early returns.  Tokens below are byte-identical
   to what survived.  */
16696 ix86_split_long_move (rtx operands[])
16701 int collisions = 0;
16702 enum machine_mode mode = GET_MODE (operands[0]);
16703 bool collisionparts[4];
16705 /* The DFmode expanders may ask us to move double.
16706 For 64bit target this is single move. By hiding the fact
16707 here we simplify i386.md splitters. */
16708 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16710 /* Optimize constant pool reference to immediates. This is used by
16711 fp moves, that force all constants to memory to allow combining. */
16713 if (MEM_P (operands[1])
16714 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16715 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16716 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16717 if (push_operand (operands[0], VOIDmode))
16719 operands[0] = copy_rtx (operands[0]);
16720 PUT_MODE (operands[0], Pmode);
16723 operands[0] = gen_lowpart (DImode, operands[0]);
16724 operands[1] = gen_lowpart (DImode, operands[1]);
16725 emit_move_insn (operands[0], operands[1]);
16729 /* The only non-offsettable memory we handle is push. */
16730 if (push_operand (operands[0], VOIDmode))
16733 gcc_assert (!MEM_P (operands[0])
16734 || offsettable_memref_p (operands[0]));
/* Split both source and destination into word-sized parts.  */
16736 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16737 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16739 /* When emitting push, take care for source operands on the stack. */
16740 if (push && MEM_P (operands[1])
16741 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16743 rtx src_base = XEXP (part[1][nparts - 1], 0);
16745 /* Compensate for the stack decrement by 4. */
16746 if (!TARGET_64BIT && nparts == 3
16747 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16748 src_base = plus_constant (src_base, 4);
16750 /* src_base refers to the stack pointer and is
16751 automatically decreased by emitted push. */
16752 for (i = 0; i < nparts; i++)
16753 part[1][i] = change_address (part[1][i],
16754 GET_MODE (part[1][i]), src_base);
16757 /* We need to do copy in the right order in case an address register
16758 of the source overlaps the destination. */
16759 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
/* Record which destination parts collide with the source address;
   a dropped line presumably increments `collisions'.  */
16763 for (i = 0; i < nparts; i++)
16766 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16767 if (collisionparts[i])
16771 /* Collision in the middle part can be handled by reordering. */
16772 if (collisions == 1 && nparts == 3 && collisionparts [1])
16774 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16775 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16777 else if (collisions == 1
16779 && (collisionparts [1] || collisionparts [2]))
16781 if (collisionparts [1])
16783 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16784 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16788 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16789 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16793 /* If there are more collisions, we can't handle it by reordering.
16794 Do an lea to the last part and use only one colliding move. */
16795 else if (collisions > 1)
16801 base = part[0][nparts - 1];
16803 /* Handle the case when the last part isn't valid for lea.
16804 Happens in 64-bit mode storing the 12-byte XFmode. */
16805 if (GET_MODE (base) != Pmode)
16806 base = gen_rtx_REG (Pmode, REGNO (base));
16808 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16809 part[1][0] = replace_equiv_address (part[1][0], base);
16810 for (i = 1; i < nparts; i++)
16812 tmp = plus_constant (base, UNITS_PER_WORD * i);
16813 part[1][i] = replace_equiv_address (part[1][i], tmp);
/* Push path (dropped `if (push)' guard presumably precedes this --
   confirm): emit highest parts first; XFmode on 32-bit needs an extra
   4-byte stack adjustment because 12 bytes are pushed into a 16-byte
   slot.  */
16824 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16825 emit_insn (gen_addsi3 (stack_pointer_rtx,
16826 stack_pointer_rtx, GEN_INT (-4)));
16827 emit_move_insn (part[0][2], part[1][2]);
16829 else if (nparts == 4)
16831 emit_move_insn (part[0][3], part[1][3]);
16832 emit_move_insn (part[0][2], part[1][2]);
16837 /* In 64bit mode we don't have 32bit push available. In case this is
16838 register, it is OK - we will just use larger counterpart. We also
16839 retype memory - these comes from attempt to avoid REX prefix on
16840 moving of second half of TFmode value. */
16841 if (GET_MODE (part[1][1]) == SImode)
16843 switch (GET_CODE (part[1][1]))
16846 part[1][1] = adjust_address (part[1][1], DImode, 0);
16850 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16854 gcc_unreachable ();
16857 if (GET_MODE (part[1][0]) == SImode)
16858 part[1][0] = part[1][1];
16861 emit_move_insn (part[0][1], part[1][1]);
16862 emit_move_insn (part[0][0], part[1][0]);
16866 /* Choose correct order to not overwrite the source before it is copied. */
16867 if ((REG_P (part[0][0])
16868 && REG_P (part[1][1])
16869 && (REGNO (part[0][0]) == REGNO (part[1][1])
16871 && REGNO (part[0][0]) == REGNO (part[1][2]))
16873 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16875 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
/* Reversed copy order: dest low word would clobber a later source.  */
16877 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16879 operands[2 + i] = part[0][j];
16880 operands[6 + i] = part[1][j];
16885 for (i = 0; i < nparts; i++)
16887 operands[2 + i] = part[0][i];
16888 operands[6 + i] = part[1][i];
16892 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16893 if (optimize_insn_for_size_p ())
16895 for (j = 0; j < nparts - 1; j++)
16896 if (CONST_INT_P (operands[6 + j])
16897 && operands[6 + j] != const0_rtx
16898 && REG_P (operands[2 + j]))
16899 for (i = j; i < nparts - 1; i++)
16900 if (CONST_INT_P (operands[7 + i])
16901 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16902 operands[7 + i] = operands[2 + j];
16905 for (i = 0; i < nparts; i++)
16906 emit_move_insn (operands[2 + i], operands[6 + i]);
16911 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16912 left shift by a constant, either using a single shift or
16913 a sequence of add instructions. */
/* NOTE(review): the true arms of the `mode == DImode ? ... :' conditionals
   were dropped by the extraction (presumably gen_addsi3 / gen_ashlsi3 --
   the word mode is SImode when the full mode being split is DImode), as
   were the braces and the `if (count == 1)' style guards.  OPERAND is one
   word of the split value, COUNT the shift amount, MODE the full
   double-word mode.  */
16916 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16920 emit_insn ((mode == DImode
16922 : gen_adddi3) (operand, operand, operand));
/* Use repeated adds (x += x doubles) only when not optimizing for size
   and the adds are no more costly than one constant shift.  */
16924 else if (!optimize_insn_for_size_p ()
16925 && count * ix86_cost->add <= ix86_cost->shift_const)
16928 for (i=0; i<count; i++)
16930 emit_insn ((mode == DImode
16932 : gen_adddi3) (operand, operand, operand));
16936 emit_insn ((mode == DImode
16938 : gen_ashldi3) (operand, operand, GEN_INT (count)));
/* Split a double-word left shift (DImode on 32-bit, TImode on 64-bit)
   into word-sized operations.  OPERANDS are dest/src/count; SCRATCH is an
   optional spare register enabling the cmove-based variable-count path.
   NOTE(review): the extraction dropped lines throughout (numbering gaps),
   including braces and several `? gen_*si3' arms; tokens below are kept
   byte-identical.  */
16942 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16944 rtx low[2], high[2];
16946 const int single_width = mode == DImode ? 32 : 64;
/* Constant shift count: handle >= word-size shifts by moving low into
   high and clearing low, else use shld + shift on the halves.  */
16948 if (CONST_INT_P (operands[2]))
16950 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16951 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16953 if (count >= single_width)
16955 emit_move_insn (high[0], low[1]);
16956 emit_move_insn (low[0], const0_rtx);
16958 if (count > single_width)
16959 ix86_expand_ashl_const (high[0], count - single_width, mode);
16963 if (!rtx_equal_p (operands[0], operands[1]))
16964 emit_move_insn (operands[0], operands[1]);
16965 emit_insn ((mode == DImode
16967 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)))#;
16968 ix86_expand_ashl_const (low[0], count, mode);
16973 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
/* 1 << N special case: set exactly one of low/high to 1 based on bit
   5/6 of the count, then shift both words.  */
16975 if (operands[1] == const1_rtx)
16977 /* Assuming we've chosen a QImode capable registers, then 1 << N
16978 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16979 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16981 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16983 ix86_expand_clear (low[0]);
16984 ix86_expand_clear (high[0]);
16985 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16987 d = gen_lowpart (QImode, low[0]);
16988 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16989 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16990 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16992 d = gen_lowpart (QImode, high[0]);
16993 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16994 s = gen_rtx_NE (QImode, flags, const0_rtx);
16995 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16998 /* Otherwise, we can get the same results by manually performing
16999 a bit extract operation on bit 5/6, and then performing the two
17000 shifts. The two methods of getting 0/1 into low/high are exactly
17001 the same size. Avoiding the shift in the bit extract case helps
17002 pentium4 a bit; no one else seems to care much either way. */
17007 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17008 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17010 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17011 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17013 emit_insn ((mode == DImode
17015 : gen_lshrdi3) (high[0], high[0],
17016 GEN_INT (mode == DImode ? 5 : 6)));
17017 emit_insn ((mode == DImode
17019 : gen_anddi3) (high[0], high[0], const1_rtx));
17020 emit_move_insn (low[0], high[0]);
17021 emit_insn ((mode == DImode
17023 : gen_xordi3) (low[0], low[0], const1_rtx));
17026 emit_insn ((mode == DImode
17028 : gen_ashldi3) (low[0], low[0], operands[2]));
17029 emit_insn ((mode == DImode
17031 : gen_ashldi3) (high[0], high[0], operands[2]));
/* -1 << N special case: shld would only shift ones into ones.  */
17035 if (operands[1] == constm1_rtx)
17037 /* For -1 << N, we can avoid the shld instruction, because we
17038 know that we're shifting 0...31/63 ones into a -1. */
17039 emit_move_insn (low[0], constm1_rtx)#;
17040 if (optimize_insn_for_size_p ())
17041 emit_move_insn (high[0], low[0]);
17043 emit_move_insn (high[0], constm1_rtx);
/* General variable-count path: shld + shift, then fix up the >= word
   case either with a cmove (scratch available) or a branch.  */
17047 if (!rtx_equal_p (operands[0], operands[1]))
17048 emit_move_insn (operands[0], operands[1]);
17050 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17051 emit_insn ((mode == DImode
17053 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17056 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
17058 if (TARGET_CMOVE && scratch)
17060 ix86_expand_clear (scratch);
17061 emit_insn ((mode == DImode
17062 ? gen_x86_shift_adj_1
17063 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
17067 emit_insn ((mode == DImode
17068 ? gen_x86_shift_adj_2
17069 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
/* Split a double-word arithmetic right shift into word-sized operations.
   OPERANDS are dest/src/count; SCRATCH optionally enables the branch-free
   cmove fixup for variable counts.  NOTE(review): the extraction dropped
   lines (numbering gaps), including braces and several `? gen_*si3' arms;
   tokens below are byte-identical to what survived.  */
17073 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17075 rtx low[2], high[2];
17077 const int single_width = mode == DImode ? 32 : 64;
17079 if (CONST_INT_P (operands[2]))
17081 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17082 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift by all-bits-but-one: both words become the sign mask.  */
17084 if (count == single_width * 2 - 1)
17086 emit_move_insn (high[0], high[1]);
17087 emit_insn ((mode == DImode
17089 : gen_ashrdi3) (high[0], high[0],
17090 GEN_INT (single_width - 1)));
17091 emit_move_insn (low[0], high[0]);
/* Shift >= word size: low gets the old high; new high is the sign
   extension of the old high.  */
17094 else if (count >= single_width)
17096 emit_move_insn (low[0], high[1]);
17097 emit_move_insn (high[0], low[0]);
17098 emit_insn ((mode == DImode
17100 : gen_ashrdi3) (high[0], high[0],
17101 GEN_INT (single_width - 1)));
17102 if (count > single_width)
17103 emit_insn ((mode == DImode
17105 : gen_ashrdi3) (low[0], low[0],
17106 GEN_INT (count - single_width)));
/* Shift < word size: shrd low from high, then arithmetic-shift high.  */
17110 if (!rtx_equal_p (operands[0], operands[1]))
17111 emit_move_insn (operands[0], operands[1]);
17112 emit_insn ((mode == DImode
17114 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17115 emit_insn ((mode == DImode
17117 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
/* Variable count: shrd + sar, then fix up the >= word case with a
   cmove against the sign word in SCRATCH, or a branch otherwise.  */
17122 if (!rtx_equal_p (operands[0], operands[1]))
17123 emit_move_insn (operands[0], operands[1]);
17125 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17127 emit_insn ((mode == DImode
17129 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17130 emit_insn ((mode == DImode
17132 : gen_ashrdi3) (high[0], high[0], operands[2]));
17134 if (TARGET_CMOVE && scratch)
17136 emit_move_insn (scratch, high[0]);
17137 emit_insn ((mode == DImode
17139 : gen_ashrdi3) (scratch, scratch,
17140 GEN_INT (single_width - 1)));
17141 emit_insn ((mode == DImode
17142 ? gen_x86_shift_adj_1
17143 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17147 emit_insn ((mode == DImode
17148 ? gen_x86_shift_adj_3
17149 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
/* Split a double-word logical right shift into word-sized operations.
   Mirrors ix86_split_ashr but zero-fills instead of sign-extending.
   NOTE(review): lines were dropped by the extraction (numbering gaps);
   tokens below are byte-identical to what survived.  */
17154 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17156 rtx low[2], high[2];
17158 const int single_width = mode == DImode ? 32 : 64;
17160 if (CONST_INT_P (operands[2]))
17162 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17163 count = INTVAL (operands[2]) & (single_width * 2 - 1);
/* Shift >= word size: low gets old high, high becomes zero.  */
17165 if (count >= single_width)
17167 emit_move_insn (low[0], high[1]);
17168 ix86_expand_clear (high[0]);
17170 if (count > single_width)
17171 emit_insn ((mode == DImode
17173 : gen_lshrdi3) (low[0], low[0],
17174 GEN_INT (count - single_width)));
/* Shift < word size: shrd low from high, then logical-shift high.  */
17178 if (!rtx_equal_p (operands[0], operands[1]))
17179 emit_move_insn (operands[0], operands[1]);
17180 emit_insn ((mode == DImode
17182 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17183 emit_insn ((mode == DImode
17185 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17190 if (!rtx_equal_p (operands[0], operands[1]))
17191 emit_move_insn (operands[0], operands[1]);
17193 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17195 emit_insn ((mode == DImode
17197 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17198 emit_insn ((mode == DImode
17200 : gen_lshrdi3) (high[0], high[0], operands[2]));
17202 /* Heh. By reversing the arguments, we can reuse this pattern. */
17203 if (TARGET_CMOVE && scratch)
17205 ix86_expand_clear (scratch);
17206 emit_insn ((mode == DImode
17207 ? gen_x86_shift_adj_1
17208 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
17212 emit_insn ((mode == DImode
17213 ? gen_x86_shift_adj_2
17214 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
17218 /* Predict just emitted jump instruction to be taken with probability PROB. */
/* PROB is on the REG_BR_PROB_BASE scale (callers pass e.g.
   REG_BR_PROB_BASE * 50 / 100).  Attaches a REG_BR_PROB note to the
   last emitted insn, which must be a jump.  NOTE(review): the return
   type line and braces were dropped by the extraction.  */
17220 predict_jump (int prob)
17222 rtx insn = get_last_insn ();
17223 gcc_assert (JUMP_P (insn));
17224 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
17227 /* Helper function for the string operations below. Tests whether VARIABLE
17228 is aligned to VALUE bytes (i.e. (VARIABLE & VALUE) == 0). If true, jump
to the emitted label, which is presumably returned to the caller -- the
`return label;' line was dropped by the extraction along with the braces.
EPILOGUE selects the branch-probability hint: 50/50 for epilogue tests,
90% taken otherwise. */
17230 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17232 rtx label = gen_label_rtx ();
17233 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17234 if (GET_MODE (variable) == DImode)
17235 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17237 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
/* Branch to LABEL when the masked bits are zero (aligned).  */
17238 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17241 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17243 predict_jump (REG_BR_PROB_BASE * 90 / 100);
17247 /* Adjust COUNTER by the VALUE. */
/* Subtracts VALUE from COUNTREG in place, using the add pattern matching
   the register's mode.  NOTE(review): return type and braces dropped by
   the extraction.  */
17249 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17251 if (GET_MODE (countreg) == DImode)
17252 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)))#;
17254 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
17257 /* Zero extend possibly SImode EXP to Pmode register. */
/* Returns a fresh Pmode register holding EXP: constants (VOIDmode) are
   forced into a register, Pmode values are copied, and SImode values are
   zero-extended via zero_extendsidi2.  NOTE(review): the trailing
   `return r;' and braces were dropped by the extraction.  */
17259 ix86_zero_extend_to_Pmode (rtx exp)
17262 if (GET_MODE (exp) == VOIDmode)
17263 return force_reg (Pmode, exp);
17264 if (GET_MODE (exp) == Pmode)
17265 return copy_to_mode_reg (Pmode, exp);
17266 r = gen_reg_rtx (Pmode);
17267 emit_insn (gen_zero_extendsidi2 (r, exp));
17271 /* Divide COUNTREG by SCALE. */
/* SCALE must be a power of two (exact_log2 is used below).  Constant
   counts are divided at compile time; otherwise a logical right shift is
   emitted.  NOTE(review): the extraction dropped lines (numbering gaps),
   presumably including a `scale == 1' early return and the final
   `return sc;' -- confirm against the unabridged source.  */
17273 scale_counter (rtx countreg, int scale)
17276 rtx piece_size_mask;
17280 if (CONST_INT_P (countreg))
17281 return GEN_INT (INTVAL (countreg) / scale);
17282 gcc_assert (REG_P (countreg));
17284 piece_size_mask = GEN_INT (scale - 1);
17285 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17286 GEN_INT (exact_log2 (scale)),
17287 NULL, 1, OPTAB_DIRECT);
17291 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17292 DImode for constant loop counts. */
/* A count with a real machine mode keeps it; otherwise (constant) pick
   DImode only when the value does not fit 32 bits on a 64-bit target.
   NOTE(review): the `return' lines for the non-constant and final cases
   were dropped by the extraction.  */
17294 static enum machine_mode
17295 counter_mode (rtx count_exp)
17297 if (GET_MODE (count_exp) != VOIDmode)
17298 return GET_MODE (count_exp);
17299 if (!CONST_INT_P (count_exp))
17301 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
17306 /* When SRCPTR is non-NULL, output simple loop to move memory
17307 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
17308 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the
17309 equivalent loop to set memory by VALUE (supposed to be in MODE).
17311 The size is rounded down to whole number of chunk size moved at once.
17312 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
/* NOTE(review): lines were dropped by the extraction (numbering gaps),
   including braces, local declarations and an `expected_size' parameter
   line -- tokens below are byte-identical to what survived.  */
17316 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17317 rtx destptr, rtx srcptr, rtx value,
17318 rtx count, enum machine_mode mode, int unroll,
17321 rtx out_label, top_label, iter, tmp;
17322 enum machine_mode iter_mode = counter_mode (count);
17323 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17324 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17330 top_label = gen_label_rtx ();
17331 out_label = gen_label_rtx ();
17332 iter = gen_reg_rtx (iter_mode);
/* size = count rounded down to a multiple of the unrolled chunk.  */
17334 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17335 NULL, 1, OPTAB_DIRECT);
17336 /* Those two should combine. */
17337 if (piece_size == const1_rtx)
17339 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17341 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17343 emit_move_insn (iter, const0_rtx);
17345 emit_label (top_label);
/* Index both MEMs by the loop counter so aliasing info is kept.  */
17347 tmp = convert_modes (Pmode, iter_mode, iter, true);
17348 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17349 destmem = change_address (destmem, mode, x_addr);
17353 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17354 srcmem = change_address (srcmem, mode, y_addr);
17356 /* When unrolling for chips that reorder memory reads and writes,
17357 we can save registers by using single temporary.
17358 Also using 4 temporaries is overkill in 32bit mode. */
/* NOTE(review): the `&& 0' makes this branch unconditionally dead --
   deliberate disablement in the original, kept as-is.  */
17359 if (!TARGET_64BIT && 0)
17361 for (i = 0; i < unroll; i++)
17366 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17368 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17370 emit_move_insn (destmem, srcmem);
/* Enabled copy path: load up to 4 temporaries, then store them, so
   loads and stores can be scheduled apart.  */
17376 gcc_assert (unroll <= 4);
17377 for (i = 0; i < unroll; i++)
17379 tmpreg[i] = gen_reg_rtx (mode);
17383 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17385 emit_move_insn (tmpreg[i], srcmem);
17387 for (i = 0; i < unroll; i++)
17392 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17394 emit_move_insn (destmem, tmpreg[i]);
/* Set path (srcptr == NULL): store VALUE unroll times.  */
17399 for (i = 0; i < unroll; i++)
17403 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17404 emit_move_insn (destmem, value);
17407 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17408 true, OPTAB_LIB_WIDEN);
17410 emit_move_insn (iter, tmp);
17412 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
/* Back-edge probability derived from the expected trip count.  */
17414 if (expected_size != -1)
17416 expected_size /= GET_MODE_SIZE (mode) * unroll;
17417 if (expected_size == 0)
17419 else if (expected_size > REG_BR_PROB_BASE)
17420 predict_jump (REG_BR_PROB_BASE - 1);
17422 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17425 predict_jump (REG_BR_PROB_BASE * 80 / 100);
/* Advance the caller's pointers past the copied/set region.  */
17426 iter = ix86_zero_extend_to_Pmode (iter);
17427 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17428 true, OPTAB_LIB_WIDEN);
17429 if (tmp != destptr)
17430 emit_move_insn (destptr, tmp);
17433 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17434 true, OPTAB_LIB_WIDEN);
17436 emit_move_insn (srcptr, tmp);
17438 emit_label (out_label);
17441 /* Output "rep; mov" instruction.
17442 Arguments have same meaning as for previous function */
/* NOTE(review): lines were dropped by the extraction (numbering gaps),
   including the `rtx count' parameter line, braces, and a dropped body
   after the QImode/SImode reselection test at 17454 -- tokens below are
   byte-identical to what survived.  */
17444 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17445 rtx destptr, rtx srcptr,
17447 enum machine_mode mode)
17453 /* If the size is known, it is shorter to use rep movs. */
17454 if (mode == QImode && CONST_INT_P (count)
17455 && !(INTVAL (count) & 3))
/* Rewrap the MEMs as BLKmode based at the pointer registers.  */
17458 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17459 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17460 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17461 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17462 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Build the final-pointer expressions (ptr + count * piece size) the
   rep_mov pattern uses to describe its side effects.  */
17463 if (mode != QImode)
17465 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17466 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17467 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17468 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17469 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17470 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17474 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17475 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
/* Known count: round down to whole pieces and record MEM sizes for
   alias analysis; unknown count: clear any stale MEM_SIZE.  */
17477 if (CONST_INT_P (count))
17479 count = GEN_INT (INTVAL (count)
17480 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17481 destmem = shallow_copy_rtx (destmem);
17482 srcmem = shallow_copy_rtx (srcmem);
17483 set_mem_size (destmem, count);
17484 set_mem_size (srcmem, count);
17488 if (MEM_SIZE (destmem))
17489 set_mem_size (destmem, NULL_RTX);
17490 if (MEM_SIZE (srcmem))
17491 set_mem_size (srcmem, NULL_RTX);
17493 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17497 /* Output "rep; stos" instruction.
17498 Arguments have same meaning as for previous function */
/* NOTE(review): parameter list is truncated by the extraction (an
   `orig_value' parameter is referenced at 17519 but its declaration line
   was dropped), as were braces; tokens below are byte-identical.  */
17500 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17501 rtx count, enum machine_mode mode,
17507 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17508 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
/* VALUE must sit in a register of the store width.  */
17509 value = force_reg (mode, gen_lowpart (mode, value));
17510 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
/* Final-pointer expression (destptr + count * piece size) describing
   the rep_stos side effect.  */
17511 if (mode != QImode)
17513 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17514 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17515 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17518 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
/* Only a known zero-fill lets us assert a MEM_SIZE for aliasing.  */
17519 if (orig_value == const0_rtx && CONST_INT_P (count))
17521 count = GEN_INT (INTVAL (count)
17522 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17523 destmem = shallow_copy_rtx (destmem);
17524 set_mem_size (destmem, count);
17526 else if (MEM_SIZE (destmem))
17527 set_mem_size (destmem, NULL_RTX);
17528 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
/* Emit a single string-move (movs-style) of one MODE-sized piece from
   SRCMEM+OFFSET to DESTMEM+OFFSET, advancing DESTPTR and SRCPTR via the
   strmov pattern.  NOTE(review): return-type line and braces dropped by
   the extraction.  */
17532 emit_strmov (rtx destmem, rtx srcmem,
17533 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17535 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17536 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17537 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17540 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
/* NOTE(review): lines were dropped by the extraction (numbering gaps),
   including braces, `offset += ...' updates, a `return' after the
   constant-count case, and the TARGET_64BIT guards implied by the
   gcc_unreachable at 17559 -- tokens below are byte-identical.  */
17542 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17543 rtx destptr, rtx srcptr, rtx count, int max_size)
/* Constant count: emit straight-line moves for each set bit of the
   residual (16, 8, 4, 2, 1 bytes), largest first.  */
17546 if (CONST_INT_P (count))
17548 HOST_WIDE_INT countval = INTVAL (count);
17551 if ((countval & 0x10) && max_size > 16)
17555 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17556 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17559 gcc_unreachable ();
17562 if ((countval & 0x08) && max_size > 8)
17565 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17568 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17569 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17573 if ((countval & 0x04) && max_size > 4)
17575 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17578 if ((countval & 0x02) && max_size > 2)
17580 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17583 if ((countval & 0x01) && max_size > 1)
17585 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
/* Large variable residual: fall back to a byte loop over
   count & (max_size - 1).  */
17592 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17593 count, 1, OPTAB_DIRECT);
17594 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17595 count, QImode, 1, 4);
17599 /* When there are stringops, we can cheaply increase dest and src pointers.
17600 Otherwise we save code size by maintaining offset (zero is readily
17601 available from preceding rep operation) and using x86 addressing modes.
/* Variable residual <= 8 bytes: one aligned-test + conditional move per
   power of two, using auto-advancing strmov when available...  */
17603 if (TARGET_SINGLE_STRINGOP)
17607 rtx label = ix86_expand_aligntest (count, 4, true);
17608 src = change_address (srcmem, SImode, srcptr);
17609 dest = change_address (destmem, SImode, destptr);
17610 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17611 emit_label (label);
17612 LABEL_NUSES (label) = 1;
17616 rtx label = ix86_expand_aligntest (count, 2, true);
17617 src = change_address (srcmem, HImode, srcptr);
17618 dest = change_address (destmem, HImode, destptr);
17619 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17620 emit_label (label);
17621 LABEL_NUSES (label) = 1;
17625 rtx label = ix86_expand_aligntest (count, 1, true);
17626 src = change_address (srcmem, QImode, srcptr);
17627 dest = change_address (destmem, QImode, destptr);
17628 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17629 emit_label (label);
17630 LABEL_NUSES (label) = 1;
/* ... otherwise keep a running OFFSET register and use plain moves
   with ptr+offset addressing.  */
17635 rtx offset = force_reg (Pmode, const0_rtx);
17640 rtx label = ix86_expand_aligntest (count, 4, true);
17641 src = change_address (srcmem, SImode, srcptr);
17642 dest = change_address (destmem, SImode, destptr);
17643 emit_move_insn (dest, src);
17644 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17645 true, OPTAB_LIB_WIDEN);
17647 emit_move_insn (offset, tmp);
17648 emit_label (label);
17649 LABEL_NUSES (label) = 1;
17653 rtx label = ix86_expand_aligntest (count, 2, true);
17654 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17655 src = change_address (srcmem, HImode, tmp);
17656 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17657 dest = change_address (destmem, HImode, tmp);
17658 emit_move_insn (dest, src);
17659 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17660 true, OPTAB_LIB_WIDEN);
17662 emit_move_insn (offset, tmp);
17663 emit_label (label);
17664 LABEL_NUSES (label) = 1;
17668 rtx label = ix86_expand_aligntest (count, 1, true);
17669 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17670 src = change_address (srcmem, QImode, tmp);
17671 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17672 dest = change_address (destmem, QImode, tmp);
17673 emit_move_insn (dest, src);
17674 emit_label (label);
17675 LABEL_NUSES (label) = 1;
17680 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Byte-granular fallback: mask COUNT down to the residual and reuse the
   generic set/move loop in QImode with VALUE's low byte.  NOTE(review):
   return type, braces and the trailing unroll/expected-size arguments of
   the loop call were dropped by the extraction.  */
17682 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17683 rtx count, int max_size)
17686 expand_simple_binop (counter_mode (count), AND, count,
17687 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17688 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17689 gen_lowpart (QImode, value), count, QImode,
17693 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
/* Epilogue for the setmem expansion.  DESTMEM/DESTPTR address the block,
   VALUE is the (possibly promoted) fill value, COUNT the byte count and
   MAX_SIZE the largest tail the epilogue must handle.
   Two strategies: for a compile-time constant COUNT, emit a straight-line
   sequence of stores keyed off the low bits of the count; for a runtime
   COUNT, emit a tree of alignment tests (16/8/4/2/1) via
   ix86_expand_aligntest, each guarding the matching-size store.  */
17695 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17699 if (CONST_INT_P (count))
17701 HOST_WIDE_INT countval = INTVAL (count);
/* Constant count: test each power-of-two bit of the remainder and emit
   the corresponding store(s) unconditionally — no branches needed.  */
17704 if ((countval & 0x10) && max_size > 16)
17708 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17709 emit_insn (gen_strset (destptr, dest, value));
17710 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17711 emit_insn (gen_strset (destptr, dest, value));
17714 gcc_unreachable ();
17717 if ((countval & 0x08) && max_size > 8)
17721 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17722 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit fallback: two SImode stores instead of one DImode store.  */
17726 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17727 emit_insn (gen_strset (destptr, dest, value));
17728 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17729 emit_insn (gen_strset (destptr, dest, value));
17733 if ((countval & 0x04) && max_size > 4)
17735 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17736 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17739 if ((countval & 0x02) && max_size > 2)
17741 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17742 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17745 if ((countval & 0x01) && max_size > 1)
17747 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17748 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
/* Very large MAX_SIZE with runtime count: fall back to the byte loop.  */
17755 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
/* Runtime count: jump tree.  Each aligntest branches around the stores
   for one power-of-two chunk of the remainder.  */
17760 rtx label = ix86_expand_aligntest (count, 16, true);
17763 dest = change_address (destmem, DImode, destptr);
17764 emit_insn (gen_strset (destptr, dest, value));
17765 emit_insn (gen_strset (destptr, dest, value));
/* 32-bit fallback: four SImode stores cover the 16-byte chunk.  */
17769 dest = change_address (destmem, SImode, destptr);
17770 emit_insn (gen_strset (destptr, dest, value));
17771 emit_insn (gen_strset (destptr, dest, value));
17772 emit_insn (gen_strset (destptr, dest, value));
17773 emit_insn (gen_strset (destptr, dest, value));
17775 emit_label (label);
17776 LABEL_NUSES (label) = 1;
17780 rtx label = ix86_expand_aligntest (count, 8, true);
17783 dest = change_address (destmem, DImode, destptr);
17784 emit_insn (gen_strset (destptr, dest, value));
17788 dest = change_address (destmem, SImode, destptr);
17789 emit_insn (gen_strset (destptr, dest, value));
17790 emit_insn (gen_strset (destptr, dest, value));
17792 emit_label (label);
17793 LABEL_NUSES (label) = 1;
17797 rtx label = ix86_expand_aligntest (count, 4, true);
17798 dest = change_address (destmem, SImode, destptr);
17799 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17800 emit_label (label);
17801 LABEL_NUSES (label) = 1;
17805 rtx label = ix86_expand_aligntest (count, 2, true);
17806 dest = change_address (destmem, HImode, destptr);
17807 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17808 emit_label (label);
17809 LABEL_NUSES (label) = 1;
17813 rtx label = ix86_expand_aligntest (count, 1, true);
17814 dest = change_address (destmem, QImode, destptr);
17815 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17816 emit_label (label);
17817 LABEL_NUSES (label) = 1;
17821 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
17822 to DESIRED_ALIGNMENT.  Emits one conditional copy (1, 2 then 4 bytes)
   per alignment level; each copy is guarded by a runtime test of DESTPTR's
   low bits and COUNT is decremented accordingly.  */
17824 expand_movmem_prologue (rtx destmem, rtx srcmem,
17825 rtx destptr, rtx srcptr, rtx count,
17826 int align, int desired_alignment)
17828 if (align <= 1 && desired_alignment > 1)
17830 rtx label = ix86_expand_aligntest (destptr, 1, false);
17832 srcmem = change_address (srcmem, QImode, srcptr);
17832 destmem = change_address (destmem, QImode, destptr) /* see original 17832 */;
17833 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17834 ix86_adjust_counter (count, 1);
17835 emit_label (label);
17836 LABEL_NUSES (label) = 1;
17838 if (align <= 2 && desired_alignment > 2)
17840 rtx label = ix86_expand_aligntest (destptr, 2, false);
17841 srcmem = change_address (srcmem, HImode, srcptr);
17842 destmem = change_address (destmem, HImode, destptr);
17843 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17844 ix86_adjust_counter (count, 2);
17845 emit_label (label);
17846 LABEL_NUSES (label) = 1;
17848 if (align <= 4 && desired_alignment > 4)
17850 rtx label = ix86_expand_aligntest (destptr, 4, false);
17851 srcmem = change_address (srcmem, SImode, srcptr);
17852 destmem = change_address (destmem, SImode, destptr);
17853 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17854 ix86_adjust_counter (count, 4);
17855 emit_label (label);
17856 LABEL_NUSES (label) = 1;
/* Larger desired alignments would need more levels; not supported here.  */
17858 gcc_assert (desired_alignment <= 8);
17861 /* Copy enough from SRC (via *SRCP) to DST to align DST to DESIRED_ALIGN.
17862 ALIGN_BYTES is how many bytes need to be copied — a compile-time
   constant, so the copies are emitted unconditionally and the MEM
   alignment/size attributes of both operands are kept accurate.
   Returns the adjusted DST and stores the adjusted SRC through SRCP.  */
17864 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17865 int desired_align, int align_bytes)
17868 rtx src_size, dst_size;
/* If SRC's misalignment relative to DESIRED_ALIGN is known, compute how
   many bytes bring it to alignment; -1 means unknown.  */
17870 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17871 if (src_align_bytes >= 0)
17872 src_align_bytes = desired_align - src_align_bytes;
17873 src_size = MEM_SIZE (src);
17874 dst_size = MEM_SIZE (dst);
17875 if (align_bytes & 1)
17877 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17878 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17880 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17882 if (align_bytes & 2)
17884 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17885 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17886 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17887 set_mem_align (dst, 2 * BITS_PER_UNIT);
/* SRC is 2-aligned here only if its misalignment parity matches DST's.  */
17888 if (src_align_bytes >= 0
17889 && (src_align_bytes & 1) == (align_bytes & 1)
17890 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17891 set_mem_align (src, 2 * BITS_PER_UNIT);
17893 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17895 if (align_bytes & 4)
17897 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17898 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17899 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17900 set_mem_align (dst, 4 * BITS_PER_UNIT);
17901 if (src_align_bytes >= 0)
17903 unsigned int src_align = 0;
17904 if ((src_align_bytes & 3) == (align_bytes & 3))
17906 else if ((src_align_bytes & 1) == (align_bytes & 1))
17908 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17909 set_mem_align (src, src_align * BITS_PER_UNIT);
17912 emit_insn (gen_strmov (destreg, dst, srcreg, src));
/* Rewrite both operands as BLKmode past the copied prefix, with the best
   alignment now provable for the remainder.  */
17914 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17915 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17916 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17917 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17918 if (src_align_bytes >= 0)
17920 unsigned int src_align = 0;
17921 if ((src_align_bytes & 7) == (align_bytes & 7))
17923 else if ((src_align_bytes & 3) == (align_bytes & 3))
17925 else if ((src_align_bytes & 1) == (align_bytes & 1))
17927 if (src_align > (unsigned int) desired_align)
17928 src_align = desired_align;
17929 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17930 set_mem_align (src, src_align * BITS_PER_UNIT);
17933 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
/* Fix: shrink SRC's recorded size, not DST's again (DST was updated on
   the preceding statement from dst_size).  */
17935 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17940 /* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
17941 DESIRED_ALIGNMENT.  Mirror of expand_movmem_prologue for memset: one
   runtime-guarded store (1, 2 then 4 bytes of VALUE) per alignment level,
   decrementing COUNT for each store actually performed.  */
17943 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17944 int align, int desired_alignment)
17946 if (align <= 1 && desired_alignment > 1)
17948 rtx label = ix86_expand_aligntest (destptr, 1, false);
17949 destmem = change_address (destmem, QImode, destptr);
17950 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17951 ix86_adjust_counter (count, 1);
17952 emit_label (label);
17953 LABEL_NUSES (label) = 1;
17955 if (align <= 2 && desired_alignment > 2)
17957 rtx label = ix86_expand_aligntest (destptr, 2, false);
17958 destmem = change_address (destmem, HImode, destptr);
17959 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17960 ix86_adjust_counter (count, 2);
17961 emit_label (label);
17962 LABEL_NUSES (label) = 1;
17964 if (align <= 4 && desired_alignment > 4)
17966 rtx label = ix86_expand_aligntest (destptr, 4, false);
17967 destmem = change_address (destmem, SImode, destptr);
17968 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17969 ix86_adjust_counter (count, 4);
17970 emit_label (label);
17971 LABEL_NUSES (label) = 1;
/* Larger desired alignments would need more levels; not supported here.  */
17973 gcc_assert (desired_alignment <= 8);
17976 /* Set enough of DST to align DST, known to be aligned by ALIGN, to
17977 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored —
   a compile-time constant, so stores are emitted unconditionally and the
   MEM alignment/size attributes are kept accurate.  Returns adjusted DST.  */
17979 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17980 int desired_align, int align_bytes)
17983 rtx dst_size = MEM_SIZE (dst);
17984 if (align_bytes & 1)
17986 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17988 emit_insn (gen_strset (destreg, dst,
17989 gen_lowpart (QImode, value)));
17991 if (align_bytes & 2)
17993 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17994 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17995 set_mem_align (dst, 2 * BITS_PER_UNIT);
17997 emit_insn (gen_strset (destreg, dst,
17998 gen_lowpart (HImode, value)));
18000 if (align_bytes & 4)
18002 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18003 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18004 set_mem_align (dst, 4 * BITS_PER_UNIT);
18006 emit_insn (gen_strset (destreg, dst,
18007 gen_lowpart (SImode, value)));
/* Rewrite DST as BLKmode past the stored prefix; it is now DESIRED_ALIGN
   aligned and ALIGN_BYTES shorter.  */
18009 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18010 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18011 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18013 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18017 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation.
   COUNT is the constant byte count (0 if unknown), EXPECTED_SIZE a
   profile-based estimate (-1 if unknown), MEMSET distinguishes
   memset from memcpy cost tables.  On return *DYNAMIC_CHECK is -1 or a
   size threshold above which a runtime libcall branch should be used.  */
18018 static enum stringop_alg
18019 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18020 int *dynamic_check)
18022 const struct stringop_algs * algs;
18023 bool optimize_for_speed;
18024 /* Algorithms using the rep prefix want at least edi and ecx;
18025 additionally, memset wants eax and memcpy wants esi. Don't
18026 consider such algorithms if the user has appropriated those
18027 registers for their own purposes. */
18028 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18030 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18032 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18033 || (alg != rep_prefix_1_byte \
18034 && alg != rep_prefix_4_byte \
18035 && alg != rep_prefix_8_byte))
18036 const struct processor_costs *cost;
18038 /* Even if the string operation call is cold, we still might spend a lot
18039 of time processing large blocks. */
18040 if (optimize_function_for_size_p (cfun)
18041 || (optimize_insn_for_size_p ()
18042 && expected_size != -1 && expected_size < 256))
18043 optimize_for_speed = false;
18045 optimize_for_speed = true;
18047 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18049 *dynamic_check = -1;
18051 algs = &cost->memset[TARGET_64BIT != 0];
18053 algs = &cost->memcpy[TARGET_64BIT != 0];
/* An explicit -mstringop-strategy always wins if its registers are free.  */
18054 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18055 return stringop_alg;
18056 /* rep; movq or rep; movl is the smallest variant. */
18057 else if (!optimize_for_speed)
18059 if (!count || (count & 3))
18060 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18062 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18064 /* Very tiny blocks are best handled via the loop, REP is expensive to setup.
18066 else if (expected_size != -1 && expected_size < 4)
18067 return loop_1_byte;
18068 else if (expected_size != -1)
18071 enum stringop_alg alg = libcall;
/* Fix: the size-bucket table bound is MAX_STRINGOP_ALGS (i386.h),
   not the non-existent NAX_STRINGOP_ALGS.  */
18072 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18074 /* We get here if the algorithms that were not libcall-based
18075 were rep-prefix based and we are unable to use rep prefixes
18076 based on global register usage. Break out of the loop and
18077 use the heuristic below. */
18078 if (algs->size[i].max == 0)
18080 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18082 enum stringop_alg candidate = algs->size[i].alg;
18084 if (candidate != libcall && ALG_USABLE_P (candidate))
18086 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18087 last non-libcall inline algorithm. */
18088 if (TARGET_INLINE_ALL_STRINGOPS)
18090 /* When the current size is best to be copied by a libcall,
18091 but we are still forced to inline, run the heuristic below
18092 that will pick code for medium sized blocks. */
18093 if (alg != libcall)
18097 else if (ALG_USABLE_P (candidate))
18101 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18103 /* When asked to inline the call anyway, try to pick meaningful choice.
18104 We look for maximal size of block that is faster to copy by hand and
18105 take blocks of at most of that size guessing that average size will
18106 be roughly half of the block.
18108 If this turns out to be bad, we might simply specify the preferred
18109 choice in ix86_costs. */
18110 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18111 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18114 enum stringop_alg alg;
18116 bool any_alg_usable_p = true;
/* Fix: same MAX_STRINGOP_ALGS bound as above.  */
18118 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18120 enum stringop_alg candidate = algs->size[i].alg;
18121 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18123 if (candidate != libcall && candidate
18124 && ALG_USABLE_P (candidate))
18125 max = algs->size[i].max;
18127 /* If there aren't any usable algorithms, then recursing on
18128 smaller sizes isn't going to find anything. Just return the
18129 simple byte-at-a-time copy loop. */
18130 if (!any_alg_usable_p)
18132 /* Pick something reasonable. */
18133 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18134 *dynamic_check = 128;
18135 return loop_1_byte;
/* Recurse with half the largest inlinable size as the expected size;
   the recursion must settle on a concrete inline algorithm.  */
18139 alg = decide_alg (count, max / 2, memset, dynamic_check);
18140 gcc_assert (*dynamic_check == -1);
18141 gcc_assert (alg != libcall);
18142 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18143 *dynamic_check = max;
18146 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18147 #undef ALG_USABLE_P
18150 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18151 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
/* Returns the destination alignment the prologue should establish for the
   chosen algorithm ALG, never less than the known ALIGN; for very small
   expected sizes alignment work is skipped entirely.  */
18153 decide_alignment (int align,
18154 enum stringop_alg alg,
18157 int desired_align = 0;
18161 gcc_unreachable ();
18163 case unrolled_loop:
/* Word-sized chunks want pointer-sized alignment.  */
18164 desired_align = GET_MODE_SIZE (Pmode);
18166 case rep_prefix_8_byte:
18169 case rep_prefix_4_byte:
18170 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18171 copying whole cacheline at once. */
18172 if (TARGET_PENTIUMPRO)
18177 case rep_prefix_1_byte:
18178 /* PentiumPro has special logic triggering for 8 byte aligned blocks.
18179 copying whole cacheline at once. */
18180 if (TARGET_PENTIUMPRO)
/* Never request less than what is already known, and don't bother
   aligning blocks expected to be tiny.  */
18194 if (desired_align < align)
18195 desired_align = align;
18196 if (expected_size != -1 && expected_size < 4)
18197 desired_align = align;
18198 return desired_align;
18201 /* Return the smallest power of 2 greater than VAL. */
/* NOTE(review): function body elided from this listing excerpt.  */
18203 smallest_pow2_greater_than (int val)
18211 /* Expand string move (memcpy) operation. Use i386 string operations when
18212 profitable. expand_setmem contains similar code. The code depends upon
18213 architecture, block size and alignment, but always has the same
18216 1) Prologue guard: Conditional that jumps up to epilogues for small
18217 blocks that can be handled by epilogue alone. This is faster but
18218 also needed for correctness, since prologue assumes the block is larger
18219 than the desired alignment.
18221 Optional dynamic check for size and libcall for large
18222 blocks is emitted here too, with -minline-stringops-dynamically.
18224 2) Prologue: copy first few bytes in order to get destination aligned
18225 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
18226 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18227 We emit either a jump tree on power of two sized blocks, or a byte loop.
18229 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18230 with specified algorithm.
18232 4) Epilogue: code copying tail of the block that is too small to be
18233 handled by main body (or up to size guarded by prologue guard). */
18236 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18237 rtx expected_align_exp, rtx expected_size_exp)
18243 rtx jump_around_label = NULL;
18244 HOST_WIDE_INT align = 1;
18245 unsigned HOST_WIDE_INT count = 0;
18246 HOST_WIDE_INT expected_size = -1;
18247 int size_needed = 0, epilogue_size_needed;
18248 int desired_align = 0, align_bytes = 0;
18249 enum stringop_alg alg;
18251 bool need_zero_guard = false;
18253 if (CONST_INT_P (align_exp))
18254 align = INTVAL (align_exp);
18255 /* i386 can do misaligned access on reasonably increased cost. */
18256 if (CONST_INT_P (expected_align_exp)
18257 && INTVAL (expected_align_exp) > align)
18258 align = INTVAL (expected_align_exp);
18259 /* ALIGN is the minimum of destination and source alignment, but we care here
18260 just about destination alignment. */
18261 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18262 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18264 if (CONST_INT_P (count_exp))
18265 count = expected_size = INTVAL (count_exp);
18266 if (CONST_INT_P (expected_size_exp) && count == 0)
18267 expected_size = INTVAL (expected_size_exp);
18269 /* Make sure we don't need to care about overflow later on. */
18270 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18273 /* Step 0: Decide on preferred algorithm, desired alignment and
18274 size of chunks to be copied by main loop. */
18276 alg = decide_alg (count, expected_size, false, &dynamic_check);
18277 desired_align = decide_alignment (align, alg, expected_size);
18279 if (!TARGET_ALIGN_STRINGOPS)
18280 align = desired_align;
18282 if (alg == libcall)
18284 gcc_assert (alg != no_stringop);
18286 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18287 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18288 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
/* SIZE_NEEDED is the chunk size of the main loop; a zero guard is needed
   when a loop would mis-handle a count smaller than one chunk.  */
18293 gcc_unreachable ();
18295 need_zero_guard = true;
18296 size_needed = GET_MODE_SIZE (Pmode);
18298 case unrolled_loop:
18299 need_zero_guard = true;
18300 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18302 case rep_prefix_8_byte:
18305 case rep_prefix_4_byte:
18308 case rep_prefix_1_byte:
18312 need_zero_guard = true;
18317 epilogue_size_needed = size_needed;
18319 /* Step 1: Prologue guard. */
18321 /* Alignment code needs count to be in register. */
18322 if (CONST_INT_P (count_exp) && desired_align > align)
18324 if (INTVAL (count_exp) > desired_align
18325 && INTVAL (count_exp) > size_needed)
18328 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18329 if (align_bytes <= 0)
18332 align_bytes = desired_align - align_bytes;
18334 if (align_bytes == 0)
18335 count_exp = force_reg (counter_mode (count_exp), count_exp);
18337 gcc_assert (desired_align >= 1 && align >= 1);
18339 /* Ensure that alignment prologue won't copy past end of block. */
18340 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18342 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18343 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18344 Make sure it is power of 2. */
18345 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18349 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18351 /* If main algorithm works on QImode, no epilogue is needed.
18352 For small sizes just don't align anything. */
18353 if (size_needed == 1)
18354 desired_align = align;
/* Runtime guard: blocks smaller than EPILOGUE_SIZE_NEEDED jump straight
   to the epilogue.  */
18361 label = gen_label_rtx ();
18362 emit_cmp_and_jump_insns (count_exp,
18363 GEN_INT (epilogue_size_needed),
18364 LTU, 0, counter_mode (count_exp), 1, label);
18365 if (expected_size == -1 || expected_size < epilogue_size_needed)
18366 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18368 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18372 /* Emit code to decide on runtime whether library call or inline should be
18374 if (dynamic_check != -1)
18376 if (CONST_INT_P (count_exp))
18378 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18380 emit_block_move_via_libcall (dst, src, count_exp, false);
18381 count_exp = const0_rtx;
18387 rtx hot_label = gen_label_rtx ();
18388 jump_around_label = gen_label_rtx ();
18389 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18390 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18391 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18392 emit_block_move_via_libcall (dst, src, count_exp, false);
18393 emit_jump (jump_around_label);
18394 emit_label (hot_label);
18398 /* Step 2: Alignment prologue. */
18400 if (desired_align > align)
18402 if (align_bytes == 0)
18404 /* Except for the first move in epilogue, we no longer know
18405 constant offset in aliasing info. It doesn't seem worth
18406 the pain to maintain it for the first move, so throw away
18408 src = change_address (src, BLKmode, srcreg);
18409 dst = change_address (dst, BLKmode, destreg);
18410 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18415 /* If we know how many bytes need to be stored before dst is
18416 sufficiently aligned, maintain aliasing info accurately. */
18417 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18418 desired_align, align_bytes);
18419 count_exp = plus_constant (count_exp, -align_bytes);
18420 count -= align_bytes;
18422 if (need_zero_guard
18423 && (count < (unsigned HOST_WIDE_INT) size_needed
18424 || (align_bytes == 0
18425 && count < ((unsigned HOST_WIDE_INT) size_needed
18426 + desired_align - align))))
18428 /* It is possible that we copied enough so the main loop will not
18430 gcc_assert (size_needed > 1);
18431 if (label == NULL_RTX)
18432 label = gen_label_rtx ();
18433 emit_cmp_and_jump_insns (count_exp,
18434 GEN_INT (size_needed),
18435 LTU, 0, counter_mode (count_exp), 1, label);
18436 if (expected_size == -1
18437 || expected_size < (desired_align - align) / 2 + size_needed)
18438 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18440 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18443 if (label && size_needed == 1)
18445 emit_label (label);
18446 LABEL_NUSES (label) = 1;
18448 epilogue_size_needed = 1;
18450 else if (label == NULL_RTX)
18451 epilogue_size_needed = size_needed;
18453 /* Step 3: Main loop. */
18459 gcc_unreachable ();
18461 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18462 count_exp, QImode, 1, expected_size);
18465 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18466 count_exp, Pmode, 1, expected_size);
18468 case unrolled_loop:
18469 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18470 registers for 4 temporaries anyway. */
18471 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18472 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18475 case rep_prefix_8_byte:
18476 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18479 case rep_prefix_4_byte:
18480 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18483 case rep_prefix_1_byte:
18484 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18488 /* Adjust properly the offset of src and dest memory for aliasing. */
18489 if (CONST_INT_P (count_exp))
18491 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18492 (count / size_needed) * size_needed);
18493 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18494 (count / size_needed) * size_needed);
18498 src = change_address (src, BLKmode, srcreg);
18499 dst = change_address (dst, BLKmode, destreg);
18502 /* Step 4: Epilogue to copy the remaining bytes. */
18506 /* When the main loop is done, COUNT_EXP might hold original count,
18507 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18508 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18509 bytes. Compensate if needed. */
18511 if (size_needed < epilogue_size_needed)
18514 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18515 GEN_INT (size_needed - 1), count_exp, 1,
18517 if (tmp != count_exp)
18518 emit_move_insn (count_exp, tmp);
18520 emit_label (label);
18521 LABEL_NUSES (label) = 1;
18524 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18525 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18526 epilogue_size_needed);
18527 if (jump_around_label)
18528 emit_label (jump_around_label);
18532 /* Helper function for memcpy. For QImode value 0xXY produce
18533 0xXYXYXYXY of wide specified by MODE. This is essentially
18534 a * 0x10101010, but we can do slightly better than
18535 synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  (Despite the comment, in this file it is reached via
   promote_duplicated_reg_to_size from the setmem expansion.)  */
18538 promote_duplicated_reg (enum machine_mode mode, rtx val)
18540 enum machine_mode valmode = GET_MODE (val);
/* Number of shift+or steps needed: 3 for DImode (8->16->32->64 bits of
   replication), 2 for SImode.  */
18542 int nops = mode == DImode ? 3 : 2;
18544 gcc_assert (mode == SImode || mode == DImode);
18545 if (val == const0_rtx)
18546 return copy_to_mode_reg (mode, const0_rtx);
/* Constant byte: compute the replicated constant at compile time.  */
18547 if (CONST_INT_P (val))
18549 HOST_WIDE_INT v = INTVAL (val) & 255;
18553 if (mode == DImode)
18554 v |= (v << 16) << 16;
18555 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18558 if (valmode == VOIDmode)
18560 if (valmode != QImode)
18561 val = gen_lowpart (QImode, val);
18562 if (mode == QImode)
18564 if (!TARGET_PARTIAL_REG_STALL)
/* If a multiply by the replicated-ones constant is no more expensive
   than the shift/or sequence, use MULT instead.  */
18566 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18567 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18568 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18569 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18571 rtx reg = convert_modes (mode, QImode, val, true);
18572 tmp = promote_duplicated_reg (mode, const1_rtx);
18573 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18578 rtx reg = convert_modes (mode, QImode, val, true);
18580 if (!TARGET_PARTIAL_REG_STALL)
/* Insert the low byte into the second byte via insv when partial
   register writes are cheap.  */
18581 if (mode == SImode)
18582 emit_insn (gen_movsi_insv_1 (reg, reg));
18584 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18587 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18588 NULL, 1, OPTAB_DIRECT);
18590 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Replicate 16 bits to 32, then (for DImode) 32 to 64.  */
18592 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18593 NULL, 1, OPTAB_DIRECT);
18594 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18595 if (mode == SImode)
18597 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18598 NULL, 1, OPTAB_DIRECT);
18599 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18604 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18605 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18606 alignment from ALIGN to DESIRED_ALIGN. */
/* Picks the widest mode (DImode > SImode > HImode > as-is) that either the
   main loop's chunk size or the alignment prologue can make use of.  */
18608 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18613 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18614 promoted_val = promote_duplicated_reg (DImode, val);
18615 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18616 promoted_val = promote_duplicated_reg (SImode, val);
18617 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18618 promoted_val = promote_duplicated_reg (HImode, val);
18620 promoted_val = val;
18622 return promoted_val;
18625 /* Expand string clear operation (bzero). Use i386 string operations when
18626 profitable. See expand_movmem comment for explanation of individual
18627 steps performed. */
18629 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18630 rtx expected_align_exp, rtx expected_size_exp)
18635 rtx jump_around_label = NULL;
18636 HOST_WIDE_INT align = 1;
18637 unsigned HOST_WIDE_INT count = 0;
18638 HOST_WIDE_INT expected_size = -1;
18639 int size_needed = 0, epilogue_size_needed;
18640 int desired_align = 0, align_bytes = 0;
18641 enum stringop_alg alg;
18642 rtx promoted_val = NULL;
18643 bool force_loopy_epilogue = false;
18645 bool need_zero_guard = false;
18647 if (CONST_INT_P (align_exp))
18648 align = INTVAL (align_exp);
18649 /* i386 can do misaligned access on reasonably increased cost. */
18650 if (CONST_INT_P (expected_align_exp)
18651 && INTVAL (expected_align_exp) > align)
18652 align = INTVAL (expected_align_exp);
18653 if (CONST_INT_P (count_exp))
18654 count = expected_size = INTVAL (count_exp);
18655 if (CONST_INT_P (expected_size_exp) && count == 0)
18656 expected_size = INTVAL (expected_size_exp);
18658 /* Make sure we don't need to care about overflow later on. */
18659 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18662 /* Step 0: Decide on preferred algorithm, desired alignment and
18663 size of chunks to be copied by main loop. */
18665 alg = decide_alg (count, expected_size, true, &dynamic_check);
18666 desired_align = decide_alignment (align, alg, expected_size);
18668 if (!TARGET_ALIGN_STRINGOPS)
18669 align = desired_align;
18671 if (alg == libcall)
18673 gcc_assert (alg != no_stringop);
18675 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18676 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18681 gcc_unreachable ();
18683 need_zero_guard = true;
18684 size_needed = GET_MODE_SIZE (Pmode);
18686 case unrolled_loop:
18687 need_zero_guard = true;
18688 size_needed = GET_MODE_SIZE (Pmode) * 4;
18690 case rep_prefix_8_byte:
18693 case rep_prefix_4_byte:
18696 case rep_prefix_1_byte:
18700 need_zero_guard = true;
18704 epilogue_size_needed = size_needed;
18706 /* Step 1: Prologue guard. */
18708 /* Alignment code needs count to be in register. */
18709 if (CONST_INT_P (count_exp) && desired_align > align)
18711 if (INTVAL (count_exp) > desired_align
18712 && INTVAL (count_exp) > size_needed)
18715 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18716 if (align_bytes <= 0)
18719 align_bytes = desired_align - align_bytes;
18721 if (align_bytes == 0)
18723 enum machine_mode mode = SImode;
18724 if (TARGET_64BIT && (count & ~0xffffffff))
18726 count_exp = force_reg (mode, count_exp);
18729 /* Do the cheap promotion to allow better CSE across the
18730 main loop and epilogue (ie one load of the big constant in the
18731 front of all code. */
18732 if (CONST_INT_P (val_exp))
18733 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18734 desired_align, align);
18735 /* Ensure that alignment prologue won't copy past end of block. */
18736 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18738 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18739 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18740 Make sure it is power of 2. */
18741 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18743 /* To improve performance of small blocks, we jump around the VAL
18744 promoting mode. This mean that if the promoted VAL is not constant,
18745 we might not use it in the epilogue and have to use byte
18747 if (epilogue_size_needed > 2 && !promoted_val)
18748 force_loopy_epilogue = true;
18751 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18753 /* If main algorithm works on QImode, no epilogue is needed.
18754 For small sizes just don't align anything. */
18755 if (size_needed == 1)
18756 desired_align = align;
18763 label = gen_label_rtx ();
18764 emit_cmp_and_jump_insns (count_exp,
18765 GEN_INT (epilogue_size_needed),
18766 LTU, 0, counter_mode (count_exp), 1, label);
18767 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18768 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18770 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18773 if (dynamic_check != -1)
18775 rtx hot_label = gen_label_rtx ();
18776 jump_around_label = gen_label_rtx ();
18777 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18778 LEU, 0, counter_mode (count_exp), 1, hot_label);
18779 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18780 set_storage_via_libcall (dst, count_exp, val_exp, false);
18781 emit_jump (jump_around_label);
18782 emit_label (hot_label);
18785 /* Step 2: Alignment prologue. */
18787 /* Do the expensive promotion once we branched off the small blocks. */
18789 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18790 desired_align, align);
18791 gcc_assert (desired_align >= 1 && align >= 1);
18793 if (desired_align > align)
18795 if (align_bytes == 0)
18797 /* Except for the first move in epilogue, we no longer know
18798 constant offset in aliasing info. It don't seems to worth
18799 the pain to maintain it for the first move, so throw away
18801 dst = change_address (dst, BLKmode, destreg);
18802 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18807 /* If we know how many bytes need to be stored before dst is
18808 sufficiently aligned, maintain aliasing info accurately. */
18809 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18810 desired_align, align_bytes);
18811 count_exp = plus_constant (count_exp, -align_bytes);
18812 count -= align_bytes;
18814 if (need_zero_guard
18815 && (count < (unsigned HOST_WIDE_INT) size_needed
18816 || (align_bytes == 0
18817 && count < ((unsigned HOST_WIDE_INT) size_needed
18818 + desired_align - align))))
18820 /* It is possible that we copied enough so the main loop will not
18822 gcc_assert (size_needed > 1);
18823 if (label == NULL_RTX)
18824 label = gen_label_rtx ();
18825 emit_cmp_and_jump_insns (count_exp,
18826 GEN_INT (size_needed),
18827 LTU, 0, counter_mode (count_exp), 1, label);
18828 if (expected_size == -1
18829 || expected_size < (desired_align - align) / 2 + size_needed)
18830 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18832 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18835 if (label && size_needed == 1)
18837 emit_label (label);
18838 LABEL_NUSES (label) = 1;
18840 promoted_val = val_exp;
18841 epilogue_size_needed = 1;
18843 else if (label == NULL_RTX)
18844 epilogue_size_needed = size_needed;
18846 /* Step 3: Main loop. */
18852 gcc_unreachable ();
18854 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18855 count_exp, QImode, 1, expected_size);
18858 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18859 count_exp, Pmode, 1, expected_size);
18861 case unrolled_loop:
18862 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18863 count_exp, Pmode, 4, expected_size);
18865 case rep_prefix_8_byte:
18866 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18869 case rep_prefix_4_byte:
18870 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18873 case rep_prefix_1_byte:
18874 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18878 /* Adjust properly the offset of src and dest memory for aliasing. */
18879 if (CONST_INT_P (count_exp))
18880 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18881 (count / size_needed) * size_needed);
18883 dst = change_address (dst, BLKmode, destreg);
18885 /* Step 4: Epilogue to copy the remaining bytes. */
18889 /* When the main loop is done, COUNT_EXP might hold original count,
18890 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
18891 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
18892 bytes. Compensate if needed. */
18894 if (size_needed < epilogue_size_needed)
18897 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18898 GEN_INT (size_needed - 1), count_exp, 1,
18900 if (tmp != count_exp)
18901 emit_move_insn (count_exp, tmp);
18903 emit_label (label);
18904 LABEL_NUSES (label) = 1;
18907 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18909 if (force_loopy_epilogue)
18910 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18911 epilogue_size_needed);
18913 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18914 epilogue_size_needed);
18916 if (jump_around_label)
18917 emit_label (jump_around_label);
/* NOTE(review): lossy extraction -- the leading numbers are residue of the
   original file's line numbering and several source lines (braces, blank
   lines) are missing from every gap in that numbering.  Code left
   byte-identical; comments only added.

   Emits the unrolled-strlen RTL body: a byte-at-a-time prologue that aligns
   OUT to a 4-byte boundary, then a word loop using the classic
   ((x - 0x01010101) & ~x & 0x80808080) zero-byte test, and a branchless
   fixup of OUT via add-with-carry at the end.  */
18921 /* Expand the appropriate insns for doing strlen if not just doing
18924 out = result, initialized with the start address
18925 align_rtx = alignment of the address.
18926 scratch = scratch register, initialized with the startaddress when
18927 not aligned, otherwise undefined
18929 This is just the body. It needs the initializations mentioned above and
18930 some address computing at the end. These things are done in i386.md. */
18933 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18937 rtx align_2_label = NULL_RTX;
18938 rtx align_3_label = NULL_RTX;
18939 rtx align_4_label = gen_label_rtx ();
18940 rtx end_0_label = gen_label_rtx ();
18942 rtx tmpreg = gen_reg_rtx (SImode);
18943 rtx scratch = gen_reg_rtx (SImode);
/* Known compile-time alignment is only usable when ALIGN_RTX is constant.  */
18947 if (CONST_INT_P (align_rtx))
18948 align = INTVAL (align_rtx);
18950 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18952 /* Is there a known alignment and is it less than 4? */
18955 rtx scratch1 = gen_reg_rtx (Pmode);
18956 emit_move_insn (scratch1, out);
18957 /* Is there a known alignment and is it not 2? */
18960 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18961 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18963 /* Leave just the 3 lower bits. */
18964 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18965 NULL_RTX, 0, OPTAB_WIDEN)
18967 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18968 Pmode, 1, align_4_label);
18969 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18970 Pmode, 1, align_2_label);
18971 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18972 Pmode, 1, align_3_label);
18976 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18977 check if is aligned to 4 - byte. */
18979 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18980 NULL_RTX, 0, OPTAB_WIDEN);
18982 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18983 Pmode, 1, align_4_label);
18986 mem = change_address (src, QImode, out);
18988 /* Now compare the bytes. */
18990 /* Compare the first n unaligned byte on a byte per byte basis. */
18991 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18992 QImode, 1, end_0_label);
18994 /* Increment the address. */
18995 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18997 /* Not needed with an alignment of 2 */
19000 emit_label (align_2_label);
19002 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19005 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19007 emit_label (align_3_label);
19010 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19013 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19016 /* Generate loop to check 4 bytes at a time. It is not a good idea to
19017 align this loop. It gives only huge programs, but does not help to
19019 emit_label (align_4_label);
19021 mem = change_address (src, SImode, out);
19022 emit_move_insn (scratch, mem);
19023 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
19025 /* This formula yields a nonzero result iff one of the bytes is zero.
19026 This saves three branches inside loop and many cycles. */
19028 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19029 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19030 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19031 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19032 gen_int_mode (0x80808080, SImode)));
19033 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
/* CMOV path: locate the zero byte without branches using flag-conditional
   moves.  Presumably guarded by TARGET_CMOVE in a line missing here --
   TODO confirm against the unabridged source.  */
19038 rtx reg = gen_reg_rtx (SImode);
19039 rtx reg2 = gen_reg_rtx (Pmode);
19040 emit_move_insn (reg, tmpreg);
19041 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19043 /* If zero is not in the first two bytes, move two bytes forward. */
19044 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19045 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19046 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19047 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19048 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19051 /* Emit lea manually to avoid clobbering of flags. */
19052 emit_insn (gen_rtx_SET (SImode, reg2,
19053 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19055 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19056 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19057 emit_insn (gen_rtx_SET (VOIDmode, out,
19058 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
/* Non-CMOV path: explicit test-and-branch over the low two bytes.  */
19065 rtx end_2_label = gen_label_rtx ();
19066 /* Is zero in the first two bytes? */
19068 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19069 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19070 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19071 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19072 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19074 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19075 JUMP_LABEL (tmp) = end_2_label;
19077 /* Not in the first two. Move two bytes forward. */
19078 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19079 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19081 emit_label (end_2_label);
19085 /* Avoid branch in fixing the byte. */
19086 tmpreg = gen_lowpart (QImode, tmpreg);
19087 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19088 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
19089 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
19091 emit_label (end_0_label);
/* NOTE(review): lossy extraction -- leading numbers are original-line
   residue and some lines are missing in the gaps.  Code unchanged.

   Top-level strlen expander: chooses between the unrolled inline sequence
   (ix86_expand_strlensi_unroll_1) and the repne-scasb sequence (strlenqi_1
   with the UNSPEC_SCAS pattern), which needs eax/ecx/edi to be available.  */
19094 /* Expand strlen. */
19097 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19099 rtx addr, scratch1, scratch2, scratch3, scratch4;
19101 /* The generic case of strlen expander is long. Avoid it's
19102 expanding unless TARGET_INLINE_ALL_STRINGOPS. */
19104 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19105 && !TARGET_INLINE_ALL_STRINGOPS
19106 && !optimize_insn_for_size_p ()
19107 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19110 addr = force_reg (Pmode, XEXP (src, 0));
19111 scratch1 = gen_reg_rtx (Pmode);
19113 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19114 && !optimize_insn_for_size_p ())
19116 /* Well it seems that some optimizer does not combine a call like
19117 foo(strlen(bar), strlen(bar));
19118 when the move and the subtraction is done here. It does calculate
19119 the length just once when these instructions are done inside of
19120 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19121 often used and I use one fewer register for the lifetime of
19122 output_strlen_unroll() this is better. */
19124 emit_move_insn (out, addr);
19126 ix86_expand_strlensi_unroll_1 (out, src, align);
19128 /* strlensi_unroll_1 returns the address of the zero at the end of
19129 the string, like memchr(), so compute the length by subtracting
19130 the start address. */
19131 emit_insn ((*ix86_gen_sub3) (out, out, addr));
/* Fall back to repne-scasb; bail out when the needed hard registers are
   fixed by the user.  */
19137 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19138 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19141 scratch2 = gen_reg_rtx (Pmode);
19142 scratch3 = gen_reg_rtx (Pmode);
/* scratch4 = -1: the count register starts at "unlimited" for scasb.  */
19143 scratch4 = force_reg (Pmode, constm1_rtx);
19145 emit_move_insn (scratch3, addr);
19146 eoschar = force_reg (QImode, eoschar);
19148 src = replace_equiv_address_nv (src, scratch3);
19150 /* If .md starts supporting :P, this can be done in .md. */
19151 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19152 scratch4), UNSPEC_SCAS);
19153 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19154 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
19155 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
/* NOTE(review): lossy extraction -- the return-type line, braces and the
   final "return tmp;" fall in the numbering gaps.  Code unchanged.

   Builds TMP = @PLTOFF(SYMBOL) + PIC register, i.e. the runtime address of
   SYMBOL's PLT entry, valid only in the large x86-64 PIC code model (both
   preconditions are asserted below).  */
19160 /* For given symbol (function) construct code to compute address of it's PLT
19161 entry in large x86-64 PIC model. */
19163 construct_plt_address (rtx symbol)
19165 rtx tmp = gen_reg_rtx (Pmode);
19166 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19168 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19169 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19171 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19172 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
/* NOTE(review): lossy extraction -- several condition lines (e.g. the
   sibcall/TARGET_64BIT guards around 19216) are partially missing.  Code
   unchanged; comments only.

   Expands a call (or sibcall) to FNADDR: builds the CALL rtx, optionally
   wraps it in a SET (value-returning call) and a PARALLEL with a stack-pop
   SET, records hard-register USEs (PIC register, AL for varargs SSE count),
   and for 64-bit MS-ABI callers calling sysv adds explicit CLOBBERs for the
   registers that are call-clobbered only in the sysv ABI.  */
19177 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19179 rtx pop, int sibcall)
19181 rtx use = NULL, call;
19183 if (pop == const0_rtx)
19185 gcc_assert (!TARGET_64BIT || !pop);
19187 if (TARGET_MACHO && !TARGET_64BIT)
19190 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19191 fnaddr = machopic_indirect_call_target (fnaddr);
19196 /* Static functions and indirect calls don't need the pic register. */
19197 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19198 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19199 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19200 use_reg (&use, pic_offset_table_rtx);
/* 64-bit varargs: AL carries the number of SSE registers used.  */
19203 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19205 rtx al = gen_rtx_REG (QImode, AX_REG);
19206 emit_move_insn (al, callarg2);
19207 use_reg (&use, al);
/* Large PIC model: non-local symbols must be reached through their PLT
   entry address computed at runtime.  */
19210 if (ix86_cmodel == CM_LARGE_PIC
19212 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19213 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19214 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
19216 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
19217 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
19219 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19220 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19223 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
19225 call = gen_rtx_SET (VOIDmode, retval, call);
/* Represent callee-popped stack adjustment alongside the call itself.  */
19228 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19229 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19230 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19233 && ix86_cfun_abi () == MS_ABI
19234 && (!callarg2 || INTVAL (callarg2) != -2))
19236 /* We need to represent that SI and DI registers are clobbered
19238 static int clobbered_registers[] = {
19239 XMM6_REG, XMM7_REG, XMM8_REG,
19240 XMM9_REG, XMM10_REG, XMM11_REG,
19241 XMM12_REG, XMM13_REG, XMM14_REG,
19242 XMM15_REG, SI_REG, DI_REG
19245 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19246 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19247 UNSPEC_MS_TO_SYSV_CALL);
19251 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19252 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19255 (SSE_REGNO_P (clobbered_registers[i])
19257 clobbered_registers[i]));
19259 call = gen_rtx_PARALLEL (VOIDmode,
19260 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19264 call = emit_call_insn (call);
19266 CALL_INSN_FUNCTION_USAGE (call) = use;
/* NOTE(review): lossy extraction -- braces and the trailing "return f;"
   fall in the numbering gaps.  Code unchanged.

   Allocates and zero-initializes the per-function machine_function record
   (GGC_CNEW zeroes it), then sets the fields with non-zero defaults.  */
19270 /* Clear stack slot assignments remembered from previous functions.
19271 This is called from INIT_EXPANDERS once before RTL is emitted for each
19274 static struct machine_function *
19275 ix86_init_machine_status (void)
19277 struct machine_function *f;
19279 f = GGC_CNEW (struct machine_function);
/* -1 marks "not yet computed" for the prologue/epilogue heuristic.  */
19280 f->use_fast_prologue_epilogue_nregs = -1;
19281 f->tls_descriptor_call_expanded_p = 0;
19282 f->call_abi = ix86_abi;
/* NOTE(review): lossy extraction -- braces and the final return of the new
   slot's rtl fall in the numbering gaps.  Code unchanged.

   Memoized stack-slot allocator: returns a (copied) MEM for slot N in mode
   MODE, reusing a previously created slot when one with the same (mode, n)
   key exists, otherwise allocating a fresh one and linking it into the
   ix86_stack_locals list.  */
19287 /* Return a MEM corresponding to a stack slot with mode MODE.
19288 Allocate a new slot if necessary.
19290 The RTL for a function can have several slots available: N is
19291 which slot to use. */
19294 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19296 struct stack_local_entry *s;
19298 gcc_assert (n < MAX_386_STACK_LOCALS);
19300 /* Virtual slot is valid only before vregs are instantiated. */
19301 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
/* copy_rtx so each caller gets an independent MEM to mutate.  */
19303 for (s = ix86_stack_locals; s; s = s->next)
19304 if (s->mode == mode && s->n == n)
19305 return copy_rtx (s->rtl);
19307 s = (struct stack_local_entry *)
19308 ggc_alloc (sizeof (struct stack_local_entry));
19311 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19313 s->next = ix86_stack_locals;
19314 ix86_stack_locals = s;
/* NOTE(review): lossy extraction -- braces and part of the conditional
   expression (the TARGET_SUN_TLS/TARGET_GNU2_TLS leg around 19329) are
   missing.  Code unchanged.

   Lazily builds and caches (in GC-rooted ix86_tls_symbol) the SYMBOL_REF
   for the TLS helper; the underscore-prefixed "___tls_get_addr" variant is
   selected for GNU-TLS configurations per the visible conditional.  */
19318 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19320 static GTY(()) rtx ix86_tls_symbol;
19322 ix86_tls_get_addr (void)
19325 if (!ix86_tls_symbol)
19327 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
19328 (TARGET_ANY_GNU_TLS
19330 ? "___tls_get_addr"
19331 : "__tls_get_addr");
19334 return ix86_tls_symbol;
/* NOTE(review): lossy extraction -- braces fall in the numbering gaps.
   Code unchanged.

   Lazily builds and caches the SYMBOL_REF for _TLS_MODULE_BASE_ and tags it
   with the global-dynamic TLS model so later address legitimization treats
   it as a TLS symbol.  */
19337 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19339 static GTY(()) rtx ix86_tls_module_base_symbol;
19341 ix86_tls_module_base (void)
19344 if (!ix86_tls_module_base_symbol)
19346 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19347 "_TLS_MODULE_BASE_");
19348 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19349 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19352 return ix86_tls_module_base_symbol;
/* NOTE(review): lossy extraction -- the return statements carrying the
   actual byte counts (and several guard lines) are missing in the numbering
   gaps, so the concrete lengths returned for each case cannot be confirmed
   from this view.  Code unchanged.

   Computes the encoded length in bytes of a memory operand's address part
   (disp/SIB), excluding modrm, opcode and prefixes, by decomposing ADDR
   into base/index/disp and applying the ModRM/SIB special cases (esp/r12
   force SIB, ebp/r13 force a displacement, rip-relative disp32, 8-bit vs
   32-bit displacement).  */
19355 /* Calculate the length of the memory address in the instruction
19356 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19359 memory_address_length (rtx addr)
19361 struct ix86_address parts;
19362 rtx base, index, disp;
/* Autoinc/autodec addresses have no explicit displacement or SIB.  */
19366 if (GET_CODE (addr) == PRE_DEC
19367 || GET_CODE (addr) == POST_INC
19368 || GET_CODE (addr) == PRE_MODIFY
19369 || GET_CODE (addr) == POST_MODIFY)
19372 ok = ix86_decompose_address (addr, &parts);
/* Strip SUBREGs so the register checks below see the hard registers.  */
19375 if (parts.base && GET_CODE (parts.base) == SUBREG)
19376 parts.base = SUBREG_REG (parts.base);
19377 if (parts.index && GET_CODE (parts.index) == SUBREG)
19378 parts.index = SUBREG_REG (parts.index);
19381 index = parts.index;
19386 - esp as the base always wants an index,
19387 - ebp as the base always wants a displacement,
19388 - r12 as the base always wants an index,
19389 - r13 as the base always wants a displacement. */
19391 /* Register Indirect. */
19392 if (base && !index && !disp)
19394 /* esp (for its index) and ebp (for its displacement) need
19395 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
19398 && (addr == arg_pointer_rtx
19399 || addr == frame_pointer_rtx
19400 || REGNO (addr) == SP_REG
19401 || REGNO (addr) == BP_REG
19402 || REGNO (addr) == R12_REG
19403 || REGNO (addr) == R13_REG))
19407 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19408 is not disp32, but disp32(%rip), so for disp32
19409 SIB byte is needed, unless print_operand_address
19410 optimizes it into disp32(%rip) or (%rip) is implied
19412 else if (disp && !base && !index)
19419 if (GET_CODE (disp) == CONST)
19420 symbol = XEXP (disp, 0);
19421 if (GET_CODE (symbol) == PLUS
19422 && CONST_INT_P (XEXP (symbol, 1)))
19423 symbol = XEXP (symbol, 0);
19425 if (GET_CODE (symbol) != LABEL_REF
19426 && (GET_CODE (symbol) != SYMBOL_REF
19427 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19428 && (GET_CODE (symbol) != UNSPEC
19429 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19430 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19437 /* Find the length of the displacement constant. */
/* Constraint K == signed 8-bit immediate, i.e. a disp8 encoding.  */
19440 if (base && satisfies_constraint_K (disp))
19445 /* ebp always wants a displacement. Similarly r13. */
19446 else if (base && REG_P (base)
19447 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19450 /* An index requires the two-byte modrm form.... */
19452 /* ...like esp (or r12), which always wants an index. */
19453 || base == arg_pointer_rtx
19454 || base == frame_pointer_rtx
19455 || (base && REG_P (base)
19456 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* NOTE(review): lossy extraction -- the switch skeleton and the return
   statements carrying the byte counts are missing in the numbering gaps.
   Code unchanged.

   Default for the "length_immediate" insn attribute: scans the extracted
   operands for a constant and sizes the immediate by the insn's mode
   attribute, using the 1-byte short form when SHORTFORM is set and the
   (mode-truncated) value fits in [-128, 127].  */
19473 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19474 is set, expect that insn have 8bit immediate alternative. */
19476 ix86_attr_length_immediate_default (rtx insn, int shortform)
19480 extract_insn_cached (insn);
19481 for (i = recog_data.n_operands - 1; i >= 0; --i)
19482 if (CONSTANT_P (recog_data.operand[i]))
19484 enum attr_mode mode = get_attr_mode (insn);
19487 if (shortform && CONST_INT_P (recog_data.operand[i]))
19489 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
/* Truncate to the operand mode before the range check, so e.g. 0xffff
   in HImode counts as -1 and gets the short form.  */
19496 ival = trunc_int_for_mode (ival, HImode);
19499 ival = trunc_int_for_mode (ival, SImode);
19504 if (IN_RANGE (ival, -128, 127))
19521 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
19526 fatal_insn ("unknown insn mode", insn);
/* NOTE(review): lossy extraction -- braces, a "constraints++" style line
   near 19569, and the fallback return are missing in the numbering gaps.
   Code unchanged.

   Default for the "length_address" insn attribute: for LEA, measures the
   address expression in SET_SRC (unwrapping the zero-extend/subreg used for
   32-bit addresses in 64-bit mode); otherwise finds the insn's MEM operand,
   skips operands whose chosen alternative uses the 'X' (ignore) constraint,
   and measures that operand's address.  */
19531 /* Compute default value for "length_address" attribute. */
19533 ix86_attr_length_address_default (rtx insn)
19537 if (get_attr_type (insn) == TYPE_LEA)
19539 rtx set = PATTERN (insn), addr;
19541 if (GET_CODE (set) == PARALLEL)
19542 set = XVECEXP (set, 0, 0);
19544 gcc_assert (GET_CODE (set) == SET);
19546 addr = SET_SRC (set);
19547 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19549 if (GET_CODE (addr) == ZERO_EXTEND)
19550 addr = XEXP (addr, 0);
19551 if (GET_CODE (addr) == SUBREG)
19552 addr = SUBREG_REG (addr);
19555 return memory_address_length (addr);
19558 extract_insn_cached (insn);
19559 for (i = recog_data.n_operands - 1; i >= 0; --i)
19560 if (MEM_P (recog_data.operand[i]))
19562 constrain_operands_cached (reload_completed);
19563 if (which_alternative != -1)
19565 const char *constraints = recog_data.constraints[i];
19566 int alt = which_alternative;
/* Advance to the constraint string of the selected alternative.  */
19568 while (*constraints == '=' || *constraints == '+')
19571 while (*constraints++ != ',')
19573 /* Skip ignored operands. */
19574 if (*constraints == 'X')
19577 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* NOTE(review): lossy extraction -- the second parameter line (has_vex_w),
   braces and the return statements with the 3-vs-4 byte totals are missing
   in the numbering gaps.  Code unchanged.

   Default for the "length_vex" attribute (VEX prefix + opcode byte):
   anything forcing VEX.W, a non-0f opcode map, REX.W-class DImode GPR
   operands, or extended registers in a memory operand requires the 3-byte
   VEX form; otherwise the 2-byte form suffices.  */
19582 /* Compute default value for "length_vex" attribute. It includes
19583 2 or 3 byte VEX prefix and 1 opcode byte. */
19586 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19591 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19592 byte VEX prefix. */
19593 if (!has_0f_opcode || has_vex_w)
19596 /* We can always use 2 byte VEX prefix in 32bit. */
19600 extract_insn_cached (insn);
19602 for (i = recog_data.n_operands - 1; i >= 0; --i)
19603 if (REG_P (recog_data.operand[i]))
19605 /* REX.W bit uses 3 byte VEX prefix. */
19606 if (GET_MODE (recog_data.operand[i]) == DImode
19607 && GENERAL_REG_P (recog_data.operand[i]))
19612 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19613 if (MEM_P (recog_data.operand[i])
19614 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
/* NOTE(review): lossy extraction -- the "switch (ix86_tune)" line, the
   return statements with the per-CPU issue widths, and the default case are
   all missing in the numbering gaps, so the actual rates cannot be read
   from this view.  Code unchanged: only the case labels grouping CPUs by
   issue width remain.  */
19621 /* Return the maximum number of instructions a cpu can issue. */
19624 ix86_issue_rate (void)
19628 case PROCESSOR_PENTIUM:
19629 case PROCESSOR_ATOM:
19633 case PROCESSOR_PENTIUMPRO:
19634 case PROCESSOR_PENTIUM4:
19635 case PROCESSOR_ATHLON:
19637 case PROCESSOR_AMDFAM10:
19638 case PROCESSOR_NOCONA:
19639 case PROCESSOR_GENERIC32:
19640 case PROCESSOR_GENERIC64:
19643 case PROCESSOR_CORE2:
19651 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19652 by DEP_INSN and nothing set by DEP_INSN. */
19655 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19659 /* Simplify the test for uninteresting insns. */
19660 if (insn_type != TYPE_SETCC
19661 && insn_type != TYPE_ICMOV
19662 && insn_type != TYPE_FCMOV
19663 && insn_type != TYPE_IBR)
19666 if ((set = single_set (dep_insn)) != 0)
19668 set = SET_DEST (set);
19671 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19672 && XVECLEN (PATTERN (dep_insn), 0) == 2
19673 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19674 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19676 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19677 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19682 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19685 /* This test is true if the dependent insn reads the flags but
19686 not any other potentially set register. */
19687 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19690 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* NOTE(review): lossy extraction -- the return type, braces and the final
   "return false;" fall in the numbering gaps.  Code unchanged.

   Address Generation Interlock test: true iff the first MEM operand of
   USE_INSN has an address that is modified by SET_INSN (only the first MEM
   found is examined -- the loop returns on it unconditionally).  */
19696 /* Return true iff USE_INSN has a memory address with operands set by
19700 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19703 extract_insn_cached (use_insn);
19704 for (i = recog_data.n_operands - 1; i >= 0; --i)
19705 if (MEM_P (recog_data.operand[i]))
19707 rtx addr = XEXP (recog_data.operand[i], 0);
19708 return modified_in_p (addr, set_insn) != 0;
/* NOTE(review): lossy extraction -- the "switch (ix86_tune)" line, the
   concrete cost assignments/returns, several case labels (one processor
   case around 19797 is unlabeled here) and braces are missing in the
   numbering gaps.  Code unchanged.

   Scheduler hook (TARGET_SCHED_ADJUST_COST): tweaks the dependence COST
   between DEP_INSN and INSN per tuned CPU -- AGI penalties on Pentium,
   flag-pairing and load-latency hiding on P6-class and later cores, and
   cheaper FP memory operands on the Athlon family.  */
19714 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19716 enum attr_type insn_type, dep_insn_type;
19717 enum attr_memory memory;
19719 int dep_insn_code_number;
19721 /* Anti and output dependencies have zero cost on all CPUs. */
19722 if (REG_NOTE_KIND (link) != 0)
19725 dep_insn_code_number = recog_memoized (dep_insn);
19727 /* If we can't recognize the insns, we can't really do anything. */
19728 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19731 insn_type = get_attr_type (insn);
19732 dep_insn_type = get_attr_type (dep_insn);
19736 case PROCESSOR_PENTIUM:
19737 /* Address Generation Interlock adds a cycle of latency. */
19738 if (insn_type == TYPE_LEA)
19740 rtx addr = PATTERN (insn);
19742 if (GET_CODE (addr) == PARALLEL)
19743 addr = XVECEXP (addr, 0, 0);
19745 gcc_assert (GET_CODE (addr) == SET);
19747 addr = SET_SRC (addr);
19748 if (modified_in_p (addr, dep_insn))
19751 else if (ix86_agi_dependent (dep_insn, insn))
19754 /* ??? Compares pair with jump/setcc. */
19755 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19758 /* Floating point stores require value to be ready one cycle earlier. */
19759 if (insn_type == TYPE_FMOV
19760 && get_attr_memory (insn) == MEMORY_STORE
19761 && !ix86_agi_dependent (dep_insn, insn))
19765 case PROCESSOR_PENTIUMPRO:
19766 memory = get_attr_memory (insn);
19768 /* INT->FP conversion is expensive. */
19769 if (get_attr_fp_int_src (dep_insn))
19772 /* There is one cycle extra latency between an FP op and a store. */
19773 if (insn_type == TYPE_FMOV
19774 && (set = single_set (dep_insn)) != NULL_RTX
19775 && (set2 = single_set (insn)) != NULL_RTX
19776 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19777 && MEM_P (SET_DEST (set2)))
19780 /* Show ability of reorder buffer to hide latency of load by executing
19781 in parallel with previous instruction in case
19782 previous instruction is not needed to compute the address. */
19783 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19784 && !ix86_agi_dependent (dep_insn, insn))
19786 /* Claim moves to take one cycle, as core can issue one load
19787 at time and the next load can start cycle later. */
19788 if (dep_insn_type == TYPE_IMOV
19789 || dep_insn_type == TYPE_FMOV)
/* NOTE(review): a case label (presumably PROCESSOR_K6 or similar) is
   missing here in the extraction gap before 19797.  */
19797 memory = get_attr_memory (insn);
19799 /* The esp dependency is resolved before the instruction is really
19801 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19802 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19805 /* INT->FP conversion is expensive. */
19806 if (get_attr_fp_int_src (dep_insn))
19809 /* Show ability of reorder buffer to hide latency of load by executing
19810 in parallel with previous instruction in case
19811 previous instruction is not needed to compute the address. */
19812 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19813 && !ix86_agi_dependent (dep_insn, insn))
19815 /* Claim moves to take one cycle, as core can issue one load
19816 at time and the next load can start cycle later. */
19817 if (dep_insn_type == TYPE_IMOV
19818 || dep_insn_type == TYPE_FMOV)
19827 case PROCESSOR_ATHLON:
19829 case PROCESSOR_AMDFAM10:
19830 case PROCESSOR_ATOM:
19831 case PROCESSOR_GENERIC32:
19832 case PROCESSOR_GENERIC64:
19833 memory = get_attr_memory (insn);
19835 /* Show ability of reorder buffer to hide latency of load by executing
19836 in parallel with previous instruction in case
19837 previous instruction is not needed to compute the address. */
19838 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19839 && !ix86_agi_dependent (dep_insn, insn))
19841 enum attr_unit unit = get_attr_unit (insn);
19844 /* Because of the difference between the length of integer and
19845 floating unit pipeline preparation stages, the memory operands
19846 for floating point are cheaper.
19848 ??? For Athlon it the difference is most probably 2. */
19849 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19852 loadcost = TARGET_ATHLON ? 2 : 0;
19854 if (cost >= loadcost)
/* NOTE(review): lossy extraction -- the switch line, the per-CPU return
   values and the default case are missing in the numbering gaps, so the
   actual lookahead depths cannot be read from this view.  Code unchanged.  */
19867 /* How many alternative schedules to try. This should be as wide as the
19868 scheduling freedom in the DFA, but no wider. Making this value too
19869 large results extra work for the scheduler. */
19872 ia32_multipass_dfa_lookahead (void)
19876 case PROCESSOR_PENTIUM:
19879 case PROCESSOR_PENTIUMPRO:
/* NOTE(review): lossy extraction -- the returns raising the alignment
   (presumably to 64/128 bits) and the final fallback return fall in the
   numbering gaps.  Code unchanged.

   CONSTANT_ALIGNMENT hook: bumps alignment of FP/vector/integer constants
   by mode, and of long string constants to a word boundary when not
   optimizing for size.  */
19889 /* Compute the alignment given to a constant that is being placed in memory.
19890 EXP is the constant and ALIGN is the alignment that the object would
19892 The value of this function is used instead of that alignment to align
19896 ix86_constant_alignment (tree exp, int align)
19898 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19899 || TREE_CODE (exp) == INTEGER_CST)
19901 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19903 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19906 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19907 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19908 return BITS_PER_WORD;
/* NOTE(review): lossy extraction -- the returns carrying the raised
   alignment values, the TARGET_64BIT guard around 19933 and the final
   fallback return are missing in the numbering gaps.  Code unchanged.

   DATA_ALIGNMENT hook: raises alignment of large aggregates (up to
   max_align, capped by MAX_OFILE_ALIGNMENT), honors the x86-64 ABI rule for
   >=16-byte arrays, and bumps DFmode / 128-bit-mode members of arrays,
   complex types, records and scalars.  */
19913 /* Compute the alignment for a static variable.
19914 TYPE is the data type, and ALIGN is the alignment that
19915 the object would ordinarily have. The value of this function is used
19916 instead of that alignment to align the object. */
19919 ix86_data_alignment (tree type, int align)
19921 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19923 if (AGGREGATE_TYPE_P (type)
19924 && TYPE_SIZE (type)
19925 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19926 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19927 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19928 && align < max_align)
19931 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
19932 to 16byte boundary. */
19935 if (AGGREGATE_TYPE_P (type)
19936 && TYPE_SIZE (type)
19937 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19938 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19939 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19943 if (TREE_CODE (type) == ARRAY_TYPE)
19945 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19947 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19950 else if (TREE_CODE (type) == COMPLEX_TYPE)
19953 if (TYPE_MODE (type) == DCmode && align < 64)
19955 if ((TYPE_MODE (type) == XCmode
19956 || TYPE_MODE (type) == TCmode) && align < 128)
19959 else if ((TREE_CODE (type) == RECORD_TYPE
19960 || TREE_CODE (type) == UNION_TYPE
19961 || TREE_CODE (type) == QUAL_UNION_TYPE)
19962 && TYPE_FIELDS (type))
/* Record/union heuristic keys off the mode of the FIRST field only.  */
19964 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19966 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19969 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19970 || TREE_CODE (type) == INTEGER_TYPE)
19972 if (TYPE_MODE (type) == DFmode && align < 64)
19974 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* NOTE(review): lossy extraction -- the decl handling around 19994-20003,
   the condition head before 20008, the raised-alignment returns and the
   fallback return are missing in the numbering gaps.  Code unchanged.

   LOCAL_ALIGNMENT hook for stack objects: suppresses DImode realignment
   under -mpreferred-stack-boundary=2 (unless user-aligned), picks DFmode
   alignment for caller-save XFmode slots, and otherwise mirrors the
   per-type bumping logic of ix86_data_alignment.  */
19981 /* Compute the alignment for a local variable or a stack slot. EXP is
19982 the data type or decl itself, MODE is the widest mode available and
19983 ALIGN is the alignment that the object would ordinarily have. The
19984 value of this macro is used instead of that alignment to align the
19988 ix86_local_alignment (tree exp, enum machine_mode mode,
19989 unsigned int align)
19993 if (exp && DECL_P (exp))
19995 type = TREE_TYPE (exp);
20004 /* Don't do dynamic stack realignment for long long objects with
20005 -mpreferred-stack-boundary=2. */
20008 && ix86_preferred_stack_boundary < 64
20009 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20010 && (!type || !TYPE_USER_ALIGN (type))
20011 && (!decl || !DECL_USER_ALIGN (decl)))
20014 /* If TYPE is NULL, we are allocating a stack slot for caller-save
20015 register in MODE. We will return the largest alignment of XF
20019 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20020 align = GET_MODE_ALIGNMENT (DFmode);
20024 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20025 to 16byte boundary. */
20028 if (AGGREGATE_TYPE_P (type)
20029 && TYPE_SIZE (type)
20030 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20031 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20032 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20035 if (TREE_CODE (type) == ARRAY_TYPE)
20037 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20039 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20042 else if (TREE_CODE (type) == COMPLEX_TYPE)
20044 if (TYPE_MODE (type) == DCmode && align < 64)
20046 if ((TYPE_MODE (type) == XCmode
20047 || TYPE_MODE (type) == TCmode) && align < 128)
20050 else if ((TREE_CODE (type) == RECORD_TYPE
20051 || TREE_CODE (type) == UNION_TYPE
20052 || TREE_CODE (type) == QUAL_UNION_TYPE)
20053 && TYPE_FIELDS (type))
20055 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20057 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20060 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20061 || TREE_CODE (type) == INTEGER_TYPE)
20064 if (TYPE_MODE (type) == DFmode && align < 64)
20066 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
/* NOTE(review): lossy extraction -- the decl handling around 20087-20096,
   the return values and braces fall in the numbering gaps.  Code unchanged.

   MINIMUM_ALIGNMENT hook: only relevant for 32-bit targets with a 64-bit
   requested alignment and -mpreferred-stack-boundary below 64; demotes the
   minimum for DImode (long long) objects that are not user-aligned, so
   they do not force dynamic stack realignment.  */
20072 /* Compute the minimum required alignment for dynamic stack realignment
20073 purposes for a local variable, parameter or a stack slot. EXP is
20074 the data type or decl itself, MODE is its mode and ALIGN is the
20075 alignment that the object would ordinarily have. */
20078 ix86_minimum_alignment (tree exp, enum machine_mode mode,
20079 unsigned int align)
20083 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20086 if (exp && DECL_P (exp))
20088 type = TREE_TYPE (exp);
20097 /* Don't do dynamic stack realignment for long long objects with
20098 -mpreferred-stack-boundary=2. */
20099 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20100 && (!type || !TYPE_USER_ALIGN (type))
20101 && (!decl || !DECL_USER_ALIGN (decl)))
20107 /* Emit RTL insns to initialize the variable parts of a trampoline.
20108 FNADDR is an RTX for the address of the function's pure code.
20109 CXT is an RTX for the static chain value for the function. */
20111 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
/* 32-bit trampoline: "movl $CXT, %ecx; jmp FNADDR" — the jmp uses a
   PC-relative displacement, so it is computed from the instruction end
   (TRAMP + 10).  */
20115 /* Compute offset from the end of the jmp to the target function. */
20116 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
20117 plus_constant (tramp, 10),
20118 NULL_RTX, 1, OPTAB_DIRECT);
/* 0xb9 = opcode for "movl $imm32, %ecx"; the imm32 (CXT) follows at
   offset 1.  */
20119 emit_move_insn (gen_rtx_MEM (QImode, tramp),
20120 gen_int_mode (0xb9, QImode));
20121 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
/* 0xe9 = opcode for "jmp rel32"; the rel32 displacement follows at
   offset 6.  */
20122 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
20123 gen_int_mode (0xe9, QImode));
20124 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
20129 /* Try to load address using shorter movl instead of movabs.
20130 We may want to support movq for kernel mode, but kernel does not use
20131 trampolines at the moment. */
20132 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20134 fnaddr = copy_to_mode_reg (DImode, fnaddr);
/* Bytes 0x41 0xbb (stored little-endian as HImode 0xbb41) =
   REX.B prefix + "movl $imm32, %r11d"; zero-extends into %r11.  */
20135 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20136 gen_int_mode (0xbb41, HImode));
20137 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
20138 gen_lowpart (SImode, fnaddr));
/* else-arm: bytes 0x49 0xbb = REX.W+B "movabs $imm64, %r11" for a full
   64-bit function address.  */
20143 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20144 gen_int_mode (0xbb49, HImode));
20145 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20149 /* Load static chain using movabs to r10. */
/* Bytes 0x49 0xba = REX.W+B "movabs $imm64, %r10" (CXT).  */
20150 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20151 gen_int_mode (0xba49, HImode));
20152 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
20155 /* Jump to the r11 */
/* Bytes 0x49 0xff 0xe3 = REX "jmp *%r11" — indirect jump to the loaded
   function address.  */
20156 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
20157 gen_int_mode (0xff49, HImode));
20158 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
20159 gen_int_mode (0xe3, QImode));
/* Sanity check: the code emitted above must fit in the trampoline slot
   reserved by TRAMPOLINE_SIZE.  */
20161 gcc_assert (offset <= TRAMPOLINE_SIZE);
20164 #ifdef ENABLE_EXECUTE_STACK
/* On targets with non-executable stacks, ask libgcc to mark the
   trampoline's page executable.  */
20165 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20166 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
20170 /* Codes for all the SSE/MMX builtins. */
20173 IX86_BUILTIN_ADDPS,
20174 IX86_BUILTIN_ADDSS,
20175 IX86_BUILTIN_DIVPS,
20176 IX86_BUILTIN_DIVSS,
20177 IX86_BUILTIN_MULPS,
20178 IX86_BUILTIN_MULSS,
20179 IX86_BUILTIN_SUBPS,
20180 IX86_BUILTIN_SUBSS,
20182 IX86_BUILTIN_CMPEQPS,
20183 IX86_BUILTIN_CMPLTPS,
20184 IX86_BUILTIN_CMPLEPS,
20185 IX86_BUILTIN_CMPGTPS,
20186 IX86_BUILTIN_CMPGEPS,
20187 IX86_BUILTIN_CMPNEQPS,
20188 IX86_BUILTIN_CMPNLTPS,
20189 IX86_BUILTIN_CMPNLEPS,
20190 IX86_BUILTIN_CMPNGTPS,
20191 IX86_BUILTIN_CMPNGEPS,
20192 IX86_BUILTIN_CMPORDPS,
20193 IX86_BUILTIN_CMPUNORDPS,
20194 IX86_BUILTIN_CMPEQSS,
20195 IX86_BUILTIN_CMPLTSS,
20196 IX86_BUILTIN_CMPLESS,
20197 IX86_BUILTIN_CMPNEQSS,
20198 IX86_BUILTIN_CMPNLTSS,
20199 IX86_BUILTIN_CMPNLESS,
20200 IX86_BUILTIN_CMPNGTSS,
20201 IX86_BUILTIN_CMPNGESS,
20202 IX86_BUILTIN_CMPORDSS,
20203 IX86_BUILTIN_CMPUNORDSS,
20205 IX86_BUILTIN_COMIEQSS,
20206 IX86_BUILTIN_COMILTSS,
20207 IX86_BUILTIN_COMILESS,
20208 IX86_BUILTIN_COMIGTSS,
20209 IX86_BUILTIN_COMIGESS,
20210 IX86_BUILTIN_COMINEQSS,
20211 IX86_BUILTIN_UCOMIEQSS,
20212 IX86_BUILTIN_UCOMILTSS,
20213 IX86_BUILTIN_UCOMILESS,
20214 IX86_BUILTIN_UCOMIGTSS,
20215 IX86_BUILTIN_UCOMIGESS,
20216 IX86_BUILTIN_UCOMINEQSS,
20218 IX86_BUILTIN_CVTPI2PS,
20219 IX86_BUILTIN_CVTPS2PI,
20220 IX86_BUILTIN_CVTSI2SS,
20221 IX86_BUILTIN_CVTSI642SS,
20222 IX86_BUILTIN_CVTSS2SI,
20223 IX86_BUILTIN_CVTSS2SI64,
20224 IX86_BUILTIN_CVTTPS2PI,
20225 IX86_BUILTIN_CVTTSS2SI,
20226 IX86_BUILTIN_CVTTSS2SI64,
20228 IX86_BUILTIN_MAXPS,
20229 IX86_BUILTIN_MAXSS,
20230 IX86_BUILTIN_MINPS,
20231 IX86_BUILTIN_MINSS,
20233 IX86_BUILTIN_LOADUPS,
20234 IX86_BUILTIN_STOREUPS,
20235 IX86_BUILTIN_MOVSS,
20237 IX86_BUILTIN_MOVHLPS,
20238 IX86_BUILTIN_MOVLHPS,
20239 IX86_BUILTIN_LOADHPS,
20240 IX86_BUILTIN_LOADLPS,
20241 IX86_BUILTIN_STOREHPS,
20242 IX86_BUILTIN_STORELPS,
20244 IX86_BUILTIN_MASKMOVQ,
20245 IX86_BUILTIN_MOVMSKPS,
20246 IX86_BUILTIN_PMOVMSKB,
20248 IX86_BUILTIN_MOVNTPS,
20249 IX86_BUILTIN_MOVNTQ,
20251 IX86_BUILTIN_LOADDQU,
20252 IX86_BUILTIN_STOREDQU,
20254 IX86_BUILTIN_PACKSSWB,
20255 IX86_BUILTIN_PACKSSDW,
20256 IX86_BUILTIN_PACKUSWB,
20258 IX86_BUILTIN_PADDB,
20259 IX86_BUILTIN_PADDW,
20260 IX86_BUILTIN_PADDD,
20261 IX86_BUILTIN_PADDQ,
20262 IX86_BUILTIN_PADDSB,
20263 IX86_BUILTIN_PADDSW,
20264 IX86_BUILTIN_PADDUSB,
20265 IX86_BUILTIN_PADDUSW,
20266 IX86_BUILTIN_PSUBB,
20267 IX86_BUILTIN_PSUBW,
20268 IX86_BUILTIN_PSUBD,
20269 IX86_BUILTIN_PSUBQ,
20270 IX86_BUILTIN_PSUBSB,
20271 IX86_BUILTIN_PSUBSW,
20272 IX86_BUILTIN_PSUBUSB,
20273 IX86_BUILTIN_PSUBUSW,
20276 IX86_BUILTIN_PANDN,
20280 IX86_BUILTIN_PAVGB,
20281 IX86_BUILTIN_PAVGW,
20283 IX86_BUILTIN_PCMPEQB,
20284 IX86_BUILTIN_PCMPEQW,
20285 IX86_BUILTIN_PCMPEQD,
20286 IX86_BUILTIN_PCMPGTB,
20287 IX86_BUILTIN_PCMPGTW,
20288 IX86_BUILTIN_PCMPGTD,
20290 IX86_BUILTIN_PMADDWD,
20292 IX86_BUILTIN_PMAXSW,
20293 IX86_BUILTIN_PMAXUB,
20294 IX86_BUILTIN_PMINSW,
20295 IX86_BUILTIN_PMINUB,
20297 IX86_BUILTIN_PMULHUW,
20298 IX86_BUILTIN_PMULHW,
20299 IX86_BUILTIN_PMULLW,
20301 IX86_BUILTIN_PSADBW,
20302 IX86_BUILTIN_PSHUFW,
20304 IX86_BUILTIN_PSLLW,
20305 IX86_BUILTIN_PSLLD,
20306 IX86_BUILTIN_PSLLQ,
20307 IX86_BUILTIN_PSRAW,
20308 IX86_BUILTIN_PSRAD,
20309 IX86_BUILTIN_PSRLW,
20310 IX86_BUILTIN_PSRLD,
20311 IX86_BUILTIN_PSRLQ,
20312 IX86_BUILTIN_PSLLWI,
20313 IX86_BUILTIN_PSLLDI,
20314 IX86_BUILTIN_PSLLQI,
20315 IX86_BUILTIN_PSRAWI,
20316 IX86_BUILTIN_PSRADI,
20317 IX86_BUILTIN_PSRLWI,
20318 IX86_BUILTIN_PSRLDI,
20319 IX86_BUILTIN_PSRLQI,
20321 IX86_BUILTIN_PUNPCKHBW,
20322 IX86_BUILTIN_PUNPCKHWD,
20323 IX86_BUILTIN_PUNPCKHDQ,
20324 IX86_BUILTIN_PUNPCKLBW,
20325 IX86_BUILTIN_PUNPCKLWD,
20326 IX86_BUILTIN_PUNPCKLDQ,
20328 IX86_BUILTIN_SHUFPS,
20330 IX86_BUILTIN_RCPPS,
20331 IX86_BUILTIN_RCPSS,
20332 IX86_BUILTIN_RSQRTPS,
20333 IX86_BUILTIN_RSQRTPS_NR,
20334 IX86_BUILTIN_RSQRTSS,
20335 IX86_BUILTIN_RSQRTF,
20336 IX86_BUILTIN_SQRTPS,
20337 IX86_BUILTIN_SQRTPS_NR,
20338 IX86_BUILTIN_SQRTSS,
20340 IX86_BUILTIN_UNPCKHPS,
20341 IX86_BUILTIN_UNPCKLPS,
20343 IX86_BUILTIN_ANDPS,
20344 IX86_BUILTIN_ANDNPS,
20346 IX86_BUILTIN_XORPS,
20349 IX86_BUILTIN_LDMXCSR,
20350 IX86_BUILTIN_STMXCSR,
20351 IX86_BUILTIN_SFENCE,
20353 /* 3DNow! Original */
20354 IX86_BUILTIN_FEMMS,
20355 IX86_BUILTIN_PAVGUSB,
20356 IX86_BUILTIN_PF2ID,
20357 IX86_BUILTIN_PFACC,
20358 IX86_BUILTIN_PFADD,
20359 IX86_BUILTIN_PFCMPEQ,
20360 IX86_BUILTIN_PFCMPGE,
20361 IX86_BUILTIN_PFCMPGT,
20362 IX86_BUILTIN_PFMAX,
20363 IX86_BUILTIN_PFMIN,
20364 IX86_BUILTIN_PFMUL,
20365 IX86_BUILTIN_PFRCP,
20366 IX86_BUILTIN_PFRCPIT1,
20367 IX86_BUILTIN_PFRCPIT2,
20368 IX86_BUILTIN_PFRSQIT1,
20369 IX86_BUILTIN_PFRSQRT,
20370 IX86_BUILTIN_PFSUB,
20371 IX86_BUILTIN_PFSUBR,
20372 IX86_BUILTIN_PI2FD,
20373 IX86_BUILTIN_PMULHRW,
20375 /* 3DNow! Athlon Extensions */
20376 IX86_BUILTIN_PF2IW,
20377 IX86_BUILTIN_PFNACC,
20378 IX86_BUILTIN_PFPNACC,
20379 IX86_BUILTIN_PI2FW,
20380 IX86_BUILTIN_PSWAPDSI,
20381 IX86_BUILTIN_PSWAPDSF,
20384 IX86_BUILTIN_ADDPD,
20385 IX86_BUILTIN_ADDSD,
20386 IX86_BUILTIN_DIVPD,
20387 IX86_BUILTIN_DIVSD,
20388 IX86_BUILTIN_MULPD,
20389 IX86_BUILTIN_MULSD,
20390 IX86_BUILTIN_SUBPD,
20391 IX86_BUILTIN_SUBSD,
20393 IX86_BUILTIN_CMPEQPD,
20394 IX86_BUILTIN_CMPLTPD,
20395 IX86_BUILTIN_CMPLEPD,
20396 IX86_BUILTIN_CMPGTPD,
20397 IX86_BUILTIN_CMPGEPD,
20398 IX86_BUILTIN_CMPNEQPD,
20399 IX86_BUILTIN_CMPNLTPD,
20400 IX86_BUILTIN_CMPNLEPD,
20401 IX86_BUILTIN_CMPNGTPD,
20402 IX86_BUILTIN_CMPNGEPD,
20403 IX86_BUILTIN_CMPORDPD,
20404 IX86_BUILTIN_CMPUNORDPD,
20405 IX86_BUILTIN_CMPEQSD,
20406 IX86_BUILTIN_CMPLTSD,
20407 IX86_BUILTIN_CMPLESD,
20408 IX86_BUILTIN_CMPNEQSD,
20409 IX86_BUILTIN_CMPNLTSD,
20410 IX86_BUILTIN_CMPNLESD,
20411 IX86_BUILTIN_CMPORDSD,
20412 IX86_BUILTIN_CMPUNORDSD,
20414 IX86_BUILTIN_COMIEQSD,
20415 IX86_BUILTIN_COMILTSD,
20416 IX86_BUILTIN_COMILESD,
20417 IX86_BUILTIN_COMIGTSD,
20418 IX86_BUILTIN_COMIGESD,
20419 IX86_BUILTIN_COMINEQSD,
20420 IX86_BUILTIN_UCOMIEQSD,
20421 IX86_BUILTIN_UCOMILTSD,
20422 IX86_BUILTIN_UCOMILESD,
20423 IX86_BUILTIN_UCOMIGTSD,
20424 IX86_BUILTIN_UCOMIGESD,
20425 IX86_BUILTIN_UCOMINEQSD,
20427 IX86_BUILTIN_MAXPD,
20428 IX86_BUILTIN_MAXSD,
20429 IX86_BUILTIN_MINPD,
20430 IX86_BUILTIN_MINSD,
20432 IX86_BUILTIN_ANDPD,
20433 IX86_BUILTIN_ANDNPD,
20435 IX86_BUILTIN_XORPD,
20437 IX86_BUILTIN_SQRTPD,
20438 IX86_BUILTIN_SQRTSD,
20440 IX86_BUILTIN_UNPCKHPD,
20441 IX86_BUILTIN_UNPCKLPD,
20443 IX86_BUILTIN_SHUFPD,
20445 IX86_BUILTIN_LOADUPD,
20446 IX86_BUILTIN_STOREUPD,
20447 IX86_BUILTIN_MOVSD,
20449 IX86_BUILTIN_LOADHPD,
20450 IX86_BUILTIN_LOADLPD,
20452 IX86_BUILTIN_CVTDQ2PD,
20453 IX86_BUILTIN_CVTDQ2PS,
20455 IX86_BUILTIN_CVTPD2DQ,
20456 IX86_BUILTIN_CVTPD2PI,
20457 IX86_BUILTIN_CVTPD2PS,
20458 IX86_BUILTIN_CVTTPD2DQ,
20459 IX86_BUILTIN_CVTTPD2PI,
20461 IX86_BUILTIN_CVTPI2PD,
20462 IX86_BUILTIN_CVTSI2SD,
20463 IX86_BUILTIN_CVTSI642SD,
20465 IX86_BUILTIN_CVTSD2SI,
20466 IX86_BUILTIN_CVTSD2SI64,
20467 IX86_BUILTIN_CVTSD2SS,
20468 IX86_BUILTIN_CVTSS2SD,
20469 IX86_BUILTIN_CVTTSD2SI,
20470 IX86_BUILTIN_CVTTSD2SI64,
20472 IX86_BUILTIN_CVTPS2DQ,
20473 IX86_BUILTIN_CVTPS2PD,
20474 IX86_BUILTIN_CVTTPS2DQ,
20476 IX86_BUILTIN_MOVNTI,
20477 IX86_BUILTIN_MOVNTPD,
20478 IX86_BUILTIN_MOVNTDQ,
20480 IX86_BUILTIN_MOVQ128,
20483 IX86_BUILTIN_MASKMOVDQU,
20484 IX86_BUILTIN_MOVMSKPD,
20485 IX86_BUILTIN_PMOVMSKB128,
20487 IX86_BUILTIN_PACKSSWB128,
20488 IX86_BUILTIN_PACKSSDW128,
20489 IX86_BUILTIN_PACKUSWB128,
20491 IX86_BUILTIN_PADDB128,
20492 IX86_BUILTIN_PADDW128,
20493 IX86_BUILTIN_PADDD128,
20494 IX86_BUILTIN_PADDQ128,
20495 IX86_BUILTIN_PADDSB128,
20496 IX86_BUILTIN_PADDSW128,
20497 IX86_BUILTIN_PADDUSB128,
20498 IX86_BUILTIN_PADDUSW128,
20499 IX86_BUILTIN_PSUBB128,
20500 IX86_BUILTIN_PSUBW128,
20501 IX86_BUILTIN_PSUBD128,
20502 IX86_BUILTIN_PSUBQ128,
20503 IX86_BUILTIN_PSUBSB128,
20504 IX86_BUILTIN_PSUBSW128,
20505 IX86_BUILTIN_PSUBUSB128,
20506 IX86_BUILTIN_PSUBUSW128,
20508 IX86_BUILTIN_PAND128,
20509 IX86_BUILTIN_PANDN128,
20510 IX86_BUILTIN_POR128,
20511 IX86_BUILTIN_PXOR128,
20513 IX86_BUILTIN_PAVGB128,
20514 IX86_BUILTIN_PAVGW128,
20516 IX86_BUILTIN_PCMPEQB128,
20517 IX86_BUILTIN_PCMPEQW128,
20518 IX86_BUILTIN_PCMPEQD128,
20519 IX86_BUILTIN_PCMPGTB128,
20520 IX86_BUILTIN_PCMPGTW128,
20521 IX86_BUILTIN_PCMPGTD128,
20523 IX86_BUILTIN_PMADDWD128,
20525 IX86_BUILTIN_PMAXSW128,
20526 IX86_BUILTIN_PMAXUB128,
20527 IX86_BUILTIN_PMINSW128,
20528 IX86_BUILTIN_PMINUB128,
20530 IX86_BUILTIN_PMULUDQ,
20531 IX86_BUILTIN_PMULUDQ128,
20532 IX86_BUILTIN_PMULHUW128,
20533 IX86_BUILTIN_PMULHW128,
20534 IX86_BUILTIN_PMULLW128,
20536 IX86_BUILTIN_PSADBW128,
20537 IX86_BUILTIN_PSHUFHW,
20538 IX86_BUILTIN_PSHUFLW,
20539 IX86_BUILTIN_PSHUFD,
20541 IX86_BUILTIN_PSLLDQI128,
20542 IX86_BUILTIN_PSLLWI128,
20543 IX86_BUILTIN_PSLLDI128,
20544 IX86_BUILTIN_PSLLQI128,
20545 IX86_BUILTIN_PSRAWI128,
20546 IX86_BUILTIN_PSRADI128,
20547 IX86_BUILTIN_PSRLDQI128,
20548 IX86_BUILTIN_PSRLWI128,
20549 IX86_BUILTIN_PSRLDI128,
20550 IX86_BUILTIN_PSRLQI128,
20552 IX86_BUILTIN_PSLLDQ128,
20553 IX86_BUILTIN_PSLLW128,
20554 IX86_BUILTIN_PSLLD128,
20555 IX86_BUILTIN_PSLLQ128,
20556 IX86_BUILTIN_PSRAW128,
20557 IX86_BUILTIN_PSRAD128,
20558 IX86_BUILTIN_PSRLW128,
20559 IX86_BUILTIN_PSRLD128,
20560 IX86_BUILTIN_PSRLQ128,
20562 IX86_BUILTIN_PUNPCKHBW128,
20563 IX86_BUILTIN_PUNPCKHWD128,
20564 IX86_BUILTIN_PUNPCKHDQ128,
20565 IX86_BUILTIN_PUNPCKHQDQ128,
20566 IX86_BUILTIN_PUNPCKLBW128,
20567 IX86_BUILTIN_PUNPCKLWD128,
20568 IX86_BUILTIN_PUNPCKLDQ128,
20569 IX86_BUILTIN_PUNPCKLQDQ128,
20571 IX86_BUILTIN_CLFLUSH,
20572 IX86_BUILTIN_MFENCE,
20573 IX86_BUILTIN_LFENCE,
20575 IX86_BUILTIN_BSRSI,
20576 IX86_BUILTIN_BSRDI,
20577 IX86_BUILTIN_RDPMC,
20578 IX86_BUILTIN_RDTSC,
20579 IX86_BUILTIN_RDTSCP,
20580 IX86_BUILTIN_ROLQI,
20581 IX86_BUILTIN_ROLHI,
20582 IX86_BUILTIN_RORQI,
20583 IX86_BUILTIN_RORHI,
20586 IX86_BUILTIN_ADDSUBPS,
20587 IX86_BUILTIN_HADDPS,
20588 IX86_BUILTIN_HSUBPS,
20589 IX86_BUILTIN_MOVSHDUP,
20590 IX86_BUILTIN_MOVSLDUP,
20591 IX86_BUILTIN_ADDSUBPD,
20592 IX86_BUILTIN_HADDPD,
20593 IX86_BUILTIN_HSUBPD,
20594 IX86_BUILTIN_LDDQU,
20596 IX86_BUILTIN_MONITOR,
20597 IX86_BUILTIN_MWAIT,
20600 IX86_BUILTIN_PHADDW,
20601 IX86_BUILTIN_PHADDD,
20602 IX86_BUILTIN_PHADDSW,
20603 IX86_BUILTIN_PHSUBW,
20604 IX86_BUILTIN_PHSUBD,
20605 IX86_BUILTIN_PHSUBSW,
20606 IX86_BUILTIN_PMADDUBSW,
20607 IX86_BUILTIN_PMULHRSW,
20608 IX86_BUILTIN_PSHUFB,
20609 IX86_BUILTIN_PSIGNB,
20610 IX86_BUILTIN_PSIGNW,
20611 IX86_BUILTIN_PSIGND,
20612 IX86_BUILTIN_PALIGNR,
20613 IX86_BUILTIN_PABSB,
20614 IX86_BUILTIN_PABSW,
20615 IX86_BUILTIN_PABSD,
20617 IX86_BUILTIN_PHADDW128,
20618 IX86_BUILTIN_PHADDD128,
20619 IX86_BUILTIN_PHADDSW128,
20620 IX86_BUILTIN_PHSUBW128,
20621 IX86_BUILTIN_PHSUBD128,
20622 IX86_BUILTIN_PHSUBSW128,
20623 IX86_BUILTIN_PMADDUBSW128,
20624 IX86_BUILTIN_PMULHRSW128,
20625 IX86_BUILTIN_PSHUFB128,
20626 IX86_BUILTIN_PSIGNB128,
20627 IX86_BUILTIN_PSIGNW128,
20628 IX86_BUILTIN_PSIGND128,
20629 IX86_BUILTIN_PALIGNR128,
20630 IX86_BUILTIN_PABSB128,
20631 IX86_BUILTIN_PABSW128,
20632 IX86_BUILTIN_PABSD128,
20634 /* AMDFAM10 - SSE4A New Instructions. */
20635 IX86_BUILTIN_MOVNTSD,
20636 IX86_BUILTIN_MOVNTSS,
20637 IX86_BUILTIN_EXTRQI,
20638 IX86_BUILTIN_EXTRQ,
20639 IX86_BUILTIN_INSERTQI,
20640 IX86_BUILTIN_INSERTQ,
20643 IX86_BUILTIN_BLENDPD,
20644 IX86_BUILTIN_BLENDPS,
20645 IX86_BUILTIN_BLENDVPD,
20646 IX86_BUILTIN_BLENDVPS,
20647 IX86_BUILTIN_PBLENDVB128,
20648 IX86_BUILTIN_PBLENDW128,
20653 IX86_BUILTIN_INSERTPS128,
20655 IX86_BUILTIN_MOVNTDQA,
20656 IX86_BUILTIN_MPSADBW128,
20657 IX86_BUILTIN_PACKUSDW128,
20658 IX86_BUILTIN_PCMPEQQ,
20659 IX86_BUILTIN_PHMINPOSUW128,
20661 IX86_BUILTIN_PMAXSB128,
20662 IX86_BUILTIN_PMAXSD128,
20663 IX86_BUILTIN_PMAXUD128,
20664 IX86_BUILTIN_PMAXUW128,
20666 IX86_BUILTIN_PMINSB128,
20667 IX86_BUILTIN_PMINSD128,
20668 IX86_BUILTIN_PMINUD128,
20669 IX86_BUILTIN_PMINUW128,
20671 IX86_BUILTIN_PMOVSXBW128,
20672 IX86_BUILTIN_PMOVSXBD128,
20673 IX86_BUILTIN_PMOVSXBQ128,
20674 IX86_BUILTIN_PMOVSXWD128,
20675 IX86_BUILTIN_PMOVSXWQ128,
20676 IX86_BUILTIN_PMOVSXDQ128,
20678 IX86_BUILTIN_PMOVZXBW128,
20679 IX86_BUILTIN_PMOVZXBD128,
20680 IX86_BUILTIN_PMOVZXBQ128,
20681 IX86_BUILTIN_PMOVZXWD128,
20682 IX86_BUILTIN_PMOVZXWQ128,
20683 IX86_BUILTIN_PMOVZXDQ128,
20685 IX86_BUILTIN_PMULDQ128,
20686 IX86_BUILTIN_PMULLD128,
20688 IX86_BUILTIN_ROUNDPD,
20689 IX86_BUILTIN_ROUNDPS,
20690 IX86_BUILTIN_ROUNDSD,
20691 IX86_BUILTIN_ROUNDSS,
20693 IX86_BUILTIN_PTESTZ,
20694 IX86_BUILTIN_PTESTC,
20695 IX86_BUILTIN_PTESTNZC,
20697 IX86_BUILTIN_VEC_INIT_V2SI,
20698 IX86_BUILTIN_VEC_INIT_V4HI,
20699 IX86_BUILTIN_VEC_INIT_V8QI,
20700 IX86_BUILTIN_VEC_EXT_V2DF,
20701 IX86_BUILTIN_VEC_EXT_V2DI,
20702 IX86_BUILTIN_VEC_EXT_V4SF,
20703 IX86_BUILTIN_VEC_EXT_V4SI,
20704 IX86_BUILTIN_VEC_EXT_V8HI,
20705 IX86_BUILTIN_VEC_EXT_V2SI,
20706 IX86_BUILTIN_VEC_EXT_V4HI,
20707 IX86_BUILTIN_VEC_EXT_V16QI,
20708 IX86_BUILTIN_VEC_SET_V2DI,
20709 IX86_BUILTIN_VEC_SET_V4SF,
20710 IX86_BUILTIN_VEC_SET_V4SI,
20711 IX86_BUILTIN_VEC_SET_V8HI,
20712 IX86_BUILTIN_VEC_SET_V4HI,
20713 IX86_BUILTIN_VEC_SET_V16QI,
20715 IX86_BUILTIN_VEC_PACK_SFIX,
20718 IX86_BUILTIN_CRC32QI,
20719 IX86_BUILTIN_CRC32HI,
20720 IX86_BUILTIN_CRC32SI,
20721 IX86_BUILTIN_CRC32DI,
20723 IX86_BUILTIN_PCMPESTRI128,
20724 IX86_BUILTIN_PCMPESTRM128,
20725 IX86_BUILTIN_PCMPESTRA128,
20726 IX86_BUILTIN_PCMPESTRC128,
20727 IX86_BUILTIN_PCMPESTRO128,
20728 IX86_BUILTIN_PCMPESTRS128,
20729 IX86_BUILTIN_PCMPESTRZ128,
20730 IX86_BUILTIN_PCMPISTRI128,
20731 IX86_BUILTIN_PCMPISTRM128,
20732 IX86_BUILTIN_PCMPISTRA128,
20733 IX86_BUILTIN_PCMPISTRC128,
20734 IX86_BUILTIN_PCMPISTRO128,
20735 IX86_BUILTIN_PCMPISTRS128,
20736 IX86_BUILTIN_PCMPISTRZ128,
20738 IX86_BUILTIN_PCMPGTQ,
20740 /* AES instructions */
20741 IX86_BUILTIN_AESENC128,
20742 IX86_BUILTIN_AESENCLAST128,
20743 IX86_BUILTIN_AESDEC128,
20744 IX86_BUILTIN_AESDECLAST128,
20745 IX86_BUILTIN_AESIMC128,
20746 IX86_BUILTIN_AESKEYGENASSIST128,
20748 /* PCLMUL instruction */
20749 IX86_BUILTIN_PCLMULQDQ128,
20752 IX86_BUILTIN_ADDPD256,
20753 IX86_BUILTIN_ADDPS256,
20754 IX86_BUILTIN_ADDSUBPD256,
20755 IX86_BUILTIN_ADDSUBPS256,
20756 IX86_BUILTIN_ANDPD256,
20757 IX86_BUILTIN_ANDPS256,
20758 IX86_BUILTIN_ANDNPD256,
20759 IX86_BUILTIN_ANDNPS256,
20760 IX86_BUILTIN_BLENDPD256,
20761 IX86_BUILTIN_BLENDPS256,
20762 IX86_BUILTIN_BLENDVPD256,
20763 IX86_BUILTIN_BLENDVPS256,
20764 IX86_BUILTIN_DIVPD256,
20765 IX86_BUILTIN_DIVPS256,
20766 IX86_BUILTIN_DPPS256,
20767 IX86_BUILTIN_HADDPD256,
20768 IX86_BUILTIN_HADDPS256,
20769 IX86_BUILTIN_HSUBPD256,
20770 IX86_BUILTIN_HSUBPS256,
20771 IX86_BUILTIN_MAXPD256,
20772 IX86_BUILTIN_MAXPS256,
20773 IX86_BUILTIN_MINPD256,
20774 IX86_BUILTIN_MINPS256,
20775 IX86_BUILTIN_MULPD256,
20776 IX86_BUILTIN_MULPS256,
20777 IX86_BUILTIN_ORPD256,
20778 IX86_BUILTIN_ORPS256,
20779 IX86_BUILTIN_SHUFPD256,
20780 IX86_BUILTIN_SHUFPS256,
20781 IX86_BUILTIN_SUBPD256,
20782 IX86_BUILTIN_SUBPS256,
20783 IX86_BUILTIN_XORPD256,
20784 IX86_BUILTIN_XORPS256,
20785 IX86_BUILTIN_CMPSD,
20786 IX86_BUILTIN_CMPSS,
20787 IX86_BUILTIN_CMPPD,
20788 IX86_BUILTIN_CMPPS,
20789 IX86_BUILTIN_CMPPD256,
20790 IX86_BUILTIN_CMPPS256,
20791 IX86_BUILTIN_CVTDQ2PD256,
20792 IX86_BUILTIN_CVTDQ2PS256,
20793 IX86_BUILTIN_CVTPD2PS256,
20794 IX86_BUILTIN_CVTPS2DQ256,
20795 IX86_BUILTIN_CVTPS2PD256,
20796 IX86_BUILTIN_CVTTPD2DQ256,
20797 IX86_BUILTIN_CVTPD2DQ256,
20798 IX86_BUILTIN_CVTTPS2DQ256,
20799 IX86_BUILTIN_EXTRACTF128PD256,
20800 IX86_BUILTIN_EXTRACTF128PS256,
20801 IX86_BUILTIN_EXTRACTF128SI256,
20802 IX86_BUILTIN_VZEROALL,
20803 IX86_BUILTIN_VZEROUPPER,
20804 IX86_BUILTIN_VZEROUPPER_REX64,
20805 IX86_BUILTIN_VPERMILVARPD,
20806 IX86_BUILTIN_VPERMILVARPS,
20807 IX86_BUILTIN_VPERMILVARPD256,
20808 IX86_BUILTIN_VPERMILVARPS256,
20809 IX86_BUILTIN_VPERMILPD,
20810 IX86_BUILTIN_VPERMILPS,
20811 IX86_BUILTIN_VPERMILPD256,
20812 IX86_BUILTIN_VPERMILPS256,
20813 IX86_BUILTIN_VPERM2F128PD256,
20814 IX86_BUILTIN_VPERM2F128PS256,
20815 IX86_BUILTIN_VPERM2F128SI256,
20816 IX86_BUILTIN_VBROADCASTSS,
20817 IX86_BUILTIN_VBROADCASTSD256,
20818 IX86_BUILTIN_VBROADCASTSS256,
20819 IX86_BUILTIN_VBROADCASTPD256,
20820 IX86_BUILTIN_VBROADCASTPS256,
20821 IX86_BUILTIN_VINSERTF128PD256,
20822 IX86_BUILTIN_VINSERTF128PS256,
20823 IX86_BUILTIN_VINSERTF128SI256,
20824 IX86_BUILTIN_LOADUPD256,
20825 IX86_BUILTIN_LOADUPS256,
20826 IX86_BUILTIN_STOREUPD256,
20827 IX86_BUILTIN_STOREUPS256,
20828 IX86_BUILTIN_LDDQU256,
20829 IX86_BUILTIN_MOVNTDQ256,
20830 IX86_BUILTIN_MOVNTPD256,
20831 IX86_BUILTIN_MOVNTPS256,
20832 IX86_BUILTIN_LOADDQU256,
20833 IX86_BUILTIN_STOREDQU256,
20834 IX86_BUILTIN_MASKLOADPD,
20835 IX86_BUILTIN_MASKLOADPS,
20836 IX86_BUILTIN_MASKSTOREPD,
20837 IX86_BUILTIN_MASKSTOREPS,
20838 IX86_BUILTIN_MASKLOADPD256,
20839 IX86_BUILTIN_MASKLOADPS256,
20840 IX86_BUILTIN_MASKSTOREPD256,
20841 IX86_BUILTIN_MASKSTOREPS256,
20842 IX86_BUILTIN_MOVSHDUP256,
20843 IX86_BUILTIN_MOVSLDUP256,
20844 IX86_BUILTIN_MOVDDUP256,
20846 IX86_BUILTIN_SQRTPD256,
20847 IX86_BUILTIN_SQRTPS256,
20848 IX86_BUILTIN_SQRTPS_NR256,
20849 IX86_BUILTIN_RSQRTPS256,
20850 IX86_BUILTIN_RSQRTPS_NR256,
20852 IX86_BUILTIN_RCPPS256,
20854 IX86_BUILTIN_ROUNDPD256,
20855 IX86_BUILTIN_ROUNDPS256,
20857 IX86_BUILTIN_UNPCKHPD256,
20858 IX86_BUILTIN_UNPCKLPD256,
20859 IX86_BUILTIN_UNPCKHPS256,
20860 IX86_BUILTIN_UNPCKLPS256,
20862 IX86_BUILTIN_SI256_SI,
20863 IX86_BUILTIN_PS256_PS,
20864 IX86_BUILTIN_PD256_PD,
20865 IX86_BUILTIN_SI_SI256,
20866 IX86_BUILTIN_PS_PS256,
20867 IX86_BUILTIN_PD_PD256,
20869 IX86_BUILTIN_VTESTZPD,
20870 IX86_BUILTIN_VTESTCPD,
20871 IX86_BUILTIN_VTESTNZCPD,
20872 IX86_BUILTIN_VTESTZPS,
20873 IX86_BUILTIN_VTESTCPS,
20874 IX86_BUILTIN_VTESTNZCPS,
20875 IX86_BUILTIN_VTESTZPD256,
20876 IX86_BUILTIN_VTESTCPD256,
20877 IX86_BUILTIN_VTESTNZCPD256,
20878 IX86_BUILTIN_VTESTZPS256,
20879 IX86_BUILTIN_VTESTCPS256,
20880 IX86_BUILTIN_VTESTNZCPS256,
20881 IX86_BUILTIN_PTESTZ256,
20882 IX86_BUILTIN_PTESTC256,
20883 IX86_BUILTIN_PTESTNZC256,
20885 IX86_BUILTIN_MOVMSKPD256,
20886 IX86_BUILTIN_MOVMSKPS256,
20888 /* TFmode support builtins. */
20890 IX86_BUILTIN_HUGE_VALQ,
20891 IX86_BUILTIN_FABSQ,
20892 IX86_BUILTIN_COPYSIGNQ,
20894 /* Vectorizer support builtins. */
20895 IX86_BUILTIN_CPYSGNPS,
20896 IX86_BUILTIN_CPYSGNPD,
20898 IX86_BUILTIN_CVTUDQ2PS,
20900 /* SSE5 instructions */
20901 IX86_BUILTIN_FMADDSS,
20902 IX86_BUILTIN_FMADDSD,
20903 IX86_BUILTIN_FMADDPS,
20904 IX86_BUILTIN_FMADDPD,
20905 IX86_BUILTIN_FMSUBSS,
20906 IX86_BUILTIN_FMSUBSD,
20907 IX86_BUILTIN_FMSUBPS,
20908 IX86_BUILTIN_FMSUBPD,
20909 IX86_BUILTIN_FNMADDSS,
20910 IX86_BUILTIN_FNMADDSD,
20911 IX86_BUILTIN_FNMADDPS,
20912 IX86_BUILTIN_FNMADDPD,
20913 IX86_BUILTIN_FNMSUBSS,
20914 IX86_BUILTIN_FNMSUBSD,
20915 IX86_BUILTIN_FNMSUBPS,
20916 IX86_BUILTIN_FNMSUBPD,
20917 IX86_BUILTIN_PCMOV,
20918 IX86_BUILTIN_PCMOV_V2DI,
20919 IX86_BUILTIN_PCMOV_V4SI,
20920 IX86_BUILTIN_PCMOV_V8HI,
20921 IX86_BUILTIN_PCMOV_V16QI,
20922 IX86_BUILTIN_PCMOV_V4SF,
20923 IX86_BUILTIN_PCMOV_V2DF,
20924 IX86_BUILTIN_PPERM,
20925 IX86_BUILTIN_PERMPS,
20926 IX86_BUILTIN_PERMPD,
20927 IX86_BUILTIN_PMACSSWW,
20928 IX86_BUILTIN_PMACSWW,
20929 IX86_BUILTIN_PMACSSWD,
20930 IX86_BUILTIN_PMACSWD,
20931 IX86_BUILTIN_PMACSSDD,
20932 IX86_BUILTIN_PMACSDD,
20933 IX86_BUILTIN_PMACSSDQL,
20934 IX86_BUILTIN_PMACSSDQH,
20935 IX86_BUILTIN_PMACSDQL,
20936 IX86_BUILTIN_PMACSDQH,
20937 IX86_BUILTIN_PMADCSSWD,
20938 IX86_BUILTIN_PMADCSWD,
20939 IX86_BUILTIN_PHADDBW,
20940 IX86_BUILTIN_PHADDBD,
20941 IX86_BUILTIN_PHADDBQ,
20942 IX86_BUILTIN_PHADDWD,
20943 IX86_BUILTIN_PHADDWQ,
20944 IX86_BUILTIN_PHADDDQ,
20945 IX86_BUILTIN_PHADDUBW,
20946 IX86_BUILTIN_PHADDUBD,
20947 IX86_BUILTIN_PHADDUBQ,
20948 IX86_BUILTIN_PHADDUWD,
20949 IX86_BUILTIN_PHADDUWQ,
20950 IX86_BUILTIN_PHADDUDQ,
20951 IX86_BUILTIN_PHSUBBW,
20952 IX86_BUILTIN_PHSUBWD,
20953 IX86_BUILTIN_PHSUBDQ,
20954 IX86_BUILTIN_PROTB,
20955 IX86_BUILTIN_PROTW,
20956 IX86_BUILTIN_PROTD,
20957 IX86_BUILTIN_PROTQ,
20958 IX86_BUILTIN_PROTB_IMM,
20959 IX86_BUILTIN_PROTW_IMM,
20960 IX86_BUILTIN_PROTD_IMM,
20961 IX86_BUILTIN_PROTQ_IMM,
20962 IX86_BUILTIN_PSHLB,
20963 IX86_BUILTIN_PSHLW,
20964 IX86_BUILTIN_PSHLD,
20965 IX86_BUILTIN_PSHLQ,
20966 IX86_BUILTIN_PSHAB,
20967 IX86_BUILTIN_PSHAW,
20968 IX86_BUILTIN_PSHAD,
20969 IX86_BUILTIN_PSHAQ,
20970 IX86_BUILTIN_FRCZSS,
20971 IX86_BUILTIN_FRCZSD,
20972 IX86_BUILTIN_FRCZPS,
20973 IX86_BUILTIN_FRCZPD,
20974 IX86_BUILTIN_CVTPH2PS,
20975 IX86_BUILTIN_CVTPS2PH,
20977 IX86_BUILTIN_COMEQSS,
20978 IX86_BUILTIN_COMNESS,
20979 IX86_BUILTIN_COMLTSS,
20980 IX86_BUILTIN_COMLESS,
20981 IX86_BUILTIN_COMGTSS,
20982 IX86_BUILTIN_COMGESS,
20983 IX86_BUILTIN_COMUEQSS,
20984 IX86_BUILTIN_COMUNESS,
20985 IX86_BUILTIN_COMULTSS,
20986 IX86_BUILTIN_COMULESS,
20987 IX86_BUILTIN_COMUGTSS,
20988 IX86_BUILTIN_COMUGESS,
20989 IX86_BUILTIN_COMORDSS,
20990 IX86_BUILTIN_COMUNORDSS,
20991 IX86_BUILTIN_COMFALSESS,
20992 IX86_BUILTIN_COMTRUESS,
20994 IX86_BUILTIN_COMEQSD,
20995 IX86_BUILTIN_COMNESD,
20996 IX86_BUILTIN_COMLTSD,
20997 IX86_BUILTIN_COMLESD,
20998 IX86_BUILTIN_COMGTSD,
20999 IX86_BUILTIN_COMGESD,
21000 IX86_BUILTIN_COMUEQSD,
21001 IX86_BUILTIN_COMUNESD,
21002 IX86_BUILTIN_COMULTSD,
21003 IX86_BUILTIN_COMULESD,
21004 IX86_BUILTIN_COMUGTSD,
21005 IX86_BUILTIN_COMUGESD,
21006 IX86_BUILTIN_COMORDSD,
21007 IX86_BUILTIN_COMUNORDSD,
21008 IX86_BUILTIN_COMFALSESD,
21009 IX86_BUILTIN_COMTRUESD,
21011 IX86_BUILTIN_COMEQPS,
21012 IX86_BUILTIN_COMNEPS,
21013 IX86_BUILTIN_COMLTPS,
21014 IX86_BUILTIN_COMLEPS,
21015 IX86_BUILTIN_COMGTPS,
21016 IX86_BUILTIN_COMGEPS,
21017 IX86_BUILTIN_COMUEQPS,
21018 IX86_BUILTIN_COMUNEPS,
21019 IX86_BUILTIN_COMULTPS,
21020 IX86_BUILTIN_COMULEPS,
21021 IX86_BUILTIN_COMUGTPS,
21022 IX86_BUILTIN_COMUGEPS,
21023 IX86_BUILTIN_COMORDPS,
21024 IX86_BUILTIN_COMUNORDPS,
21025 IX86_BUILTIN_COMFALSEPS,
21026 IX86_BUILTIN_COMTRUEPS,
21028 IX86_BUILTIN_COMEQPD,
21029 IX86_BUILTIN_COMNEPD,
21030 IX86_BUILTIN_COMLTPD,
21031 IX86_BUILTIN_COMLEPD,
21032 IX86_BUILTIN_COMGTPD,
21033 IX86_BUILTIN_COMGEPD,
21034 IX86_BUILTIN_COMUEQPD,
21035 IX86_BUILTIN_COMUNEPD,
21036 IX86_BUILTIN_COMULTPD,
21037 IX86_BUILTIN_COMULEPD,
21038 IX86_BUILTIN_COMUGTPD,
21039 IX86_BUILTIN_COMUGEPD,
21040 IX86_BUILTIN_COMORDPD,
21041 IX86_BUILTIN_COMUNORDPD,
21042 IX86_BUILTIN_COMFALSEPD,
21043 IX86_BUILTIN_COMTRUEPD,
21045 IX86_BUILTIN_PCOMEQUB,
21046 IX86_BUILTIN_PCOMNEUB,
21047 IX86_BUILTIN_PCOMLTUB,
21048 IX86_BUILTIN_PCOMLEUB,
21049 IX86_BUILTIN_PCOMGTUB,
21050 IX86_BUILTIN_PCOMGEUB,
21051 IX86_BUILTIN_PCOMFALSEUB,
21052 IX86_BUILTIN_PCOMTRUEUB,
21053 IX86_BUILTIN_PCOMEQUW,
21054 IX86_BUILTIN_PCOMNEUW,
21055 IX86_BUILTIN_PCOMLTUW,
21056 IX86_BUILTIN_PCOMLEUW,
21057 IX86_BUILTIN_PCOMGTUW,
21058 IX86_BUILTIN_PCOMGEUW,
21059 IX86_BUILTIN_PCOMFALSEUW,
21060 IX86_BUILTIN_PCOMTRUEUW,
21061 IX86_BUILTIN_PCOMEQUD,
21062 IX86_BUILTIN_PCOMNEUD,
21063 IX86_BUILTIN_PCOMLTUD,
21064 IX86_BUILTIN_PCOMLEUD,
21065 IX86_BUILTIN_PCOMGTUD,
21066 IX86_BUILTIN_PCOMGEUD,
21067 IX86_BUILTIN_PCOMFALSEUD,
21068 IX86_BUILTIN_PCOMTRUEUD,
21069 IX86_BUILTIN_PCOMEQUQ,
21070 IX86_BUILTIN_PCOMNEUQ,
21071 IX86_BUILTIN_PCOMLTUQ,
21072 IX86_BUILTIN_PCOMLEUQ,
21073 IX86_BUILTIN_PCOMGTUQ,
21074 IX86_BUILTIN_PCOMGEUQ,
21075 IX86_BUILTIN_PCOMFALSEUQ,
21076 IX86_BUILTIN_PCOMTRUEUQ,
21078 IX86_BUILTIN_PCOMEQB,
21079 IX86_BUILTIN_PCOMNEB,
21080 IX86_BUILTIN_PCOMLTB,
21081 IX86_BUILTIN_PCOMLEB,
21082 IX86_BUILTIN_PCOMGTB,
21083 IX86_BUILTIN_PCOMGEB,
21084 IX86_BUILTIN_PCOMFALSEB,
21085 IX86_BUILTIN_PCOMTRUEB,
21086 IX86_BUILTIN_PCOMEQW,
21087 IX86_BUILTIN_PCOMNEW,
21088 IX86_BUILTIN_PCOMLTW,
21089 IX86_BUILTIN_PCOMLEW,
21090 IX86_BUILTIN_PCOMGTW,
21091 IX86_BUILTIN_PCOMGEW,
21092 IX86_BUILTIN_PCOMFALSEW,
21093 IX86_BUILTIN_PCOMTRUEW,
21094 IX86_BUILTIN_PCOMEQD,
21095 IX86_BUILTIN_PCOMNED,
21096 IX86_BUILTIN_PCOMLTD,
21097 IX86_BUILTIN_PCOMLED,
21098 IX86_BUILTIN_PCOMGTD,
21099 IX86_BUILTIN_PCOMGED,
21100 IX86_BUILTIN_PCOMFALSED,
21101 IX86_BUILTIN_PCOMTRUED,
21102 IX86_BUILTIN_PCOMEQQ,
21103 IX86_BUILTIN_PCOMNEQ,
21104 IX86_BUILTIN_PCOMLTQ,
21105 IX86_BUILTIN_PCOMLEQ,
21106 IX86_BUILTIN_PCOMGTQ,
21107 IX86_BUILTIN_PCOMGEQ,
21108 IX86_BUILTIN_PCOMFALSEQ,
21109 IX86_BUILTIN_PCOMTRUEQ,
21114 /* Table for the ix86 builtin decls. */
/* Indexed by enum ix86_builtins; NULL_TREE for builtins whose
   declaration has been deferred (see builtin_isa below).  */
21115 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21117 /* Table of all of the builtin functions that are possible with different ISA's
21118 but are waiting to be built until a function is declared to use that
21120 struct GTY(()) builtin_isa {
21121 tree type; /* builtin type to use in the declaration */
21122 const char *name; /* function name */
21123 int isa; /* isa_flags this builtin is defined for */
21124 bool const_p; /* true if the declaration is constant */
/* Deferred-builtin records, parallel to ix86_builtins; a non-NULL type
   field marks an entry still waiting to be declared
   (see ix86_add_new_builtins).  */
21127 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21130 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21131 * of which isa_flags to use in the ix86_builtins_isa array. Stores the
21132 * function decl in the ix86_builtins array. Returns the function decl or
21133 * NULL_TREE, if the builtin was not added.
21135 * If the front end has a special hook for builtin functions, delay adding
21136 * builtin functions that aren't in the current ISA until the ISA is changed
21137 * with function specific optimization. Doing so, can save about 300K for the
21138 * default compiler. When the builtin is expanded, check at that time whether
21141 * If the front end doesn't have a special hook, record all builtins, even if
21142 * it isn't an instruction set in the current ISA in case the user uses
21143 * function specific options for a different ISA, so that we don't get scope
21144 * errors if a builtin is added in the middle of a function scope. */
21147 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
21149 tree decl = NULL_TREE;
/* Skip 64-bit-only builtins entirely on 32-bit targets; they can never
   become available there.  */
21151 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21153 ix86_builtins_isa[(int) code].isa = mask;
/* Declare immediately when the builtin's ISA is currently enabled, or
   when the front end cannot declare builtins later (no ext-scope hook).  */
21155 if ((mask & ix86_isa_flags) != 0
21156 || (lang_hooks.builtin_function
21157 == lang_hooks.builtin_function_ext_scope))
21160 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
21162 ix86_builtins[(int) code] = decl;
/* NULL type marks the isa entry as "already declared".  */
21163 ix86_builtins_isa[(int) code].type = NULL_TREE;
/* Defer: record everything needed so ix86_add_new_builtins can declare
   it when the ISA becomes active.  */
21167 ix86_builtins[(int) code] = NULL_TREE;
21168 ix86_builtins_isa[(int) code].const_p = false;
21169 ix86_builtins_isa[(int) code].type = type;
21170 ix86_builtins_isa[(int) code].name = name;
21177 /* Like def_builtin, but also marks the function decl "const". */
21180 def_builtin_const (int mask, const char *name, tree type,
21181 enum ix86_builtins code)
21183 tree decl = def_builtin (mask, name, type, code);
/* If the builtin was declared now, mark the decl const directly...  */
21185 TREE_READONLY (decl) = 1;
/* ...otherwise remember const-ness so the deferred declaration in
   ix86_add_new_builtins can apply it.  (The branch structure around
   these two statements is elided in this excerpt.)  */
21187 ix86_builtins_isa[(int) code].const_p = true;
21192 /* Add any new builtin functions for a given ISA that may not have been
21193 declared. This saves a bit of space compared to adding all of the
21194 declarations to the tree, even if we didn't use them. */
21197 ix86_add_new_builtins (int isa)
/* Walk every deferred-builtin record; declare those whose ISA bits now
   intersect ISA and that have not been declared yet (non-NULL type).  */
21202 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21204 if ((ix86_builtins_isa[i].isa & isa) != 0
21205 && ix86_builtins_isa[i].type != NULL_TREE)
/* Declare at "extension" scope so the builtin becomes visible even if
   we are in the middle of a function body.  */
21207 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21208 ix86_builtins_isa[i].type,
21209 i, BUILT_IN_MD, NULL,
21212 ix86_builtins[i] = decl;
/* Clear type to mark the entry as declared (matches def_builtin's
   convention).  */
21213 ix86_builtins_isa[i].type = NULL_TREE;
/* Propagate const-ness recorded by def_builtin_const.  */
21214 if (ix86_builtins_isa[i].const_p)
21215 TREE_READONLY (decl) = 1;
21220 /* Bits for builtin_description.flag. */
21222 /* Set when we don't support the comparison natively, and should
21223 swap_comparison in order to support it. */
21224 #define BUILTIN_DESC_SWAP_OPERANDS 1
/* One row per builtin in the bdesc_* tables below: which ISA option
   enables it, which insn pattern implements it, its user-visible name,
   its ix86_builtins code, and (for comparisons) the rtx comparison
   code.  */
21226 struct builtin_description
21228 const unsigned int mask;
21229 const enum insn_code icode;
21230 const char *const name;
21231 const enum ix86_builtins code;
21232 const enum rtx_code comparison;
21236 static const struct builtin_description bdesc_comi[] =
21238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21240 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21241 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21243 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21244 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21245 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21246 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21251 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21254 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21255 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21256 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21259 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21264 static const struct builtin_description bdesc_pcmpestr[] =
21267 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21268 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21269 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21270 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21271 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21272 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21273 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21276 static const struct builtin_description bdesc_pcmpistr[] =
21279 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21280 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21281 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21282 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21283 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21284 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21285 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
/* Special builtin types */
/* Function-type codes for the "special" builtins (loads/stores and
   other builtins with memory operands).  Names read as
   RETURN_FTYPE_ARGS; the 'P'/'PC' prefixes appear to denote pointer
   and pointer-to-const argument types — confirm against the type
   construction code.
   NOTE(review): this copy is truncated — the enum's braces and several
   enumerators (e.g. the VOID_FTYPE_VOID / UINT64_FTYPE_VOID codes
   referenced by the tables below) are missing; restore from upstream
   before building.  */
enum ix86_special_builtin_type
  SPECIAL_FTYPE_UNKNOWN,
  UINT64_FTYPE_PUNSIGNED,
  /* Vector loads from constant memory.  */
  V32QI_FTYPE_PCCHAR,
  V16QI_FTYPE_PCCHAR,
  V8SF_FTYPE_PCFLOAT,
  V4DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  /* Masked/merging loads: memory operand plus a vector operand.  */
  V8SF_FTYPE_PCV8SF_V8SF,
  V4DF_FTYPE_PCV4DF_V4DF,
  V4SF_FTYPE_V4SF_PCV2SF,
  V4SF_FTYPE_PCV4SF_V4SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DF_FTYPE_PCV2DF_V2DF,
  /* Stores: void result, pointer destination, vector source.  */
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV4DI_V4DI,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V32QI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V8SF,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V4DF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PINT_INT,
  /* Masked stores: destination, mask vector, source vector.  */
  VOID_FTYPE_PV8SF_V8SF_V8SF,
  VOID_FTYPE_PV4DF_V4DF_V4DF,
  VOID_FTYPE_PV4SF_V4SF_V4SF,
  VOID_FTYPE_PV2DF_V2DF_V2DF
/* Builtin types */
/* Function-type codes for the ordinary (register-only) builtins in
   bdesc_args.  Names read as RETURN_FTYPE_ARGS; trailing tags refine
   how the expander treats the arguments (_COUNT: last operand is a
   shift/rotate count, _SWAP: operands are swapped before expansion,
   _PTEST/_VEC_MERGE: special expansion forms — confirm in
   ix86_expand_args_builtin).
   NOTE(review): this copy is truncated — the enum's braces and whole
   ranges of enumerators (e.g. the one-argument V*_FTYPE_V* codes) are
   missing; restore from upstream before building.  */
enum ix86_builtin_type
  FLOAT128_FTYPE_FLOAT128,
  FLOAT128_FTYPE_FLOAT128_FLOAT128,
  /* ptest-style comparisons producing an int flag.  */
  INT_FTYPE_V8SF_V8SF_PTEST,
  INT_FTYPE_V4DI_V4DI_PTEST,
  INT_FTYPE_V4DF_V4DF_PTEST,
  INT_FTYPE_V4SF_V4SF_PTEST,
  INT_FTYPE_V2DI_V2DI_PTEST,
  INT_FTYPE_V2DF_V2DF_PTEST,
  V4SF_FTYPE_V4SF_VEC_MERGE,
  V2DF_FTYPE_V2DF_VEC_MERGE,
  /* Two-operand vector forms.  */
  V16QI_FTYPE_V16QI_V16QI,
  V16QI_FTYPE_V8HI_V8HI,
  V8QI_FTYPE_V8QI_V8QI,
  V8QI_FTYPE_V4HI_V4HI,
  V8HI_FTYPE_V8HI_V8HI,
  V8HI_FTYPE_V8HI_V8HI_COUNT,
  V8HI_FTYPE_V16QI_V16QI,
  V8HI_FTYPE_V4SI_V4SI,
  V8HI_FTYPE_V8HI_SI_COUNT,
  V8SF_FTYPE_V8SF_V8SF,
  V8SF_FTYPE_V8SF_V8SI,
  V4SI_FTYPE_V4SI_V4SI,
  V4SI_FTYPE_V4SI_V4SI_COUNT,
  V4SI_FTYPE_V8HI_V8HI,
  V4SI_FTYPE_V4SF_V4SF,
  V4SI_FTYPE_V2DF_V2DF,
  V4SI_FTYPE_V4SI_SI_COUNT,
  V4HI_FTYPE_V4HI_V4HI,
  V4HI_FTYPE_V4HI_V4HI_COUNT,
  V4HI_FTYPE_V8QI_V8QI,
  V4HI_FTYPE_V2SI_V2SI,
  V4HI_FTYPE_V4HI_SI_COUNT,
  V4DF_FTYPE_V4DF_V4DF,
  V4DF_FTYPE_V4DF_V4DI,
  V4SF_FTYPE_V4SF_V4SF,
  V4SF_FTYPE_V4SF_V4SF_SWAP,
  V4SF_FTYPE_V4SF_V4SI,
  V4SF_FTYPE_V4SF_V2SI,
  V4SF_FTYPE_V4SF_V2DF,
  V4SF_FTYPE_V4SF_DI,
  V4SF_FTYPE_V4SF_SI,
  V2DI_FTYPE_V2DI_V2DI,
  V2DI_FTYPE_V2DI_V2DI_COUNT,
  V2DI_FTYPE_V16QI_V16QI,
  V2DI_FTYPE_V4SI_V4SI,
  V2DI_FTYPE_V2DI_V16QI,
  V2DI_FTYPE_V2DF_V2DF,
  V2DI_FTYPE_V2DI_SI_COUNT,
  V2SI_FTYPE_V2SI_V2SI,
  V2SI_FTYPE_V2SI_V2SI_COUNT,
  V2SI_FTYPE_V4HI_V4HI,
  V2SI_FTYPE_V2SF_V2SF,
  V2SI_FTYPE_V2SI_SI_COUNT,
  V2DF_FTYPE_V2DF_V2DF,
  V2DF_FTYPE_V2DF_V2DF_SWAP,
  V2DF_FTYPE_V2DF_V4SF,
  V2DF_FTYPE_V2DF_V2DI,
  V2DF_FTYPE_V2DF_DI,
  V2DF_FTYPE_V2DF_SI,
  V2SF_FTYPE_V2SF_V2SF,
  V1DI_FTYPE_V1DI_V1DI,
  V1DI_FTYPE_V1DI_V1DI_COUNT,
  V1DI_FTYPE_V8QI_V8QI,
  V1DI_FTYPE_V2SI_V2SI,
  V1DI_FTYPE_V1DI_SI_COUNT,
  /* Scalar integer forms (crc32, rotates, ...).  */
  UINT64_FTYPE_UINT64_UINT64,
  UINT_FTYPE_UINT_UINT,
  UINT_FTYPE_UINT_USHORT,
  UINT_FTYPE_UINT_UCHAR,
  UINT16_FTYPE_UINT16_INT,
  UINT8_FTYPE_UINT8_INT,
  /* Vector op with an immediate operand.  */
  V8HI_FTYPE_V8HI_INT,
  V4SI_FTYPE_V4SI_INT,
  V4HI_FTYPE_V4HI_INT,
  V8SF_FTYPE_V8SF_INT,
  V4SI_FTYPE_V8SI_INT,
  V4SF_FTYPE_V8SF_INT,
  V2DF_FTYPE_V4DF_INT,
  V4DF_FTYPE_V4DF_INT,
  V4SF_FTYPE_V4SF_INT,
  V2DI_FTYPE_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_INT,
  V2DF_FTYPE_V2DF_INT,
  /* Three-operand vector forms (blends etc.).  */
  V16QI_FTYPE_V16QI_V16QI_V16QI,
  V8SF_FTYPE_V8SF_V8SF_V8SF,
  V4DF_FTYPE_V4DF_V4DF_V4DF,
  V4SF_FTYPE_V4SF_V4SF_V4SF,
  V2DF_FTYPE_V2DF_V2DF_V2DF,
  /* Two vectors plus an immediate.  */
  V16QI_FTYPE_V16QI_V16QI_INT,
  V8SI_FTYPE_V8SI_V8SI_INT,
  V8SI_FTYPE_V8SI_V4SI_INT,
  V8HI_FTYPE_V8HI_V8HI_INT,
  V8SF_FTYPE_V8SF_V8SF_INT,
  V8SF_FTYPE_V8SF_V4SF_INT,
  V4SI_FTYPE_V4SI_V4SI_INT,
  V4DF_FTYPE_V4DF_V4DF_INT,
  V4DF_FTYPE_V4DF_V2DF_INT,
  V4SF_FTYPE_V4SF_V4SF_INT,
  V2DI_FTYPE_V2DI_V2DI_INT,
  V2DI2TI_FTYPE_V2DI_V2DI_INT,
  V1DI2DI_FTYPE_V1DI_V1DI_INT,
  V2DF_FTYPE_V2DF_V2DF_INT,
  V2DI_FTYPE_V2DI_UINT_UINT,
  V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21490 /* Special builtins with variable number of arguments. */
21491 static const struct builtin_description bdesc_special_args[] =
21493 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21494 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
21497 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21500 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21510 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21512 /* SSE or 3DNow!A */
21513 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21514 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21524 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21525 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21527 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21528 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21531 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21534 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21537 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21538 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21541 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21542 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21543 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21545 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21546 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21547 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21548 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21549 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21551 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21552 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21553 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21554 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21555 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21556 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21557 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21559 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21560 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21561 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21563 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21564 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21565 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21566 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21567 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21568 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21569 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21570 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21573 /* Builtins with variable number of arguments. */
21574 static const struct builtin_description bdesc_args[] =
21576 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21577 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21578 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21579 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21580 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21581 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21582 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21585 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21586 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21587 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21588 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21589 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21590 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21592 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21593 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21594 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21595 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21596 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21597 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21598 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21599 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21601 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21602 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21604 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21605 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21606 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21607 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21609 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21610 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21611 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21612 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21613 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21614 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21616 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21617 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21618 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21619 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21620 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21621 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21623 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21624 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21625 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21627 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21629 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21630 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21631 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21632 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21633 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21634 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21636 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21637 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21638 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21639 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21640 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21641 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21643 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21644 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21645 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21646 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21649 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21650 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21651 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21652 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21654 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21655 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21656 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21657 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21658 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21659 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21660 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21661 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21662 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21663 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21664 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21665 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21666 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21667 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21668 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21671 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21672 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21673 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21674 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21675 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21676 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21679 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21680 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21681 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21682 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21683 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21684 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21685 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21686 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21687 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21688 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21689 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21690 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21692 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21694 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21695 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21696 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21697 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21698 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21699 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21700 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21701 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21703 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21704 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21705 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21706 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21707 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21708 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21709 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21710 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21711 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21712 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21713 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21714 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21715 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21716 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21717 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21718 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21719 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21720 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21721 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21722 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21723 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21724 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21726 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21727 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21728 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21729 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21731 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21732 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21733 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21734 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21736 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21738 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21739 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21740 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21741 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21742 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21745 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21746 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
21748 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21751 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21752 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21754 /* SSE MMX or 3Dnow!A */
21755 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21756 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21757 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21759 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21760 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21761 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21762 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21764 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21765 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21767 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21770 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21772 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21773 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21774 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21775 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21776 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21777 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21779 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21780 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21781 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21782 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21783 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21785 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21787 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21788 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21789 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21790 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21792 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21793 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21794 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21796 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21797 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21798 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21799 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21800 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21801 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21802 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21803 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21805 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21806 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21807 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21808 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21809 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP},
21810 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21811 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21812 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21813 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21815 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21816 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21817 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21827 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21831 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21832 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21833 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21834 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21836 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21838 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21839 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21840 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21842 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21844 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21845 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21846 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21847 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21848 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21849 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21850 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21851 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21853 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21854 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21855 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21857 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21862 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21863 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI },
21865 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21867 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21868 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21880 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21881 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21882 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21883 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21898 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21899 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21901 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21902 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21904 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21907 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21912 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21913 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21914 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21915 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21916 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21917 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21920 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21921 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21922 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21923 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21924 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21925 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21927 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21928 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21929 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21930 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21932 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21933 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21934 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21936 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21938 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21939 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21941 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21944 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21945 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21948 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF},
21949 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21951 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21952 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21953 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21954 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21955 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21956 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21959 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21960 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21961 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21962 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21963 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21964 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21966 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21967 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21968 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21969 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21970 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21971 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21972 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21973 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21974 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21975 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21976 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21977 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21978 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21979 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21980 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21981 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21982 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21983 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21984 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21985 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21986 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21987 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21988 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21989 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21992 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21993 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21996 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21997 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21998 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21999 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
22000 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22001 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22002 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22003 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
22004 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22005 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
22007 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22008 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22009 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22010 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22011 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22012 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22013 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
22014 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
22015 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
22016 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
22017 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
22018 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
22019 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
22021 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
22022 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22023 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22024 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22025 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22026 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22027 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
22028 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22029 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22030 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
22031 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22032 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
22034 /* SSE4.1 and SSE5 */
22035 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22036 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22037 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22038 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22040 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22041 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22042 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
22045 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22046 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
22047 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
22048 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
22049 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
22052 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
22053 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
22054 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
22055 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22058 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
22059 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
22061 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22062 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22063 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22064 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
22067 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
22070 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22071 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22072 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22073 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22074 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22075 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22076 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22077 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22078 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22079 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22080 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22081 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22082 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22083 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22084 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22085 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22086 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22087 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22088 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22089 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22090 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22091 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22092 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22093 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22094 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22095 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22097 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
22098 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
22099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
22100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
22102 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
22105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
22106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22108 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22114 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
22116 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
22117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
22118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
22119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
22120 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
22121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
22123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
22125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
22126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
22127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
22128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
22129 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
22130 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
22131 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22132 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22133 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22134 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22135 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22137 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22138 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22139 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22141 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22142 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22143 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22144 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22145 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22147 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22149 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22150 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22152 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22153 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22154 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22155 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22157 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22158 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22159 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22160 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22161 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22162 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22164 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22165 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22166 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22167 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22168 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22169 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22170 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22171 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22172 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22173 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22174 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22175 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22176 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22177 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22178 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22180 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22181 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22185 enum multi_arg_type {
22195 MULTI_ARG_3_PERMPS,
22196 MULTI_ARG_3_PERMPD,
22203 MULTI_ARG_2_DI_IMM,
22204 MULTI_ARG_2_SI_IMM,
22205 MULTI_ARG_2_HI_IMM,
22206 MULTI_ARG_2_QI_IMM,
22207 MULTI_ARG_2_SF_CMP,
22208 MULTI_ARG_2_DF_CMP,
22209 MULTI_ARG_2_DI_CMP,
22210 MULTI_ARG_2_SI_CMP,
22211 MULTI_ARG_2_HI_CMP,
22212 MULTI_ARG_2_QI_CMP,
22235 static const struct builtin_description bdesc_multi_arg[] =
22237 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22238 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22239 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22240 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22241 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22242 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22243 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22244 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22245 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22246 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22247 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22248 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22249 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
22250 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
22251 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
22252 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
22253 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
22254 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
22255 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
22256 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
22257 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,UNKNOWN, (int)MULTI_ARG_3_QI },
22258 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
22259 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },
22260 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, UNKNOWN, (int)MULTI_ARG_3_QI },
22261 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, UNKNOWN, (int)MULTI_ARG_3_PERMPS },
22262 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, UNKNOWN, (int)MULTI_ARG_3_PERMPD },
22263 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22264 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
22265 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22266 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22267 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22268 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
22269 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22270 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22271 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22272 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
22273 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22274 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
22275 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22276 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
22277 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
22278 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
22279 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
22280 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
22281 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
22282 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
22283 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22284 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
22285 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
22286 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
22287 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
22288 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
22289 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
22290 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
22291 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
22292 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
22293 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
22294 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
22295 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int)MULTI_ARG_1_PH2PS },
22296 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int)MULTI_ARG_1_PS2PH },
22297 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22298 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22299 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22300 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22301 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22302 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22303 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22304 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
22305 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
22306 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22307 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
22308 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22309 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
22310 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
22311 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
22313 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
22314 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22315 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
22316 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
22317 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
22318 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
22319 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
22320 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22321 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22322 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22323 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22324 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22325 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22326 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22327 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22328 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22330 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
22331 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22332 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
22333 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
22334 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
22335 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
22336 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
22337 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22338 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22339 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22340 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22341 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22342 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22343 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22344 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22345 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22347 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
22348 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22349 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
22350 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
22351 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
22352 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
22353 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
22354 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
22355 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22356 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
22357 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
22358 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
22359 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
22360 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
22361 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
22362 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
22364 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
22365 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22366 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
22367 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
22368 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
22369 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
22370 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
22371 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
22372 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22373 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
22374 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
22375 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
22376 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
22377 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
22378 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
22379 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
22381 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
22382 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22383 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
22384 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
22385 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
22386 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
22387 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
22389 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
22390 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22391 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
22392 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
22393 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
22394 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
22395 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
22397 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
22398 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22399 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
22400 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
22401 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
22402 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
22403 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
22405 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22406 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22407 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
22408 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
22409 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
22410 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
22411 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
22413 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
22414 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22415 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
22416 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
22417 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
22418 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
22419 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
22421 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
22422 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22423 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
22424 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
22425 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
22426 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
22427 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
22429 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
22430 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22431 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
22432 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
22433 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
22434 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
22435 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
22437 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
22438 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22439 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
22440 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
22441 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
22442 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
22443 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
22445 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
22446 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
22447 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
22448 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
22449 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, (enum rtx_code) COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
22450 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, (enum rtx_code) COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
22451 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, (enum rtx_code) COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
22452 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, (enum rtx_code) COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
22454 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22455 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22456 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22457 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22458 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22459 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22460 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22461 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22463 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22464 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22465 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22466 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22467 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22468 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22469 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22470 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22473 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22474 in the current target ISA to allow the user to compile particular modules
22475 with different target specific options that differ from the command line
22478 ix86_init_mmx_sse_builtins (void)
22480 const struct builtin_description * d;
22483 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22484 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22485 tree V1DI_type_node
22486 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22487 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22488 tree V2DI_type_node
22489 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22490 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22491 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22492 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22493 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22494 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22495 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22497 tree pchar_type_node = build_pointer_type (char_type_node);
22498 tree pcchar_type_node
22499 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22500 tree pfloat_type_node = build_pointer_type (float_type_node);
22501 tree pcfloat_type_node
22502 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22503 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22504 tree pcv2sf_type_node
22505 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22506 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22507 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22510 tree int_ftype_v4sf_v4sf
22511 = build_function_type_list (integer_type_node,
22512 V4SF_type_node, V4SF_type_node, NULL_TREE);
22513 tree v4si_ftype_v4sf_v4sf
22514 = build_function_type_list (V4SI_type_node,
22515 V4SF_type_node, V4SF_type_node, NULL_TREE);
22516 /* MMX/SSE/integer conversions. */
22517 tree int_ftype_v4sf
22518 = build_function_type_list (integer_type_node,
22519 V4SF_type_node, NULL_TREE);
22520 tree int64_ftype_v4sf
22521 = build_function_type_list (long_long_integer_type_node,
22522 V4SF_type_node, NULL_TREE);
22523 tree int_ftype_v8qi
22524 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22525 tree v4sf_ftype_v4sf_int
22526 = build_function_type_list (V4SF_type_node,
22527 V4SF_type_node, integer_type_node, NULL_TREE);
22528 tree v4sf_ftype_v4sf_int64
22529 = build_function_type_list (V4SF_type_node,
22530 V4SF_type_node, long_long_integer_type_node,
22532 tree v4sf_ftype_v4sf_v2si
22533 = build_function_type_list (V4SF_type_node,
22534 V4SF_type_node, V2SI_type_node, NULL_TREE);
22536 /* Miscellaneous. */
22537 tree v8qi_ftype_v4hi_v4hi
22538 = build_function_type_list (V8QI_type_node,
22539 V4HI_type_node, V4HI_type_node, NULL_TREE);
22540 tree v4hi_ftype_v2si_v2si
22541 = build_function_type_list (V4HI_type_node,
22542 V2SI_type_node, V2SI_type_node, NULL_TREE);
22543 tree v4sf_ftype_v4sf_v4sf_int
22544 = build_function_type_list (V4SF_type_node,
22545 V4SF_type_node, V4SF_type_node,
22546 integer_type_node, NULL_TREE);
22547 tree v2si_ftype_v4hi_v4hi
22548 = build_function_type_list (V2SI_type_node,
22549 V4HI_type_node, V4HI_type_node, NULL_TREE);
22550 tree v4hi_ftype_v4hi_int
22551 = build_function_type_list (V4HI_type_node,
22552 V4HI_type_node, integer_type_node, NULL_TREE);
22553 tree v2si_ftype_v2si_int
22554 = build_function_type_list (V2SI_type_node,
22555 V2SI_type_node, integer_type_node, NULL_TREE);
22556 tree v1di_ftype_v1di_int
22557 = build_function_type_list (V1DI_type_node,
22558 V1DI_type_node, integer_type_node, NULL_TREE);
22560 tree void_ftype_void
22561 = build_function_type (void_type_node, void_list_node);
22562 tree void_ftype_unsigned
22563 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22564 tree void_ftype_unsigned_unsigned
22565 = build_function_type_list (void_type_node, unsigned_type_node,
22566 unsigned_type_node, NULL_TREE);
22567 tree void_ftype_pcvoid_unsigned_unsigned
22568 = build_function_type_list (void_type_node, const_ptr_type_node,
22569 unsigned_type_node, unsigned_type_node,
22571 tree unsigned_ftype_void
22572 = build_function_type (unsigned_type_node, void_list_node);
22573 tree v2si_ftype_v4sf
22574 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22575 /* Loads/stores. */
22576 tree void_ftype_v8qi_v8qi_pchar
22577 = build_function_type_list (void_type_node,
22578 V8QI_type_node, V8QI_type_node,
22579 pchar_type_node, NULL_TREE);
22580 tree v4sf_ftype_pcfloat
22581 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22582 tree v4sf_ftype_v4sf_pcv2sf
22583 = build_function_type_list (V4SF_type_node,
22584 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22585 tree void_ftype_pv2sf_v4sf
22586 = build_function_type_list (void_type_node,
22587 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22588 tree void_ftype_pfloat_v4sf
22589 = build_function_type_list (void_type_node,
22590 pfloat_type_node, V4SF_type_node, NULL_TREE);
22591 tree void_ftype_pdi_di
22592 = build_function_type_list (void_type_node,
22593 pdi_type_node, long_long_unsigned_type_node,
22595 tree void_ftype_pv2di_v2di
22596 = build_function_type_list (void_type_node,
22597 pv2di_type_node, V2DI_type_node, NULL_TREE);
22598 /* Normal vector unops. */
22599 tree v4sf_ftype_v4sf
22600 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22601 tree v16qi_ftype_v16qi
22602 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22603 tree v8hi_ftype_v8hi
22604 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22605 tree v4si_ftype_v4si
22606 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22607 tree v8qi_ftype_v8qi
22608 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22609 tree v4hi_ftype_v4hi
22610 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22612 /* Normal vector binops. */
22613 tree v4sf_ftype_v4sf_v4sf
22614 = build_function_type_list (V4SF_type_node,
22615 V4SF_type_node, V4SF_type_node, NULL_TREE);
22616 tree v8qi_ftype_v8qi_v8qi
22617 = build_function_type_list (V8QI_type_node,
22618 V8QI_type_node, V8QI_type_node, NULL_TREE);
22619 tree v4hi_ftype_v4hi_v4hi
22620 = build_function_type_list (V4HI_type_node,
22621 V4HI_type_node, V4HI_type_node, NULL_TREE);
22622 tree v2si_ftype_v2si_v2si
22623 = build_function_type_list (V2SI_type_node,
22624 V2SI_type_node, V2SI_type_node, NULL_TREE);
22625 tree v1di_ftype_v1di_v1di
22626 = build_function_type_list (V1DI_type_node,
22627 V1DI_type_node, V1DI_type_node, NULL_TREE);
22628 tree v1di_ftype_v1di_v1di_int
22629 = build_function_type_list (V1DI_type_node,
22630 V1DI_type_node, V1DI_type_node,
22631 integer_type_node, NULL_TREE);
22632 tree v2si_ftype_v2sf
22633 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22634 tree v2sf_ftype_v2si
22635 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22636 tree v2si_ftype_v2si
22637 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22638 tree v2sf_ftype_v2sf
22639 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22640 tree v2sf_ftype_v2sf_v2sf
22641 = build_function_type_list (V2SF_type_node,
22642 V2SF_type_node, V2SF_type_node, NULL_TREE);
22643 tree v2si_ftype_v2sf_v2sf
22644 = build_function_type_list (V2SI_type_node,
22645 V2SF_type_node, V2SF_type_node, NULL_TREE);
22646 tree pint_type_node = build_pointer_type (integer_type_node);
22647 tree pdouble_type_node = build_pointer_type (double_type_node);
22648 tree pcdouble_type_node = build_pointer_type (
22649 build_type_variant (double_type_node, 1, 0));
22650 tree int_ftype_v2df_v2df
22651 = build_function_type_list (integer_type_node,
22652 V2DF_type_node, V2DF_type_node, NULL_TREE);
22654 tree void_ftype_pcvoid
22655 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22656 tree v4sf_ftype_v4si
22657 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22658 tree v4si_ftype_v4sf
22659 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22660 tree v2df_ftype_v4si
22661 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22662 tree v4si_ftype_v2df
22663 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22664 tree v4si_ftype_v2df_v2df
22665 = build_function_type_list (V4SI_type_node,
22666 V2DF_type_node, V2DF_type_node, NULL_TREE);
22667 tree v2si_ftype_v2df
22668 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22669 tree v4sf_ftype_v2df
22670 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22671 tree v2df_ftype_v2si
22672 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22673 tree v2df_ftype_v4sf
22674 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22675 tree int_ftype_v2df
22676 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22677 tree int64_ftype_v2df
22678 = build_function_type_list (long_long_integer_type_node,
22679 V2DF_type_node, NULL_TREE);
22680 tree v2df_ftype_v2df_int
22681 = build_function_type_list (V2DF_type_node,
22682 V2DF_type_node, integer_type_node, NULL_TREE);
22683 tree v2df_ftype_v2df_int64
22684 = build_function_type_list (V2DF_type_node,
22685 V2DF_type_node, long_long_integer_type_node,
22687 tree v4sf_ftype_v4sf_v2df
22688 = build_function_type_list (V4SF_type_node,
22689 V4SF_type_node, V2DF_type_node, NULL_TREE);
22690 tree v2df_ftype_v2df_v4sf
22691 = build_function_type_list (V2DF_type_node,
22692 V2DF_type_node, V4SF_type_node, NULL_TREE);
22693 tree v2df_ftype_v2df_v2df_int
22694 = build_function_type_list (V2DF_type_node,
22695 V2DF_type_node, V2DF_type_node,
22698 tree v2df_ftype_v2df_pcdouble
22699 = build_function_type_list (V2DF_type_node,
22700 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22701 tree void_ftype_pdouble_v2df
22702 = build_function_type_list (void_type_node,
22703 pdouble_type_node, V2DF_type_node, NULL_TREE);
22704 tree void_ftype_pint_int
22705 = build_function_type_list (void_type_node,
22706 pint_type_node, integer_type_node, NULL_TREE);
22707 tree void_ftype_v16qi_v16qi_pchar
22708 = build_function_type_list (void_type_node,
22709 V16QI_type_node, V16QI_type_node,
22710 pchar_type_node, NULL_TREE);
22711 tree v2df_ftype_pcdouble
22712 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22713 tree v2df_ftype_v2df_v2df
22714 = build_function_type_list (V2DF_type_node,
22715 V2DF_type_node, V2DF_type_node, NULL_TREE);
22716 tree v16qi_ftype_v16qi_v16qi
22717 = build_function_type_list (V16QI_type_node,
22718 V16QI_type_node, V16QI_type_node, NULL_TREE);
22719 tree v8hi_ftype_v8hi_v8hi
22720 = build_function_type_list (V8HI_type_node,
22721 V8HI_type_node, V8HI_type_node, NULL_TREE);
22722 tree v4si_ftype_v4si_v4si
22723 = build_function_type_list (V4SI_type_node,
22724 V4SI_type_node, V4SI_type_node, NULL_TREE);
22725 tree v2di_ftype_v2di_v2di
22726 = build_function_type_list (V2DI_type_node,
22727 V2DI_type_node, V2DI_type_node, NULL_TREE);
22728 tree v2di_ftype_v2df_v2df
22729 = build_function_type_list (V2DI_type_node,
22730 V2DF_type_node, V2DF_type_node, NULL_TREE);
22731 tree v2df_ftype_v2df
22732 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22733 tree v2di_ftype_v2di_int
22734 = build_function_type_list (V2DI_type_node,
22735 V2DI_type_node, integer_type_node, NULL_TREE);
22736 tree v2di_ftype_v2di_v2di_int
22737 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22738 V2DI_type_node, integer_type_node, NULL_TREE);
22739 tree v4si_ftype_v4si_int
22740 = build_function_type_list (V4SI_type_node,
22741 V4SI_type_node, integer_type_node, NULL_TREE);
22742 tree v8hi_ftype_v8hi_int
22743 = build_function_type_list (V8HI_type_node,
22744 V8HI_type_node, integer_type_node, NULL_TREE);
22745 tree v4si_ftype_v8hi_v8hi
22746 = build_function_type_list (V4SI_type_node,
22747 V8HI_type_node, V8HI_type_node, NULL_TREE);
22748 tree v1di_ftype_v8qi_v8qi
22749 = build_function_type_list (V1DI_type_node,
22750 V8QI_type_node, V8QI_type_node, NULL_TREE);
22751 tree v1di_ftype_v2si_v2si
22752 = build_function_type_list (V1DI_type_node,
22753 V2SI_type_node, V2SI_type_node, NULL_TREE);
22754 tree v2di_ftype_v16qi_v16qi
22755 = build_function_type_list (V2DI_type_node,
22756 V16QI_type_node, V16QI_type_node, NULL_TREE);
22757 tree v2di_ftype_v4si_v4si
22758 = build_function_type_list (V2DI_type_node,
22759 V4SI_type_node, V4SI_type_node, NULL_TREE);
22760 tree int_ftype_v16qi
22761 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22762 tree v16qi_ftype_pcchar
22763 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22764 tree void_ftype_pchar_v16qi
22765 = build_function_type_list (void_type_node,
22766 pchar_type_node, V16QI_type_node, NULL_TREE);
22768 tree v2di_ftype_v2di_unsigned_unsigned
22769 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22770 unsigned_type_node, unsigned_type_node,
22772 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22773 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22774 unsigned_type_node, unsigned_type_node,
22776 tree v2di_ftype_v2di_v16qi
22777 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22779 tree v2df_ftype_v2df_v2df_v2df
22780 = build_function_type_list (V2DF_type_node,
22781 V2DF_type_node, V2DF_type_node,
22782 V2DF_type_node, NULL_TREE);
22783 tree v4sf_ftype_v4sf_v4sf_v4sf
22784 = build_function_type_list (V4SF_type_node,
22785 V4SF_type_node, V4SF_type_node,
22786 V4SF_type_node, NULL_TREE);
22787 tree v8hi_ftype_v16qi
22788 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22790 tree v4si_ftype_v16qi
22791 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22793 tree v2di_ftype_v16qi
22794 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22796 tree v4si_ftype_v8hi
22797 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22799 tree v2di_ftype_v8hi
22800 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22802 tree v2di_ftype_v4si
22803 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22805 tree v2di_ftype_pv2di
22806 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22808 tree v16qi_ftype_v16qi_v16qi_int
22809 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22810 V16QI_type_node, integer_type_node,
22812 tree v16qi_ftype_v16qi_v16qi_v16qi
22813 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22814 V16QI_type_node, V16QI_type_node,
22816 tree v8hi_ftype_v8hi_v8hi_int
22817 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22818 V8HI_type_node, integer_type_node,
22820 tree v4si_ftype_v4si_v4si_int
22821 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22822 V4SI_type_node, integer_type_node,
22824 tree int_ftype_v2di_v2di
22825 = build_function_type_list (integer_type_node,
22826 V2DI_type_node, V2DI_type_node,
22828 tree int_ftype_v16qi_int_v16qi_int_int
22829 = build_function_type_list (integer_type_node,
22836 tree v16qi_ftype_v16qi_int_v16qi_int_int
22837 = build_function_type_list (V16QI_type_node,
22844 tree int_ftype_v16qi_v16qi_int
22845 = build_function_type_list (integer_type_node,
22851 /* SSE5 instructions */
22852 tree v2di_ftype_v2di_v2di_v2di
22853 = build_function_type_list (V2DI_type_node,
22859 tree v4si_ftype_v4si_v4si_v4si
22860 = build_function_type_list (V4SI_type_node,
22866 tree v4si_ftype_v4si_v4si_v2di
22867 = build_function_type_list (V4SI_type_node,
22873 tree v8hi_ftype_v8hi_v8hi_v8hi
22874 = build_function_type_list (V8HI_type_node,
22880 tree v8hi_ftype_v8hi_v8hi_v4si
22881 = build_function_type_list (V8HI_type_node,
22887 tree v2df_ftype_v2df_v2df_v16qi
22888 = build_function_type_list (V2DF_type_node,
22894 tree v4sf_ftype_v4sf_v4sf_v16qi
22895 = build_function_type_list (V4SF_type_node,
22901 tree v2di_ftype_v2di_si
22902 = build_function_type_list (V2DI_type_node,
22907 tree v4si_ftype_v4si_si
22908 = build_function_type_list (V4SI_type_node,
22913 tree v8hi_ftype_v8hi_si
22914 = build_function_type_list (V8HI_type_node,
22919 tree v16qi_ftype_v16qi_si
22920 = build_function_type_list (V16QI_type_node,
22924 tree v4sf_ftype_v4hi
22925 = build_function_type_list (V4SF_type_node,
22929 tree v4hi_ftype_v4sf
22930 = build_function_type_list (V4HI_type_node,
22934 tree v2di_ftype_v2di
22935 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22937 tree v16qi_ftype_v8hi_v8hi
22938 = build_function_type_list (V16QI_type_node,
22939 V8HI_type_node, V8HI_type_node,
22941 tree v8hi_ftype_v4si_v4si
22942 = build_function_type_list (V8HI_type_node,
22943 V4SI_type_node, V4SI_type_node,
22945 tree v8hi_ftype_v16qi_v16qi
22946 = build_function_type_list (V8HI_type_node,
22947 V16QI_type_node, V16QI_type_node,
22949 tree v4hi_ftype_v8qi_v8qi
22950 = build_function_type_list (V4HI_type_node,
22951 V8QI_type_node, V8QI_type_node,
22953 tree unsigned_ftype_unsigned_uchar
22954 = build_function_type_list (unsigned_type_node,
22955 unsigned_type_node,
22956 unsigned_char_type_node,
22958 tree unsigned_ftype_unsigned_ushort
22959 = build_function_type_list (unsigned_type_node,
22960 unsigned_type_node,
22961 short_unsigned_type_node,
22963 tree unsigned_ftype_unsigned_unsigned
22964 = build_function_type_list (unsigned_type_node,
22965 unsigned_type_node,
22966 unsigned_type_node,
22968 tree uint64_ftype_uint64_uint64
22969 = build_function_type_list (long_long_unsigned_type_node,
22970 long_long_unsigned_type_node,
22971 long_long_unsigned_type_node,
22973 tree float_ftype_float
22974 = build_function_type_list (float_type_node,
22979 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22981 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22983 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22985 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22987 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22989 tree v8sf_ftype_v8sf
22990 = build_function_type_list (V8SF_type_node,
22993 tree v8si_ftype_v8sf
22994 = build_function_type_list (V8SI_type_node,
22997 tree v8sf_ftype_v8si
22998 = build_function_type_list (V8SF_type_node,
23001 tree v4si_ftype_v4df
23002 = build_function_type_list (V4SI_type_node,
23005 tree v4df_ftype_v4df
23006 = build_function_type_list (V4DF_type_node,
23009 tree v4df_ftype_v4si
23010 = build_function_type_list (V4DF_type_node,
23013 tree v4df_ftype_v4sf
23014 = build_function_type_list (V4DF_type_node,
23017 tree v4sf_ftype_v4df
23018 = build_function_type_list (V4SF_type_node,
23021 tree v8sf_ftype_v8sf_v8sf
23022 = build_function_type_list (V8SF_type_node,
23023 V8SF_type_node, V8SF_type_node,
23025 tree v4df_ftype_v4df_v4df
23026 = build_function_type_list (V4DF_type_node,
23027 V4DF_type_node, V4DF_type_node,
23029 tree v8sf_ftype_v8sf_int
23030 = build_function_type_list (V8SF_type_node,
23031 V8SF_type_node, integer_type_node,
23033 tree v4si_ftype_v8si_int
23034 = build_function_type_list (V4SI_type_node,
23035 V8SI_type_node, integer_type_node,
23037 tree v4df_ftype_v4df_int
23038 = build_function_type_list (V4DF_type_node,
23039 V4DF_type_node, integer_type_node,
23041 tree v4sf_ftype_v8sf_int
23042 = build_function_type_list (V4SF_type_node,
23043 V8SF_type_node, integer_type_node,
23045 tree v2df_ftype_v4df_int
23046 = build_function_type_list (V2DF_type_node,
23047 V4DF_type_node, integer_type_node,
23049 tree v8sf_ftype_v8sf_v8sf_int
23050 = build_function_type_list (V8SF_type_node,
23051 V8SF_type_node, V8SF_type_node,
23054 tree v8sf_ftype_v8sf_v8sf_v8sf
23055 = build_function_type_list (V8SF_type_node,
23056 V8SF_type_node, V8SF_type_node,
23059 tree v4df_ftype_v4df_v4df_v4df
23060 = build_function_type_list (V4DF_type_node,
23061 V4DF_type_node, V4DF_type_node,
23064 tree v8si_ftype_v8si_v8si_int
23065 = build_function_type_list (V8SI_type_node,
23066 V8SI_type_node, V8SI_type_node,
23069 tree v4df_ftype_v4df_v4df_int
23070 = build_function_type_list (V4DF_type_node,
23071 V4DF_type_node, V4DF_type_node,
23074 tree v8sf_ftype_pcfloat
23075 = build_function_type_list (V8SF_type_node,
23078 tree v4df_ftype_pcdouble
23079 = build_function_type_list (V4DF_type_node,
23080 pcdouble_type_node,
23082 tree pcv4sf_type_node
23083 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
23084 tree pcv2df_type_node
23085 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
23086 tree v8sf_ftype_pcv4sf
23087 = build_function_type_list (V8SF_type_node,
23090 tree v4df_ftype_pcv2df
23091 = build_function_type_list (V4DF_type_node,
23094 tree v32qi_ftype_pcchar
23095 = build_function_type_list (V32QI_type_node,
23098 tree void_ftype_pchar_v32qi
23099 = build_function_type_list (void_type_node,
23100 pchar_type_node, V32QI_type_node,
23102 tree v8si_ftype_v8si_v4si_int
23103 = build_function_type_list (V8SI_type_node,
23104 V8SI_type_node, V4SI_type_node,
23107 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
23108 tree void_ftype_pv4di_v4di
23109 = build_function_type_list (void_type_node,
23110 pv4di_type_node, V4DI_type_node,
23112 tree v8sf_ftype_v8sf_v4sf_int
23113 = build_function_type_list (V8SF_type_node,
23114 V8SF_type_node, V4SF_type_node,
23117 tree v4df_ftype_v4df_v2df_int
23118 = build_function_type_list (V4DF_type_node,
23119 V4DF_type_node, V2DF_type_node,
23122 tree void_ftype_pfloat_v8sf
23123 = build_function_type_list (void_type_node,
23124 pfloat_type_node, V8SF_type_node,
23126 tree void_ftype_pdouble_v4df
23127 = build_function_type_list (void_type_node,
23128 pdouble_type_node, V4DF_type_node,
23130 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
23131 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
23132 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
23133 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
23134 tree pcv8sf_type_node
23135 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
23136 tree pcv4df_type_node
23137 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
23138 tree v8sf_ftype_pcv8sf_v8sf
23139 = build_function_type_list (V8SF_type_node,
23140 pcv8sf_type_node, V8SF_type_node,
23142 tree v4df_ftype_pcv4df_v4df
23143 = build_function_type_list (V4DF_type_node,
23144 pcv4df_type_node, V4DF_type_node,
23146 tree v4sf_ftype_pcv4sf_v4sf
23147 = build_function_type_list (V4SF_type_node,
23148 pcv4sf_type_node, V4SF_type_node,
23150 tree v2df_ftype_pcv2df_v2df
23151 = build_function_type_list (V2DF_type_node,
23152 pcv2df_type_node, V2DF_type_node,
23154 tree void_ftype_pv8sf_v8sf_v8sf
23155 = build_function_type_list (void_type_node,
23156 pv8sf_type_node, V8SF_type_node,
23159 tree void_ftype_pv4df_v4df_v4df
23160 = build_function_type_list (void_type_node,
23161 pv4df_type_node, V4DF_type_node,
23164 tree void_ftype_pv4sf_v4sf_v4sf
23165 = build_function_type_list (void_type_node,
23166 pv4sf_type_node, V4SF_type_node,
23169 tree void_ftype_pv2df_v2df_v2df
23170 = build_function_type_list (void_type_node,
23171 pv2df_type_node, V2DF_type_node,
23174 tree v4df_ftype_v2df
23175 = build_function_type_list (V4DF_type_node,
23178 tree v8sf_ftype_v4sf
23179 = build_function_type_list (V8SF_type_node,
23182 tree v8si_ftype_v4si
23183 = build_function_type_list (V8SI_type_node,
23186 tree v2df_ftype_v4df
23187 = build_function_type_list (V2DF_type_node,
23190 tree v4sf_ftype_v8sf
23191 = build_function_type_list (V4SF_type_node,
23194 tree v4si_ftype_v8si
23195 = build_function_type_list (V4SI_type_node,
23198 tree int_ftype_v4df
23199 = build_function_type_list (integer_type_node,
23202 tree int_ftype_v8sf
23203 = build_function_type_list (integer_type_node,
23206 tree int_ftype_v8sf_v8sf
23207 = build_function_type_list (integer_type_node,
23208 V8SF_type_node, V8SF_type_node,
23210 tree int_ftype_v4di_v4di
23211 = build_function_type_list (integer_type_node,
23212 V4DI_type_node, V4DI_type_node,
23214 tree int_ftype_v4df_v4df
23215 = build_function_type_list (integer_type_node,
23216 V4DF_type_node, V4DF_type_node,
23218 tree v8sf_ftype_v8sf_v8si
23219 = build_function_type_list (V8SF_type_node,
23220 V8SF_type_node, V8SI_type_node,
23222 tree v4df_ftype_v4df_v4di
23223 = build_function_type_list (V4DF_type_node,
23224 V4DF_type_node, V4DI_type_node,
23226 tree v4sf_ftype_v4sf_v4si
23227 = build_function_type_list (V4SF_type_node,
23228 V4SF_type_node, V4SI_type_node, NULL_TREE);
23229 tree v2df_ftype_v2df_v2di
23230 = build_function_type_list (V2DF_type_node,
23231 V2DF_type_node, V2DI_type_node, NULL_TREE);
23233 /* Integer intrinsics. */
23234 tree uint64_ftype_void
23235 = build_function_type (long_long_unsigned_type_node,
23238 = build_function_type_list (integer_type_node,
23239 integer_type_node, NULL_TREE);
23240 tree int64_ftype_int64
23241 = build_function_type_list (long_long_integer_type_node,
23242 long_long_integer_type_node,
23244 tree uint64_ftype_int
23245 = build_function_type_list (long_long_unsigned_type_node,
23246 integer_type_node, NULL_TREE);
23247 tree punsigned_type_node = build_pointer_type (unsigned_type_node);
23248 tree uint64_ftype_punsigned
23249 = build_function_type_list (long_long_unsigned_type_node,
23250 punsigned_type_node, NULL_TREE);
23251 tree ushort_ftype_ushort_int
23252 = build_function_type_list (short_unsigned_type_node,
23253 short_unsigned_type_node,
23256 tree uchar_ftype_uchar_int
23257 = build_function_type_list (unsigned_char_type_node,
23258 unsigned_char_type_node,
23264 /* Add all special builtins with variable number of operands. */
23265 for (i = 0, d = bdesc_special_args;
23266 i < ARRAY_SIZE (bdesc_special_args);
23274 switch ((enum ix86_special_builtin_type) d->flag)
23276 case VOID_FTYPE_VOID:
23277 type = void_ftype_void;
23279 case UINT64_FTYPE_VOID:
23280 type = uint64_ftype_void;
23282 case UINT64_FTYPE_PUNSIGNED:
23283 type = uint64_ftype_punsigned;
23285 case V32QI_FTYPE_PCCHAR:
23286 type = v32qi_ftype_pcchar;
23288 case V16QI_FTYPE_PCCHAR:
23289 type = v16qi_ftype_pcchar;
23291 case V8SF_FTYPE_PCV4SF:
23292 type = v8sf_ftype_pcv4sf;
23294 case V8SF_FTYPE_PCFLOAT:
23295 type = v8sf_ftype_pcfloat;
23297 case V4DF_FTYPE_PCV2DF:
23298 type = v4df_ftype_pcv2df;
23300 case V4DF_FTYPE_PCDOUBLE:
23301 type = v4df_ftype_pcdouble;
23303 case V4SF_FTYPE_PCFLOAT:
23304 type = v4sf_ftype_pcfloat;
23306 case V2DI_FTYPE_PV2DI:
23307 type = v2di_ftype_pv2di;
23309 case V2DF_FTYPE_PCDOUBLE:
23310 type = v2df_ftype_pcdouble;
23312 case V8SF_FTYPE_PCV8SF_V8SF:
23313 type = v8sf_ftype_pcv8sf_v8sf;
23315 case V4DF_FTYPE_PCV4DF_V4DF:
23316 type = v4df_ftype_pcv4df_v4df;
23318 case V4SF_FTYPE_V4SF_PCV2SF:
23319 type = v4sf_ftype_v4sf_pcv2sf;
23321 case V4SF_FTYPE_PCV4SF_V4SF:
23322 type = v4sf_ftype_pcv4sf_v4sf;
23324 case V2DF_FTYPE_V2DF_PCDOUBLE:
23325 type = v2df_ftype_v2df_pcdouble;
23327 case V2DF_FTYPE_PCV2DF_V2DF:
23328 type = v2df_ftype_pcv2df_v2df;
23330 case VOID_FTYPE_PV2SF_V4SF:
23331 type = void_ftype_pv2sf_v4sf;
23333 case VOID_FTYPE_PV4DI_V4DI:
23334 type = void_ftype_pv4di_v4di;
23336 case VOID_FTYPE_PV2DI_V2DI:
23337 type = void_ftype_pv2di_v2di;
23339 case VOID_FTYPE_PCHAR_V32QI:
23340 type = void_ftype_pchar_v32qi;
23342 case VOID_FTYPE_PCHAR_V16QI:
23343 type = void_ftype_pchar_v16qi;
23345 case VOID_FTYPE_PFLOAT_V8SF:
23346 type = void_ftype_pfloat_v8sf;
23348 case VOID_FTYPE_PFLOAT_V4SF:
23349 type = void_ftype_pfloat_v4sf;
23351 case VOID_FTYPE_PDOUBLE_V4DF:
23352 type = void_ftype_pdouble_v4df;
23354 case VOID_FTYPE_PDOUBLE_V2DF:
23355 type = void_ftype_pdouble_v2df;
23357 case VOID_FTYPE_PDI_DI:
23358 type = void_ftype_pdi_di;
23360 case VOID_FTYPE_PINT_INT:
23361 type = void_ftype_pint_int;
23363 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23364 type = void_ftype_pv8sf_v8sf_v8sf;
23366 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23367 type = void_ftype_pv4df_v4df_v4df;
23369 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23370 type = void_ftype_pv4sf_v4sf_v4sf;
23372 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23373 type = void_ftype_pv2df_v2df_v2df;
23376 gcc_unreachable ();
23379 def_builtin (d->mask, d->name, type, d->code);
23382 /* Add all builtins with variable number of operands. */
23383 for (i = 0, d = bdesc_args;
23384 i < ARRAY_SIZE (bdesc_args);
23392 switch ((enum ix86_builtin_type) d->flag)
23394 case FLOAT_FTYPE_FLOAT:
23395 type = float_ftype_float;
23397 case INT_FTYPE_V8SF_V8SF_PTEST:
23398 type = int_ftype_v8sf_v8sf;
23400 case INT_FTYPE_V4DI_V4DI_PTEST:
23401 type = int_ftype_v4di_v4di;
23403 case INT_FTYPE_V4DF_V4DF_PTEST:
23404 type = int_ftype_v4df_v4df;
23406 case INT_FTYPE_V4SF_V4SF_PTEST:
23407 type = int_ftype_v4sf_v4sf;
23409 case INT_FTYPE_V2DI_V2DI_PTEST:
23410 type = int_ftype_v2di_v2di;
23412 case INT_FTYPE_V2DF_V2DF_PTEST:
23413 type = int_ftype_v2df_v2df;
23415 case INT_FTYPE_INT:
23416 type = int_ftype_int;
23418 case UINT64_FTYPE_INT:
23419 type = uint64_ftype_int;
23421 case INT64_FTYPE_INT64:
23422 type = int64_ftype_int64;
23424 case INT64_FTYPE_V4SF:
23425 type = int64_ftype_v4sf;
23427 case INT64_FTYPE_V2DF:
23428 type = int64_ftype_v2df;
23430 case INT_FTYPE_V16QI:
23431 type = int_ftype_v16qi;
23433 case INT_FTYPE_V8QI:
23434 type = int_ftype_v8qi;
23436 case INT_FTYPE_V8SF:
23437 type = int_ftype_v8sf;
23439 case INT_FTYPE_V4DF:
23440 type = int_ftype_v4df;
23442 case INT_FTYPE_V4SF:
23443 type = int_ftype_v4sf;
23445 case INT_FTYPE_V2DF:
23446 type = int_ftype_v2df;
23448 case V16QI_FTYPE_V16QI:
23449 type = v16qi_ftype_v16qi;
23451 case V8SI_FTYPE_V8SF:
23452 type = v8si_ftype_v8sf;
23454 case V8SI_FTYPE_V4SI:
23455 type = v8si_ftype_v4si;
23457 case V8HI_FTYPE_V8HI:
23458 type = v8hi_ftype_v8hi;
23460 case V8HI_FTYPE_V16QI:
23461 type = v8hi_ftype_v16qi;
23463 case V8QI_FTYPE_V8QI:
23464 type = v8qi_ftype_v8qi;
23466 case V8SF_FTYPE_V8SF:
23467 type = v8sf_ftype_v8sf;
23469 case V8SF_FTYPE_V8SI:
23470 type = v8sf_ftype_v8si;
23472 case V8SF_FTYPE_V4SF:
23473 type = v8sf_ftype_v4sf;
23475 case V4SI_FTYPE_V4DF:
23476 type = v4si_ftype_v4df;
23478 case V4SI_FTYPE_V4SI:
23479 type = v4si_ftype_v4si;
23481 case V4SI_FTYPE_V16QI:
23482 type = v4si_ftype_v16qi;
23484 case V4SI_FTYPE_V8SI:
23485 type = v4si_ftype_v8si;
23487 case V4SI_FTYPE_V8HI:
23488 type = v4si_ftype_v8hi;
23490 case V4SI_FTYPE_V4SF:
23491 type = v4si_ftype_v4sf;
23493 case V4SI_FTYPE_V2DF:
23494 type = v4si_ftype_v2df;
23496 case V4HI_FTYPE_V4HI:
23497 type = v4hi_ftype_v4hi;
23499 case V4DF_FTYPE_V4DF:
23500 type = v4df_ftype_v4df;
23502 case V4DF_FTYPE_V4SI:
23503 type = v4df_ftype_v4si;
23505 case V4DF_FTYPE_V4SF:
23506 type = v4df_ftype_v4sf;
23508 case V4DF_FTYPE_V2DF:
23509 type = v4df_ftype_v2df;
23511 case V4SF_FTYPE_V4SF:
23512 case V4SF_FTYPE_V4SF_VEC_MERGE:
23513 type = v4sf_ftype_v4sf;
23515 case V4SF_FTYPE_V8SF:
23516 type = v4sf_ftype_v8sf;
23518 case V4SF_FTYPE_V4SI:
23519 type = v4sf_ftype_v4si;
23521 case V4SF_FTYPE_V4DF:
23522 type = v4sf_ftype_v4df;
23524 case V4SF_FTYPE_V2DF:
23525 type = v4sf_ftype_v2df;
23527 case V2DI_FTYPE_V2DI:
23528 type = v2di_ftype_v2di;
23530 case V2DI_FTYPE_V16QI:
23531 type = v2di_ftype_v16qi;
23533 case V2DI_FTYPE_V8HI:
23534 type = v2di_ftype_v8hi;
23536 case V2DI_FTYPE_V4SI:
23537 type = v2di_ftype_v4si;
23539 case V2SI_FTYPE_V2SI:
23540 type = v2si_ftype_v2si;
23542 case V2SI_FTYPE_V4SF:
23543 type = v2si_ftype_v4sf;
23545 case V2SI_FTYPE_V2DF:
23546 type = v2si_ftype_v2df;
23548 case V2SI_FTYPE_V2SF:
23549 type = v2si_ftype_v2sf;
23551 case V2DF_FTYPE_V4DF:
23552 type = v2df_ftype_v4df;
23554 case V2DF_FTYPE_V4SF:
23555 type = v2df_ftype_v4sf;
23557 case V2DF_FTYPE_V2DF:
23558 case V2DF_FTYPE_V2DF_VEC_MERGE:
23559 type = v2df_ftype_v2df;
23561 case V2DF_FTYPE_V2SI:
23562 type = v2df_ftype_v2si;
23564 case V2DF_FTYPE_V4SI:
23565 type = v2df_ftype_v4si;
23567 case V2SF_FTYPE_V2SF:
23568 type = v2sf_ftype_v2sf;
23570 case V2SF_FTYPE_V2SI:
23571 type = v2sf_ftype_v2si;
23573 case V16QI_FTYPE_V16QI_V16QI:
23574 type = v16qi_ftype_v16qi_v16qi;
23576 case V16QI_FTYPE_V8HI_V8HI:
23577 type = v16qi_ftype_v8hi_v8hi;
23579 case V8QI_FTYPE_V8QI_V8QI:
23580 type = v8qi_ftype_v8qi_v8qi;
23582 case V8QI_FTYPE_V4HI_V4HI:
23583 type = v8qi_ftype_v4hi_v4hi;
23585 case V8HI_FTYPE_V8HI_V8HI:
23586 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23587 type = v8hi_ftype_v8hi_v8hi;
23589 case V8HI_FTYPE_V16QI_V16QI:
23590 type = v8hi_ftype_v16qi_v16qi;
23592 case V8HI_FTYPE_V4SI_V4SI:
23593 type = v8hi_ftype_v4si_v4si;
23595 case V8HI_FTYPE_V8HI_SI_COUNT:
23596 type = v8hi_ftype_v8hi_int;
23598 case V8SF_FTYPE_V8SF_V8SF:
23599 type = v8sf_ftype_v8sf_v8sf;
23601 case V8SF_FTYPE_V8SF_V8SI:
23602 type = v8sf_ftype_v8sf_v8si;
23604 case V4SI_FTYPE_V4SI_V4SI:
23605 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23606 type = v4si_ftype_v4si_v4si;
23608 case V4SI_FTYPE_V8HI_V8HI:
23609 type = v4si_ftype_v8hi_v8hi;
23611 case V4SI_FTYPE_V4SF_V4SF:
23612 type = v4si_ftype_v4sf_v4sf;
23614 case V4SI_FTYPE_V2DF_V2DF:
23615 type = v4si_ftype_v2df_v2df;
23617 case V4SI_FTYPE_V4SI_SI_COUNT:
23618 type = v4si_ftype_v4si_int;
23620 case V4HI_FTYPE_V4HI_V4HI:
23621 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23622 type = v4hi_ftype_v4hi_v4hi;
23624 case V4HI_FTYPE_V8QI_V8QI:
23625 type = v4hi_ftype_v8qi_v8qi;
23627 case V4HI_FTYPE_V2SI_V2SI:
23628 type = v4hi_ftype_v2si_v2si;
23630 case V4HI_FTYPE_V4HI_SI_COUNT:
23631 type = v4hi_ftype_v4hi_int;
23633 case V4DF_FTYPE_V4DF_V4DF:
23634 type = v4df_ftype_v4df_v4df;
23636 case V4DF_FTYPE_V4DF_V4DI:
23637 type = v4df_ftype_v4df_v4di;
23639 case V4SF_FTYPE_V4SF_V4SF:
23640 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23641 type = v4sf_ftype_v4sf_v4sf;
23643 case V4SF_FTYPE_V4SF_V4SI:
23644 type = v4sf_ftype_v4sf_v4si;
23646 case V4SF_FTYPE_V4SF_V2SI:
23647 type = v4sf_ftype_v4sf_v2si;
23649 case V4SF_FTYPE_V4SF_V2DF:
23650 type = v4sf_ftype_v4sf_v2df;
23652 case V4SF_FTYPE_V4SF_DI:
23653 type = v4sf_ftype_v4sf_int64;
23655 case V4SF_FTYPE_V4SF_SI:
23656 type = v4sf_ftype_v4sf_int;
23658 case V2DI_FTYPE_V2DI_V2DI:
23659 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23660 type = v2di_ftype_v2di_v2di;
23662 case V2DI_FTYPE_V16QI_V16QI:
23663 type = v2di_ftype_v16qi_v16qi;
23665 case V2DI_FTYPE_V4SI_V4SI:
23666 type = v2di_ftype_v4si_v4si;
23668 case V2DI_FTYPE_V2DI_V16QI:
23669 type = v2di_ftype_v2di_v16qi;
23671 case V2DI_FTYPE_V2DF_V2DF:
23672 type = v2di_ftype_v2df_v2df;
23674 case V2DI_FTYPE_V2DI_SI_COUNT:
23675 type = v2di_ftype_v2di_int;
23677 case V2SI_FTYPE_V2SI_V2SI:
23678 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23679 type = v2si_ftype_v2si_v2si;
23681 case V2SI_FTYPE_V4HI_V4HI:
23682 type = v2si_ftype_v4hi_v4hi;
23684 case V2SI_FTYPE_V2SF_V2SF:
23685 type = v2si_ftype_v2sf_v2sf;
23687 case V2SI_FTYPE_V2SI_SI_COUNT:
23688 type = v2si_ftype_v2si_int;
23690 case V2DF_FTYPE_V2DF_V2DF:
23691 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23692 type = v2df_ftype_v2df_v2df;
23694 case V2DF_FTYPE_V2DF_V4SF:
23695 type = v2df_ftype_v2df_v4sf;
23697 case V2DF_FTYPE_V2DF_V2DI:
23698 type = v2df_ftype_v2df_v2di;
23700 case V2DF_FTYPE_V2DF_DI:
23701 type = v2df_ftype_v2df_int64;
23703 case V2DF_FTYPE_V2DF_SI:
23704 type = v2df_ftype_v2df_int;
23706 case V2SF_FTYPE_V2SF_V2SF:
23707 type = v2sf_ftype_v2sf_v2sf;
23709 case V1DI_FTYPE_V1DI_V1DI:
23710 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23711 type = v1di_ftype_v1di_v1di;
23713 case V1DI_FTYPE_V8QI_V8QI:
23714 type = v1di_ftype_v8qi_v8qi;
23716 case V1DI_FTYPE_V2SI_V2SI:
23717 type = v1di_ftype_v2si_v2si;
23719 case V1DI_FTYPE_V1DI_SI_COUNT:
23720 type = v1di_ftype_v1di_int;
23722 case UINT64_FTYPE_UINT64_UINT64:
23723 type = uint64_ftype_uint64_uint64;
23725 case UINT_FTYPE_UINT_UINT:
23726 type = unsigned_ftype_unsigned_unsigned;
23728 case UINT_FTYPE_UINT_USHORT:
23729 type = unsigned_ftype_unsigned_ushort;
23731 case UINT_FTYPE_UINT_UCHAR:
23732 type = unsigned_ftype_unsigned_uchar;
23734 case UINT16_FTYPE_UINT16_INT:
23735 type = ushort_ftype_ushort_int;
23737 case UINT8_FTYPE_UINT8_INT:
23738 type = uchar_ftype_uchar_int;
23740 case V8HI_FTYPE_V8HI_INT:
23741 type = v8hi_ftype_v8hi_int;
23743 case V8SF_FTYPE_V8SF_INT:
23744 type = v8sf_ftype_v8sf_int;
23746 case V4SI_FTYPE_V4SI_INT:
23747 type = v4si_ftype_v4si_int;
23749 case V4SI_FTYPE_V8SI_INT:
23750 type = v4si_ftype_v8si_int;
23752 case V4HI_FTYPE_V4HI_INT:
23753 type = v4hi_ftype_v4hi_int;
23755 case V4DF_FTYPE_V4DF_INT:
23756 type = v4df_ftype_v4df_int;
23758 case V4SF_FTYPE_V4SF_INT:
23759 type = v4sf_ftype_v4sf_int;
23761 case V4SF_FTYPE_V8SF_INT:
23762 type = v4sf_ftype_v8sf_int;
23764 case V2DI_FTYPE_V2DI_INT:
23765 case V2DI2TI_FTYPE_V2DI_INT:
23766 type = v2di_ftype_v2di_int;
23768 case V2DF_FTYPE_V2DF_INT:
23769 type = v2df_ftype_v2df_int;
23771 case V2DF_FTYPE_V4DF_INT:
23772 type = v2df_ftype_v4df_int;
23774 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23775 type = v16qi_ftype_v16qi_v16qi_v16qi;
23777 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23778 type = v8sf_ftype_v8sf_v8sf_v8sf;
23780 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23781 type = v4df_ftype_v4df_v4df_v4df;
23783 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23784 type = v4sf_ftype_v4sf_v4sf_v4sf;
23786 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23787 type = v2df_ftype_v2df_v2df_v2df;
23789 case V16QI_FTYPE_V16QI_V16QI_INT:
23790 type = v16qi_ftype_v16qi_v16qi_int;
23792 case V8SI_FTYPE_V8SI_V8SI_INT:
23793 type = v8si_ftype_v8si_v8si_int;
23795 case V8SI_FTYPE_V8SI_V4SI_INT:
23796 type = v8si_ftype_v8si_v4si_int;
23798 case V8HI_FTYPE_V8HI_V8HI_INT:
23799 type = v8hi_ftype_v8hi_v8hi_int;
23801 case V8SF_FTYPE_V8SF_V8SF_INT:
23802 type = v8sf_ftype_v8sf_v8sf_int;
23804 case V8SF_FTYPE_V8SF_V4SF_INT:
23805 type = v8sf_ftype_v8sf_v4sf_int;
23807 case V4SI_FTYPE_V4SI_V4SI_INT:
23808 type = v4si_ftype_v4si_v4si_int;
23810 case V4DF_FTYPE_V4DF_V4DF_INT:
23811 type = v4df_ftype_v4df_v4df_int;
23813 case V4DF_FTYPE_V4DF_V2DF_INT:
23814 type = v4df_ftype_v4df_v2df_int;
23816 case V4SF_FTYPE_V4SF_V4SF_INT:
23817 type = v4sf_ftype_v4sf_v4sf_int;
23819 case V2DI_FTYPE_V2DI_V2DI_INT:
23820 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23821 type = v2di_ftype_v2di_v2di_int;
23823 case V2DF_FTYPE_V2DF_V2DF_INT:
23824 type = v2df_ftype_v2df_v2df_int;
23826 case V2DI_FTYPE_V2DI_UINT_UINT:
23827 type = v2di_ftype_v2di_unsigned_unsigned;
23829 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23830 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23832 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23833 type = v1di_ftype_v1di_v1di_int;
23836 gcc_unreachable ();
23839 def_builtin_const (d->mask, d->name, type, d->code);
23842 /* pcmpestr[im] insns. */
23843 for (i = 0, d = bdesc_pcmpestr;
23844 i < ARRAY_SIZE (bdesc_pcmpestr);
23847 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23848 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23850 ftype = int_ftype_v16qi_int_v16qi_int_int;
23851 def_builtin_const (d->mask, d->name, ftype, d->code);
23854 /* pcmpistr[im] insns. */
23855 for (i = 0, d = bdesc_pcmpistr;
23856 i < ARRAY_SIZE (bdesc_pcmpistr);
23859 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23860 ftype = v16qi_ftype_v16qi_v16qi_int;
23862 ftype = int_ftype_v16qi_v16qi_int;
23863 def_builtin_const (d->mask, d->name, ftype, d->code);
23866 /* comi/ucomi insns. */
23867 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23868 if (d->mask == OPTION_MASK_ISA_SSE2)
23869 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23871 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23874 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23875 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23877 /* SSE or 3DNow!A */
23878 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23881 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23883 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23884 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23887 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23888 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23891 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23892 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23893 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23894 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23895 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23896 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23899 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23902 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23903 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23905 /* Access to the vec_init patterns. */
23906 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23907 integer_type_node, NULL_TREE);
23908 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23910 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23911 short_integer_type_node,
23912 short_integer_type_node,
23913 short_integer_type_node, NULL_TREE);
23914 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23916 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23917 char_type_node, char_type_node,
23918 char_type_node, char_type_node,
23919 char_type_node, char_type_node,
23920 char_type_node, NULL_TREE);
23921 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23923 /* Access to the vec_extract patterns. */
23924 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23925 integer_type_node, NULL_TREE);
23926 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23928 ftype = build_function_type_list (long_long_integer_type_node,
23929 V2DI_type_node, integer_type_node,
23931 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23933 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23934 integer_type_node, NULL_TREE);
23935 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23937 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23938 integer_type_node, NULL_TREE);
23939 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23941 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23942 integer_type_node, NULL_TREE);
23943 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23945 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23946 integer_type_node, NULL_TREE);
23947 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23949 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23950 integer_type_node, NULL_TREE);
23951 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23953 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23954 integer_type_node, NULL_TREE);
23955 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23957 /* Access to the vec_set patterns. */
23958 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23960 integer_type_node, NULL_TREE);
23961 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23963 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23965 integer_type_node, NULL_TREE);
23966 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23968 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23970 integer_type_node, NULL_TREE);
23971 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23973 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23975 integer_type_node, NULL_TREE);
23976 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23978 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23980 integer_type_node, NULL_TREE);
23981 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23983 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23985 integer_type_node, NULL_TREE);
23986 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23988 /* Add SSE5 multi-arg argument instructions */
23989 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23991 tree mtype = NULL_TREE;
23996 switch ((enum multi_arg_type)d->flag)
23998 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23999 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
24000 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
24001 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
24002 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
24003 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
24004 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
24005 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
24006 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
24007 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
24008 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
24009 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
24010 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
24011 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
24012 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
24013 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
24014 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
24015 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
24016 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
24017 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
24018 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
24019 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
24020 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
24021 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
24022 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
24023 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
24024 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
24025 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
24026 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
24027 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
24028 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
24029 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
24030 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
24031 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
24032 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
24033 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
24034 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
24035 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
24036 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
24037 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
24038 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
24039 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
24040 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
24041 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
24042 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
24043 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
24044 case MULTI_ARG_UNKNOWN:
24046 gcc_unreachable ();
24050 def_builtin_const (d->mask, d->name, mtype, d->code);
24054 /* Internal method for ix86_init_builtins. */
/* Register the __builtin_ms_va_{start,end,copy} and
   __builtin_sysv_va_{start,end,copy} builtins.  Each function type is
   tagged with the matching "ms_abi" / "sysv_abi" attribute so the
   calling convention of the va_list machinery is explicit regardless
   of the target's default ABI.  (NOTE: several lines of this listing
   are elided; e.g. the guard around the body and the sysv_va_ref
   assignment target are not visible here.)  */
24057 ix86_init_builtins_va_builtins_abi (void)
24059 tree ms_va_ref, sysv_va_ref;
24060 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
24061 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
24062 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
24063 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
/* Attribute lists that mark a function type as following the MS or
   System V calling convention.  */
24067 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
24068 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
/* va_start/va_end/va_copy take their first va_list by reference.  */
24069 ms_va_ref = build_reference_type (ms_va_list_type_node);
24071 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
/* Build the function types: void(va_list&) for va_end, a varargs
   variant for va_start, and void(va_list&, va_list) for va_copy.  */
24074 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24075 fnvoid_va_start_ms =
24076 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
24077 fnvoid_va_end_sysv =
24078 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
24079 fnvoid_va_start_sysv =
24080 build_varargs_function_type_list (void_type_node, sysv_va_ref,
24082 fnvoid_va_copy_ms =
24083 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
24085 fnvoid_va_copy_sysv =
24086 build_function_type_list (void_type_node, sysv_va_ref,
24087 sysv_va_ref, NULL_TREE);
/* Register the six builtins; each maps onto the generic
   BUILT_IN_VA_START/END/COPY code but carries the ABI attribute
   selected above.  */
24089 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
24090 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
24091 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
24092 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
24093 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
24094 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
24095 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
24096 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24097 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
24098 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
24099 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
24100 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
/* Target hook: initialize all ix86 builtins.  Registers the __float80
   and __float128 types, the TFmode support builtins (__builtin_infq,
   __builtin_huge_valq, __builtin_fabsq, __builtin_copysignq), then the
   MMX/SSE builtins and the per-ABI va_list builtins.  */
24104 ix86_init_builtins (void)
24106 tree float128_type_node = make_node (REAL_TYPE);
/* If long double is already the 80-bit extended type, reuse it as
   __float80 rather than building a distinct REAL_TYPE.  */
24109 /* The __float80 type. */
24110 if (TYPE_MODE (long_double_type_node) == XFmode)
24111 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
/* Otherwise lay out a fresh 80-bit REAL_TYPE for __float80.  */
24115 /* The __float80 type. */
24116 tree float80_type_node = make_node (REAL_TYPE);
24118 TYPE_PRECISION (float80_type_node) = 80;
24119 layout_type (float80_type_node);
24120 (*lang_hooks.types.register_builtin_type) (float80_type_node,
24124 /* The __float128 type. */
24125 TYPE_PRECISION (float128_type_node) = 128;
24126 layout_type (float128_type_node);
24127 (*lang_hooks.types.register_builtin_type) (float128_type_node,
/* Nullary builtins returning __float128 constants.  */
24130 /* TFmode support builtins. */
24131 ftype = build_function_type (float128_type_node, void_list_node);
24132 decl = add_builtin_function ("__builtin_infq", ftype,
24133 IX86_BUILTIN_INFQ, BUILT_IN_MD,
24135 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
24137 decl = add_builtin_function ("__builtin_huge_valq", ftype,
24138 IX86_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
24140 ix86_builtins[(int) IX86_BUILTIN_HUGE_VALQ] = decl;
/* __builtin_fabsq / __builtin_copysignq carry library fallback names
   (__fabstf2 / __copysigntf3) so they can be expanded as normal calls
   when SSE2 is unavailable.  */
24142 /* We will expand them to normal call if SSE2 isn't available since
24143 they are used by libgcc. */
24144 ftype = build_function_type_list (float128_type_node,
24145 float128_type_node,
24147 decl = add_builtin_function ("__builtin_fabsq", ftype,
24148 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
24149 "__fabstf2", NULL_TREE);
24150 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
/* Pure function of its argument: safe for the optimizers to CSE.  */
24151 TREE_READONLY (decl) = 1;
24153 ftype = build_function_type_list (float128_type_node,
24154 float128_type_node,
24155 float128_type_node,
24157 decl = add_builtin_function ("__builtin_copysignq", ftype,
24158 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
24159 "__copysigntf3", NULL_TREE);
24160 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
24161 TREE_READONLY (decl) = 1;
/* Register the large MMX/SSE builtin tables, then the ABI-specific
   va_list builtins.  */
24163 ix86_init_mmx_sse_builtins ();
24165 ix86_init_builtins_va_builtins_abi ();
24168 /* Errors in the source file can cause expand_expr to return const0_rtx
24169 where we expect a vector. To avoid crashing, use one of the vector
24170 clear instructions. */
24172 safe_vector_operand (rtx x, enum machine_mode mode)
/* Replace a scalar zero RTX with the all-zero vector constant of MODE
   so downstream predicates see an operand of the expected mode.  */
24174 if (x == const0_rtx)
24175 x = CONST0_RTX (mode);
24179 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
/* Expand a two-operand builtin call EXP via insn pattern ICODE,
   producing the result in TARGET (or a fresh pseudo when TARGET is
   unsuitable).  Operand modes are taken from the insn_data table.  */
24182 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
24185 tree arg0 = CALL_EXPR_ARG (exp, 0);
24186 tree arg1 = CALL_EXPR_ARG (exp, 1);
24187 rtx op0 = expand_normal (arg0);
24188 rtx op1 = expand_normal (arg1);
24189 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24190 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24191 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
/* Guard against const0_rtx standing in for a vector (error paths).  */
24193 if (VECTOR_MODE_P (mode0))
24194 op0 = safe_vector_operand (op0, mode0);
24195 if (VECTOR_MODE_P (mode1))
24196 op1 = safe_vector_operand (op1, mode1);
/* Use a fresh pseudo unless TARGET already has the right mode and
   satisfies the destination predicate.  */
24198 if (optimize || !target
24199 || GET_MODE (target) != tmode
24200 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24201 target = gen_reg_rtx (tmode);
/* A 32-bit second operand feeding a TImode pattern (e.g. shift
   counts): load it into a V4SI register with sse2_loadd and view the
   register as TImode.  */
24203 if (GET_MODE (op1) == SImode && mode1 == TImode)
24205 rtx x = gen_reg_rtx (V4SImode);
24206 emit_insn (gen_sse2_loadd (x, op1));
24207 op1 = gen_lowpart (TImode, x);
/* Force operands into registers when the pattern's predicates
   reject them as-is.  */
24210 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
24211 op0 = copy_to_mode_reg (mode0, op0);
24212 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
24213 op1 = copy_to_mode_reg (mode1, op1);
24215 pat = GEN_FCN (icode) (target, op0, op1);
24224 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
/* Expand an SSE5 multi-argument builtin EXP via pattern ICODE.
   M_TYPE selects the operand shape (arity, immediate last operand,
   comparison form); SUB_CODE is the rtx comparison code used by the
   _CMP/_TF forms.  Result goes to TARGET or a fresh pseudo.  */
24227 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
24228 enum multi_arg_type m_type,
24229 enum rtx_code sub_code)
24234 bool comparison_p = false;
24236 bool last_arg_constant = false;
24237 int num_memory = 0;
24240 enum machine_mode mode;
24243 enum machine_mode tmode = insn_data[icode].operand[0].mode;
/* Classify M_TYPE: three-operand forms.  */
24247 case MULTI_ARG_3_SF:
24248 case MULTI_ARG_3_DF:
24249 case MULTI_ARG_3_DI:
24250 case MULTI_ARG_3_SI:
24251 case MULTI_ARG_3_SI_DI:
24252 case MULTI_ARG_3_HI:
24253 case MULTI_ARG_3_HI_SI:
24254 case MULTI_ARG_3_QI:
24255 case MULTI_ARG_3_PERMPS:
24256 case MULTI_ARG_3_PERMPD:
/* Plain two-operand forms.  */
24260 case MULTI_ARG_2_SF:
24261 case MULTI_ARG_2_DF:
24262 case MULTI_ARG_2_DI:
24263 case MULTI_ARG_2_SI:
24264 case MULTI_ARG_2_HI:
24265 case MULTI_ARG_2_QI:
/* Two-operand forms whose final operand must be an immediate.  */
24269 case MULTI_ARG_2_DI_IMM:
24270 case MULTI_ARG_2_SI_IMM:
24271 case MULTI_ARG_2_HI_IMM:
24272 case MULTI_ARG_2_QI_IMM:
24274 last_arg_constant = true;
/* One-operand (unary / widening) forms.  */
24277 case MULTI_ARG_1_SF:
24278 case MULTI_ARG_1_DF:
24279 case MULTI_ARG_1_DI:
24280 case MULTI_ARG_1_SI:
24281 case MULTI_ARG_1_HI:
24282 case MULTI_ARG_1_QI:
24283 case MULTI_ARG_1_SI_DI:
24284 case MULTI_ARG_1_HI_DI:
24285 case MULTI_ARG_1_HI_SI:
24286 case MULTI_ARG_1_QI_DI:
24287 case MULTI_ARG_1_QI_SI:
24288 case MULTI_ARG_1_QI_HI:
24289 case MULTI_ARG_1_PH2PS:
24290 case MULTI_ARG_1_PS2PH:
/* Comparison forms: an extra comparison-operator operand is
   synthesized from SUB_CODE (see the emit section below).  */
24294 case MULTI_ARG_2_SF_CMP:
24295 case MULTI_ARG_2_DF_CMP:
24296 case MULTI_ARG_2_DI_CMP:
24297 case MULTI_ARG_2_SI_CMP:
24298 case MULTI_ARG_2_HI_CMP:
24299 case MULTI_ARG_2_QI_CMP:
24301 comparison_p = true;
/* Test forms: SUB_CODE is passed to the pattern as an integer.  */
24304 case MULTI_ARG_2_SF_TF:
24305 case MULTI_ARG_2_DF_TF:
24306 case MULTI_ARG_2_DI_TF:
24307 case MULTI_ARG_2_SI_TF:
24308 case MULTI_ARG_2_HI_TF:
24309 case MULTI_ARG_2_QI_TF:
24314 case MULTI_ARG_UNKNOWN:
24316 gcc_unreachable ();
/* Pick a destination register.  */
24319 if (optimize || !target
24320 || GET_MODE (target) != tmode
24321 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24322 target = gen_reg_rtx (tmode);
24324 gcc_assert (nargs <= 4);
/* Expand and legitimize each argument.  For comparison forms the
   operand index is shifted by one to skip the comparator operand.  */
24326 for (i = 0; i < nargs; i++)
24328 tree arg = CALL_EXPR_ARG (exp, i);
24329 rtx op = expand_normal (arg);
24330 int adjust = (comparison_p) ? 1 : 0;
24331 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24333 if (last_arg_constant && i == nargs-1)
24335 if (!CONST_INT_P (op))
24337 error ("last argument must be an immediate");
/* On error, return a dummy register so expansion can continue.  */
24338 return gen_reg_rtx (tmode);
24343 if (VECTOR_MODE_P (mode))
24344 op = safe_vector_operand (op, mode);
24346 /* If we aren't optimizing, only allow one memory operand to be
24348 if (memory_operand (op, mode))
24351 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24354 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
24356 op = force_reg (mode, op);
24360 args[i].mode = mode;
/* Emit the pattern according to arity and comparison kind.  */
24366 pat = GEN_FCN (icode) (target, args[0].op);
24371 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24372 GEN_INT ((int)sub_code));
24373 else if (! comparison_p)
24374 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
/* Comparison form: build the comparator rtx from SUB_CODE.  */
24377 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24381 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24386 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24390 gcc_unreachable ();
24400 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24401 insns with vec_merge. */
/* Expand a one-argument builtin EXP via ICODE where the pattern is a
   scalar operation merged into a vector (operand 2 supplies the
   pass-through lanes).  */
24404 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24408 tree arg0 = CALL_EXPR_ARG (exp, 0);
24409 rtx op1, op0 = expand_normal (arg0);
24410 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24411 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
/* Use a fresh pseudo unless TARGET already fits the destination.  */
24413 if (optimize || !target
24414 || GET_MODE (target) != tmode
24415 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
24416 target = gen_reg_rtx (tmode);
24418 if (VECTOR_MODE_P (mode0))
24419 op0 = safe_vector_operand (op0, mode0);
24421 if ((optimize && !register_operand (op0, mode0))
24422 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
24423 op0 = copy_to_mode_reg (mode0, op0);
/* NOTE(review): op1's assignment is on an elided line of this listing;
   it appears to reuse op0 for the merge operand — confirm against the
   full source.  */
24426 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
24427 op1 = copy_to_mode_reg (mode0, op1);
24429 pat = GEN_FCN (icode) (target, op0, op1);
24436 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
/* Expand a two-operand SSE compare builtin described by D.  The
   comparison code comes from D->comparison; SWAP requests operand
   reversal for comparisons the hardware only implements one way.  */
24439 ix86_expand_sse_compare (const struct builtin_description *d,
24440 tree exp, rtx target, bool swap)
24443 tree arg0 = CALL_EXPR_ARG (exp, 0);
24444 tree arg1 = CALL_EXPR_ARG (exp, 1);
24445 rtx op0 = expand_normal (arg0);
24446 rtx op1 = expand_normal (arg1);
24448 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24449 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24450 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24451 enum rtx_code comparison = d->comparison;
24453 if (VECTOR_MODE_P (mode0))
24454 op0 = safe_vector_operand (op0, mode0);
24455 if (VECTOR_MODE_P (mode1))
24456 op1 = safe_vector_operand (op1, mode1);
/* op1 is copied through a scratch register before the swap; the
   remainder of the exchange is on elided lines of this listing.  */
24458 /* Swap operands if we have a comparison that isn't available in
24462 rtx tmp = gen_reg_rtx (mode1);
24463 emit_move_insn (tmp, op1);
24468 if (optimize || !target
24469 || GET_MODE (target) != tmode
24470 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
24471 target = gen_reg_rtx (tmode);
/* Force operands into registers when optimizing (for CSE) or when
   the pattern predicates reject them.  */
24473 if ((optimize && !register_operand (op0, mode0))
24474 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
24475 op0 = copy_to_mode_reg (mode0, op0);
24476 if ((optimize && !register_operand (op1, mode1))
24477 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
24478 op1 = copy_to_mode_reg (mode1, op1);
/* Operand 3 of the pattern is the comparison operator itself.  */
24480 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24481 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24488 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
/* Expand a comi/ucomi builtin described by D: emit the flag-setting
   compare, then materialize the predicate D->comparison from the
   flags into the low byte of an SImode result.  */
24491 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24495 tree arg0 = CALL_EXPR_ARG (exp, 0);
24496 tree arg1 = CALL_EXPR_ARG (exp, 1);
24497 rtx op0 = expand_normal (arg0);
24498 rtx op1 = expand_normal (arg1);
24499 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24500 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24501 enum rtx_code comparison = d->comparison;
24503 if (VECTOR_MODE_P (mode0))
24504 op0 = safe_vector_operand (op0, mode0);
24505 if (VECTOR_MODE_P (mode1))
24506 op1 = safe_vector_operand (op1, mode1);
24508 /* Swap operands if we have a comparison that isn't available in
24510 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
/* Result is built as a QImode subreg of a zeroed SImode pseudo so the
   upper bits are well-defined.  */
24517 target = gen_reg_rtx (SImode);
24518 emit_move_insn (target, const0_rtx);
24519 target = gen_rtx_SUBREG (QImode, target, 0);
24521 if ((optimize && !register_operand (op0, mode0))
24522 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24523 op0 = copy_to_mode_reg (mode0, op0);
24524 if ((optimize && !register_operand (op1, mode1))
24525 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24526 op1 = copy_to_mode_reg (mode1, op1);
24528 pat = GEN_FCN (d->icode) (op0, op1);
/* Set the low byte from the flags via a strict_low_part store.  */
24532 emit_insn (gen_rtx_SET (VOIDmode,
24533 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24534 gen_rtx_fmt_ee (comparison, QImode,
/* Return the full SImode pseudo, not the QImode subreg.  */
24538 return SUBREG_REG (target);
24541 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
/* Expand a ptest-style builtin described by D: emit the flag-setting
   ptest pattern, then read back the predicate D->comparison from the
   flags into the low byte of an SImode result.  */
24544 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24548 tree arg0 = CALL_EXPR_ARG (exp, 0);
24549 tree arg1 = CALL_EXPR_ARG (exp, 1);
24550 rtx op0 = expand_normal (arg0);
24551 rtx op1 = expand_normal (arg1);
24552 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24553 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24554 enum rtx_code comparison = d->comparison;
24556 if (VECTOR_MODE_P (mode0))
24557 op0 = safe_vector_operand (op0, mode0);
24558 if (VECTOR_MODE_P (mode1))
24559 op1 = safe_vector_operand (op1, mode1);
/* Result lives in the low byte of a zeroed SImode pseudo.  */
24561 target = gen_reg_rtx (SImode);
24562 emit_move_insn (target, const0_rtx);
24563 target = gen_rtx_SUBREG (QImode, target, 0);
24565 if ((optimize && !register_operand (op0, mode0))
24566 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24567 op0 = copy_to_mode_reg (mode0, op0);
24568 if ((optimize && !register_operand (op1, mode1))
24569 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24570 op1 = copy_to_mode_reg (mode1, op1);
24572 pat = GEN_FCN (d->icode) (op0, op1);
/* Store the flag predicate into the low byte.  */
24576 emit_insn (gen_rtx_SET (VOIDmode,
24577 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24578 gen_rtx_fmt_ee (comparison, QImode,
24582 return SUBREG_REG (target);
24585 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
/* Expand a pcmpestri/pcmpestrm builtin described by D.  The pattern
   has two outputs (index and mask); depending on D->code one of them
   becomes the result and the other goes to a scratch, or (flag
   variants) both are scratches and a flag bit is extracted.
   FIX: diagnostic wording corrected from "a 8-bit" to "an 8-bit".  */
24588 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24589 tree exp, rtx target)
24592 tree arg0 = CALL_EXPR_ARG (exp, 0);
24593 tree arg1 = CALL_EXPR_ARG (exp, 1);
24594 tree arg2 = CALL_EXPR_ARG (exp, 2);
24595 tree arg3 = CALL_EXPR_ARG (exp, 3);
24596 tree arg4 = CALL_EXPR_ARG (exp, 4);
24597 rtx scratch0, scratch1;
24598 rtx op0 = expand_normal (arg0);
24599 rtx op1 = expand_normal (arg1);
24600 rtx op2 = expand_normal (arg2);
24601 rtx op3 = expand_normal (arg3);
24602 rtx op4 = expand_normal (arg4);
24603 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24605 tmode0 = insn_data[d->icode].operand[0].mode;
24606 tmode1 = insn_data[d->icode].operand[1].mode;
24607 modev2 = insn_data[d->icode].operand[2].mode;
24608 modei3 = insn_data[d->icode].operand[3].mode;
24609 modev4 = insn_data[d->icode].operand[4].mode;
24610 modei5 = insn_data[d->icode].operand[5].mode;
24611 modeimm = insn_data[d->icode].operand[6].mode;
24613 if (VECTOR_MODE_P (modev2))
24614 op0 = safe_vector_operand (op0, modev2);
24615 if (VECTOR_MODE_P (modev4))
24616 op2 = safe_vector_operand (op2, modev4);
/* Legitimize the two vectors and their explicit lengths.  */
24618 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24619 op0 = copy_to_mode_reg (modev2, op0);
24620 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24621 op1 = copy_to_mode_reg (modei3, op1);
24622 if ((optimize && !register_operand (op2, modev4))
24623 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24624 op2 = copy_to_mode_reg (modev4, op2);
24625 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24626 op3 = copy_to_mode_reg (modei5, op3);
/* The control byte must be a compile-time 8-bit immediate.  */
24628 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24630 error ("the fifth argument must be an 8-bit immediate");
/* pcmpestri: index result wanted, mask output is a scratch.  */
24634 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24636 if (optimize || !target
24637 || GET_MODE (target) != tmode0
24638 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24639 target = gen_reg_rtx (tmode0);
24641 scratch1 = gen_reg_rtx (tmode1);
24643 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
/* pcmpestrm: mask result wanted, index output is a scratch.  */
24645 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24647 if (optimize || !target
24648 || GET_MODE (target) != tmode1
24649 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24650 target = gen_reg_rtx (tmode1);
24652 scratch0 = gen_reg_rtx (tmode0);
24654 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
/* Flag variants: both outputs are scratches; D->flag identifies the
   flags-register bit to extract below.  */
24658 gcc_assert (d->flag);
24660 scratch0 = gen_reg_rtx (tmode0);
24661 scratch1 = gen_reg_rtx (tmode1);
24663 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
/* Extract the requested flag into the low byte of a zeroed SImode
   pseudo and return the full SImode value.  */
24673 target = gen_reg_rtx (SImode);
24674 emit_move_insn (target, const0_rtx);
24675 target = gen_rtx_SUBREG (QImode, target, 0);
24678 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24679 gen_rtx_fmt_ee (EQ, QImode,
24680 gen_rtx_REG ((enum machine_mode) d->flag,
24683 return SUBREG_REG (target);
24690 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
/* Expand a pcmpistri/pcmpistrm builtin described by D.  Mirrors
   ix86_expand_sse_pcmpestr but for the implicit-length forms (no
   explicit length operands).
   FIX: diagnostic wording corrected from "a 8-bit" to "an 8-bit".  */
24693 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24694 tree exp, rtx target)
24697 tree arg0 = CALL_EXPR_ARG (exp, 0);
24698 tree arg1 = CALL_EXPR_ARG (exp, 1);
24699 tree arg2 = CALL_EXPR_ARG (exp, 2);
24700 rtx scratch0, scratch1;
24701 rtx op0 = expand_normal (arg0);
24702 rtx op1 = expand_normal (arg1);
24703 rtx op2 = expand_normal (arg2);
24704 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24706 tmode0 = insn_data[d->icode].operand[0].mode;
24707 tmode1 = insn_data[d->icode].operand[1].mode;
24708 modev2 = insn_data[d->icode].operand[2].mode;
24709 modev3 = insn_data[d->icode].operand[3].mode;
24710 modeimm = insn_data[d->icode].operand[4].mode;
24712 if (VECTOR_MODE_P (modev2))
24713 op0 = safe_vector_operand (op0, modev2);
24714 if (VECTOR_MODE_P (modev3))
24715 op1 = safe_vector_operand (op1, modev3);
/* Legitimize the two vector operands.  */
24717 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24718 op0 = copy_to_mode_reg (modev2, op0);
24719 if ((optimize && !register_operand (op1, modev3))
24720 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24721 op1 = copy_to_mode_reg (modev3, op1);
/* The control byte must be a compile-time 8-bit immediate.  */
24723 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24725 error ("the third argument must be an 8-bit immediate");
/* pcmpistri: index result wanted, mask output is a scratch.  */
24729 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24731 if (optimize || !target
24732 || GET_MODE (target) != tmode0
24733 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24734 target = gen_reg_rtx (tmode0);
24736 scratch1 = gen_reg_rtx (tmode1);
24738 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
/* pcmpistrm: mask result wanted, index output is a scratch.  */
24740 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24742 if (optimize || !target
24743 || GET_MODE (target) != tmode1
24744 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24745 target = gen_reg_rtx (tmode1);
24747 scratch0 = gen_reg_rtx (tmode0);
24749 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
/* Flag variants: both outputs are scratches; D->flag identifies the
   flags-register bit to extract below.  */
24753 gcc_assert (d->flag);
24755 scratch0 = gen_reg_rtx (tmode0);
24756 scratch1 = gen_reg_rtx (tmode1);
24758 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
/* Extract the requested flag into the low byte of a zeroed SImode
   pseudo and return the full SImode value.  */
24768 target = gen_reg_rtx (SImode);
24769 emit_move_insn (target, const0_rtx);
24770 target = gen_rtx_SUBREG (QImode, target, 0);
24773 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24774 gen_rtx_fmt_ee (EQ, QImode,
24775 gen_rtx_REG ((enum machine_mode) d->flag,
24778 return SUBREG_REG (target);
24784 /* Subroutine of ix86_expand_builtin to take care of insns with
24785 variable number of operands. */
24788 ix86_expand_args_builtin (const struct builtin_description *d,
24789 tree exp, rtx target)
24791 rtx pat, real_target;
24792 unsigned int i, nargs;
24793 unsigned int nargs_constant = 0;
24794 int num_memory = 0;
24798 enum machine_mode mode;
24800 bool last_arg_count = false;
24801 enum insn_code icode = d->icode;
24802 const struct insn_data *insn_p = &insn_data[icode];
24803 enum machine_mode tmode = insn_p->operand[0].mode;
24804 enum machine_mode rmode = VOIDmode;
24806 enum rtx_code comparison = d->comparison;
24808 switch ((enum ix86_builtin_type) d->flag)
24810 case INT_FTYPE_V8SF_V8SF_PTEST:
24811 case INT_FTYPE_V4DI_V4DI_PTEST:
24812 case INT_FTYPE_V4DF_V4DF_PTEST:
24813 case INT_FTYPE_V4SF_V4SF_PTEST:
24814 case INT_FTYPE_V2DI_V2DI_PTEST:
24815 case INT_FTYPE_V2DF_V2DF_PTEST:
24816 return ix86_expand_sse_ptest (d, exp, target);
24817 case FLOAT128_FTYPE_FLOAT128:
24818 case FLOAT_FTYPE_FLOAT:
24819 case INT_FTYPE_INT:
24820 case UINT64_FTYPE_INT:
24821 case INT64_FTYPE_INT64:
24822 case INT64_FTYPE_V4SF:
24823 case INT64_FTYPE_V2DF:
24824 case INT_FTYPE_V16QI:
24825 case INT_FTYPE_V8QI:
24826 case INT_FTYPE_V8SF:
24827 case INT_FTYPE_V4DF:
24828 case INT_FTYPE_V4SF:
24829 case INT_FTYPE_V2DF:
24830 case V16QI_FTYPE_V16QI:
24831 case V8SI_FTYPE_V8SF:
24832 case V8SI_FTYPE_V4SI:
24833 case V8HI_FTYPE_V8HI:
24834 case V8HI_FTYPE_V16QI:
24835 case V8QI_FTYPE_V8QI:
24836 case V8SF_FTYPE_V8SF:
24837 case V8SF_FTYPE_V8SI:
24838 case V8SF_FTYPE_V4SF:
24839 case V4SI_FTYPE_V4SI:
24840 case V4SI_FTYPE_V16QI:
24841 case V4SI_FTYPE_V4SF:
24842 case V4SI_FTYPE_V8SI:
24843 case V4SI_FTYPE_V8HI:
24844 case V4SI_FTYPE_V4DF:
24845 case V4SI_FTYPE_V2DF:
24846 case V4HI_FTYPE_V4HI:
24847 case V4DF_FTYPE_V4DF:
24848 case V4DF_FTYPE_V4SI:
24849 case V4DF_FTYPE_V4SF:
24850 case V4DF_FTYPE_V2DF:
24851 case V4SF_FTYPE_V4SF:
24852 case V4SF_FTYPE_V4SI:
24853 case V4SF_FTYPE_V8SF:
24854 case V4SF_FTYPE_V4DF:
24855 case V4SF_FTYPE_V2DF:
24856 case V2DI_FTYPE_V2DI:
24857 case V2DI_FTYPE_V16QI:
24858 case V2DI_FTYPE_V8HI:
24859 case V2DI_FTYPE_V4SI:
24860 case V2DF_FTYPE_V2DF:
24861 case V2DF_FTYPE_V4SI:
24862 case V2DF_FTYPE_V4DF:
24863 case V2DF_FTYPE_V4SF:
24864 case V2DF_FTYPE_V2SI:
24865 case V2SI_FTYPE_V2SI:
24866 case V2SI_FTYPE_V4SF:
24867 case V2SI_FTYPE_V2SF:
24868 case V2SI_FTYPE_V2DF:
24869 case V2SF_FTYPE_V2SF:
24870 case V2SF_FTYPE_V2SI:
24873 case V4SF_FTYPE_V4SF_VEC_MERGE:
24874 case V2DF_FTYPE_V2DF_VEC_MERGE:
24875 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24876 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24877 case V16QI_FTYPE_V16QI_V16QI:
24878 case V16QI_FTYPE_V8HI_V8HI:
24879 case V8QI_FTYPE_V8QI_V8QI:
24880 case V8QI_FTYPE_V4HI_V4HI:
24881 case V8HI_FTYPE_V8HI_V8HI:
24882 case V8HI_FTYPE_V16QI_V16QI:
24883 case V8HI_FTYPE_V4SI_V4SI:
24884 case V8SF_FTYPE_V8SF_V8SF:
24885 case V8SF_FTYPE_V8SF_V8SI:
24886 case V4SI_FTYPE_V4SI_V4SI:
24887 case V4SI_FTYPE_V8HI_V8HI:
24888 case V4SI_FTYPE_V4SF_V4SF:
24889 case V4SI_FTYPE_V2DF_V2DF:
24890 case V4HI_FTYPE_V4HI_V4HI:
24891 case V4HI_FTYPE_V8QI_V8QI:
24892 case V4HI_FTYPE_V2SI_V2SI:
24893 case V4DF_FTYPE_V4DF_V4DF:
24894 case V4DF_FTYPE_V4DF_V4DI:
24895 case V4SF_FTYPE_V4SF_V4SF:
24896 case V4SF_FTYPE_V4SF_V4SI:
24897 case V4SF_FTYPE_V4SF_V2SI:
24898 case V4SF_FTYPE_V4SF_V2DF:
24899 case V4SF_FTYPE_V4SF_DI:
24900 case V4SF_FTYPE_V4SF_SI:
24901 case V2DI_FTYPE_V2DI_V2DI:
24902 case V2DI_FTYPE_V16QI_V16QI:
24903 case V2DI_FTYPE_V4SI_V4SI:
24904 case V2DI_FTYPE_V2DI_V16QI:
24905 case V2DI_FTYPE_V2DF_V2DF:
24906 case V2SI_FTYPE_V2SI_V2SI:
24907 case V2SI_FTYPE_V4HI_V4HI:
24908 case V2SI_FTYPE_V2SF_V2SF:
24909 case V2DF_FTYPE_V2DF_V2DF:
24910 case V2DF_FTYPE_V2DF_V4SF:
24911 case V2DF_FTYPE_V2DF_V2DI:
24912 case V2DF_FTYPE_V2DF_DI:
24913 case V2DF_FTYPE_V2DF_SI:
24914 case V2SF_FTYPE_V2SF_V2SF:
24915 case V1DI_FTYPE_V1DI_V1DI:
24916 case V1DI_FTYPE_V8QI_V8QI:
24917 case V1DI_FTYPE_V2SI_V2SI:
24918 if (comparison == UNKNOWN)
24919 return ix86_expand_binop_builtin (icode, exp, target);
24922 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24923 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24924 gcc_assert (comparison != UNKNOWN);
24928 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24929 case V8HI_FTYPE_V8HI_SI_COUNT:
24930 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24931 case V4SI_FTYPE_V4SI_SI_COUNT:
24932 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24933 case V4HI_FTYPE_V4HI_SI_COUNT:
24934 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24935 case V2DI_FTYPE_V2DI_SI_COUNT:
24936 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24937 case V2SI_FTYPE_V2SI_SI_COUNT:
24938 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24939 case V1DI_FTYPE_V1DI_SI_COUNT:
24941 last_arg_count = true;
24943 case UINT64_FTYPE_UINT64_UINT64:
24944 case UINT_FTYPE_UINT_UINT:
24945 case UINT_FTYPE_UINT_USHORT:
24946 case UINT_FTYPE_UINT_UCHAR:
24947 case UINT16_FTYPE_UINT16_INT:
24948 case UINT8_FTYPE_UINT8_INT:
24951 case V2DI2TI_FTYPE_V2DI_INT:
24954 nargs_constant = 1;
24956 case V8HI_FTYPE_V8HI_INT:
24957 case V8SF_FTYPE_V8SF_INT:
24958 case V4SI_FTYPE_V4SI_INT:
24959 case V4SI_FTYPE_V8SI_INT:
24960 case V4HI_FTYPE_V4HI_INT:
24961 case V4DF_FTYPE_V4DF_INT:
24962 case V4SF_FTYPE_V4SF_INT:
24963 case V4SF_FTYPE_V8SF_INT:
24964 case V2DI_FTYPE_V2DI_INT:
24965 case V2DF_FTYPE_V2DF_INT:
24966 case V2DF_FTYPE_V4DF_INT:
24968 nargs_constant = 1;
24970 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24971 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24972 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24973 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24974 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24977 case V16QI_FTYPE_V16QI_V16QI_INT:
24978 case V8HI_FTYPE_V8HI_V8HI_INT:
24979 case V8SI_FTYPE_V8SI_V8SI_INT:
24980 case V8SI_FTYPE_V8SI_V4SI_INT:
24981 case V8SF_FTYPE_V8SF_V8SF_INT:
24982 case V8SF_FTYPE_V8SF_V4SF_INT:
24983 case V4SI_FTYPE_V4SI_V4SI_INT:
24984 case V4DF_FTYPE_V4DF_V4DF_INT:
24985 case V4DF_FTYPE_V4DF_V2DF_INT:
24986 case V4SF_FTYPE_V4SF_V4SF_INT:
24987 case V2DI_FTYPE_V2DI_V2DI_INT:
24988 case V2DF_FTYPE_V2DF_V2DF_INT:
24990 nargs_constant = 1;
24992 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24995 nargs_constant = 1;
24997 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
25000 nargs_constant = 1;
25002 case V2DI_FTYPE_V2DI_UINT_UINT:
25004 nargs_constant = 2;
25006 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
25008 nargs_constant = 2;
25011 gcc_unreachable ();
25014 gcc_assert (nargs <= ARRAY_SIZE (args));
25016 if (comparison != UNKNOWN)
25018 gcc_assert (nargs == 2);
25019 return ix86_expand_sse_compare (d, exp, target, swap);
25022 if (rmode == VOIDmode || rmode == tmode)
25026 || GET_MODE (target) != tmode
25027 || ! (*insn_p->operand[0].predicate) (target, tmode))
25028 target = gen_reg_rtx (tmode);
25029 real_target = target;
25033 target = gen_reg_rtx (rmode);
25034 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
25037 for (i = 0; i < nargs; i++)
25039 tree arg = CALL_EXPR_ARG (exp, i);
25040 rtx op = expand_normal (arg);
25041 enum machine_mode mode = insn_p->operand[i + 1].mode;
25042 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
25044 if (last_arg_count && (i + 1) == nargs)
25046 /* SIMD shift insns take either an 8-bit immediate or
25047 register as count. But builtin functions take int as
25048 count. If count doesn't match, we put it in register. */
25051 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
25052 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
25053 op = copy_to_reg (op);
25056 else if ((nargs - i) <= nargs_constant)
25061 case CODE_FOR_sse4_1_roundpd:
25062 case CODE_FOR_sse4_1_roundps:
25063 case CODE_FOR_sse4_1_roundsd:
25064 case CODE_FOR_sse4_1_roundss:
25065 case CODE_FOR_sse4_1_blendps:
25066 case CODE_FOR_avx_blendpd256:
25067 case CODE_FOR_avx_vpermilv4df:
25068 case CODE_FOR_avx_roundpd256:
25069 case CODE_FOR_avx_roundps256:
25070 error ("the last argument must be a 4-bit immediate");
25073 case CODE_FOR_sse4_1_blendpd:
25074 case CODE_FOR_avx_vpermilv2df:
25075 error ("the last argument must be a 2-bit immediate");
25078 case CODE_FOR_avx_vextractf128v4df:
25079 case CODE_FOR_avx_vextractf128v8sf:
25080 case CODE_FOR_avx_vextractf128v8si:
25081 case CODE_FOR_avx_vinsertf128v4df:
25082 case CODE_FOR_avx_vinsertf128v8sf:
25083 case CODE_FOR_avx_vinsertf128v8si:
25084 error ("the last argument must be a 1-bit immediate");
25087 case CODE_FOR_avx_cmpsdv2df3:
25088 case CODE_FOR_avx_cmpssv4sf3:
25089 case CODE_FOR_avx_cmppdv2df3:
25090 case CODE_FOR_avx_cmppsv4sf3:
25091 case CODE_FOR_avx_cmppdv4df3:
25092 case CODE_FOR_avx_cmppsv8sf3:
25093 error ("the last argument must be a 5-bit immediate");
25097 switch (nargs_constant)
25100 if ((nargs - i) == nargs_constant)
25102 error ("the next to last argument must be an 8-bit immediate");
25106 error ("the last argument must be an 8-bit immediate");
25109 gcc_unreachable ();
25116 if (VECTOR_MODE_P (mode))
25117 op = safe_vector_operand (op, mode);
25119 /* If we aren't optimizing, only allow one memory operand to
25121 if (memory_operand (op, mode))
25124 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
25126 if (optimize || !match || num_memory > 1)
25127 op = copy_to_mode_reg (mode, op);
25131 op = copy_to_reg (op);
25132 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
25137 args[i].mode = mode;
25143 pat = GEN_FCN (icode) (real_target, args[0].op);
25146 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
25149 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25153 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
25154 args[2].op, args[3].op);
25157 gcc_unreachable ();
25167 /* Subroutine of ix86_expand_builtin to take care of special insns
25168 with variable number of operands. */
/* NOTE(review): this listing is elided -- braces, blank lines and some
   statements (including the nargs/klass assignments per case) are not
   visible here.  Comments below describe only what the visible lines
   establish.  */
25171 ix86_expand_special_args_builtin (const struct builtin_description *d,
25172 tree exp, rtx target)
25176 unsigned int i, nargs, arg_adjust, memory;
25180 enum machine_mode mode;
25182 enum insn_code icode = d->icode;
25183 bool last_arg_constant = false;
25184 const struct insn_data *insn_p = &insn_data[icode];
25185 enum machine_mode tmode = insn_p->operand[0].mode;
/* klass records whether the builtin produces a value (load) or writes
   through a pointer argument (store).  */
25186 enum { load, store } klass;
/* Classify the builtin by its special-builtin type code.  */
25188 switch ((enum ix86_special_builtin_type) d->flag)
25190 case VOID_FTYPE_VOID:
25191 emit_insn (GEN_FCN (icode) (target));
25193 case UINT64_FTYPE_VOID:
25198 case UINT64_FTYPE_PUNSIGNED:
25199 case V2DI_FTYPE_PV2DI:
25200 case V32QI_FTYPE_PCCHAR:
25201 case V16QI_FTYPE_PCCHAR:
25202 case V8SF_FTYPE_PCV4SF:
25203 case V8SF_FTYPE_PCFLOAT:
25204 case V4SF_FTYPE_PCFLOAT:
25205 case V4DF_FTYPE_PCV2DF:
25206 case V4DF_FTYPE_PCDOUBLE:
25207 case V2DF_FTYPE_PCDOUBLE:
25212 case VOID_FTYPE_PV2SF_V4SF:
25213 case VOID_FTYPE_PV4DI_V4DI:
25214 case VOID_FTYPE_PV2DI_V2DI:
25215 case VOID_FTYPE_PCHAR_V32QI:
25216 case VOID_FTYPE_PCHAR_V16QI:
25217 case VOID_FTYPE_PFLOAT_V8SF:
25218 case VOID_FTYPE_PFLOAT_V4SF:
25219 case VOID_FTYPE_PDOUBLE_V4DF:
25220 case VOID_FTYPE_PDOUBLE_V2DF:
25221 case VOID_FTYPE_PDI_DI:
25222 case VOID_FTYPE_PINT_INT:
25225 /* Reserve memory operand for target. */
25226 memory = ARRAY_SIZE (args);
25228 case V4SF_FTYPE_V4SF_PCV2SF:
25229 case V2DF_FTYPE_V2DF_PCDOUBLE:
25234 case V8SF_FTYPE_PCV8SF_V8SF:
25235 case V4DF_FTYPE_PCV4DF_V4DF:
25236 case V4SF_FTYPE_PCV4SF_V4SF:
25237 case V2DF_FTYPE_PCV2DF_V2DF:
25242 case VOID_FTYPE_PV8SF_V8SF_V8SF:
25243 case VOID_FTYPE_PV4DF_V4DF_V4DF:
25244 case VOID_FTYPE_PV4SF_V4SF_V4SF:
25245 case VOID_FTYPE_PV2DF_V2DF_V2DF:
25248 /* Reserve memory operand for target. */
25249 memory = ARRAY_SIZE (args);
25252 gcc_unreachable ();
25255 gcc_assert (nargs <= ARRAY_SIZE (args));
/* For stores, call argument 0 is the destination address: wrap it in a
   MEM of the insn's output mode and use it as the insn target.  */
25257 if (klass == store)
25259 arg = CALL_EXPR_ARG (exp, 0);
25260 op = expand_normal (arg);
25261 gcc_assert (target == 0);
25262 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
/* For loads, make sure TARGET is a register the output predicate
   accepts; otherwise grab a fresh one.  */
25270 || GET_MODE (target) != tmode
25271 || ! (*insn_p->operand[0].predicate) (target, tmode))
25272 target = gen_reg_rtx (tmode);
/* Expand the remaining call arguments into insn operands, legitimizing
   each against the matching operand predicate.  */
25275 for (i = 0; i < nargs; i++)
25277 enum machine_mode mode = insn_p->operand[i + 1].mode;
25280 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25281 op = expand_normal (arg);
25282 match = (*insn_p->operand[i + 1].predicate) (op, mode);
25284 if (last_arg_constant && (i + 1) == nargs)
25290 error ("the last argument must be an 8-bit immediate");
25298 /* This must be the memory operand. */
25299 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op))
;
25300 gcc_assert (GET_MODE (op) == mode
25301 || GET_MODE (op) == VOIDmode);
25305 /* This must be register. */
25306 if (VECTOR_MODE_P (mode))
25307 op = safe_vector_operand (op, mode);
25309 gcc_assert (GET_MODE (op) == mode
25310 || GET_MODE (op) == VOIDmode);
25311 op = copy_to_mode_reg (mode, op);
25316 args[i].mode = mode;
/* Emit the pattern with however many operands were collected.  */
25322 pat = GEN_FCN (icode) (target);
25325 pat = GEN_FCN (icode) (target, args[0].op);
25328 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25331 gcc_unreachable ();
/* Stores have no value to return; loads return the result rtx.  */
25337 return klass == store ? 0 : target;
25340 /* Return the integer constant in ARG. Constrain it to be in the range
25341 of the subparts of VEC_TYPE; issue an error if not. */
/* NOTE(review): the return statements of this function are elided from
   this listing.  */
25344 get_element_number (tree vec_type, tree arg)
25346 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
/* Reject selectors that are not host-representable unsigned constants
   or that exceed the last valid element index.  */
25348 if (!host_integerp (arg, 1)
25349 || (elt = tree_low_cst (arg, 1), elt > max))
25351 error ("selector must be an integer constant in the range 0..%wi", max);
25358 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25359 ix86_expand_vector_init. We DO have language-level syntax for this, in
25360 the form of (type){ init-list }. Except that since we can't place emms
25361 instructions from inside the compiler, we can't allow the use of MMX
25362 registers unless the user explicitly asks for it. So we do *not* define
25363 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25364 we have builtins invoked by mmintrin.h that gives us license to emit
25365 these sorts of instructions. */
25368 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25370 enum machine_mode tmode = TYPE_MODE (type);
25371 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25372 int i, n_elt = GET_MODE_NUNITS (tmode);
25373 rtvec v = rtvec_alloc (n_elt);
/* One call argument per vector element is required.  */
25375 gcc_assert (VECTOR_MODE_P (tmode));
25376 gcc_assert (call_expr_nargs (exp) == n_elt);
/* Expand every initializer and narrow it to the element mode.  */
25378 for (i = 0; i < n_elt; ++i)
25380 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25381 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25384 if (!target || !register_operand (target, tmode))
25385 target = gen_reg_rtx (tmode);
25387 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25391 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25392 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25393 had a language-level syntax for referencing vector elements. */
25396 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25398 enum machine_mode tmode, mode0;
25403 arg0 = CALL_EXPR_ARG (exp, 0);
25404 arg1 = CALL_EXPR_ARG (exp, 1);
25406 op0 = expand_normal (arg0);
/* The selector (arg1) must be a constant within the vector bounds;
   get_element_number diagnoses bad selectors.  */
25407 elt = get_element_number (TREE_TYPE (arg0), arg1);
/* tmode is the element mode, mode0 the full vector mode.  */
25409 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25410 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25411 gcc_assert (VECTOR_MODE_P (mode0));
25413 op0 = force_reg (mode0, op0);
25415 if (optimize || !target || !register_operand (target, tmode))
25416 target = gen_reg_rtx (tmode);
25418 ix86_expand_vector_extract (true, target, op0, elt);
25423 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25424 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25425 a language-level syntax for referencing vector elements. */
25428 ix86_expand_vec_set_builtin (tree exp)
25430 enum machine_mode tmode, mode1;
25431 tree arg0, arg1, arg2;
25433 rtx op0, op1, target;
25435 arg0 = CALL_EXPR_ARG (exp, 0);
25436 arg1 = CALL_EXPR_ARG (exp, 1);
25437 arg2 = CALL_EXPR_ARG (exp, 2);
/* tmode is the vector mode, mode1 the element mode.  */
25439 tmode = TYPE_MODE (TREE_TYPE (arg0));
25440 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25441 gcc_assert (VECTOR_MODE_P (tmode));
25443 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25444 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
/* The element index (arg2) must be a constant in range.  */
25445 elt = get_element_number (TREE_TYPE (arg0), arg2);
25447 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25448 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25450 op0 = force_reg (tmode, op0);
25451 op1 = force_reg (mode1, op1);
25453 /* OP0 is the source of these builtin functions and shouldn't be
25454 modified. Create a copy, use it and return it as target. */
25455 target = gen_reg_rtx (tmode);
25456 emit_move_insn (target, op0);
25457 ix86_expand_vector_set (true, target, op1, elt);
25462 /* Expand an expression EXP that calls a built-in function,
25463 with result going to TARGET if that's convenient
25464 (and in mode MODE if that's convenient).
25465 SUBTARGET may be used as the target for computing one of EXP's operands.
25466 IGNORE is nonzero if the value is to be ignored. */
/* NOTE(review): elided listing -- some statements, returns and braces
   from the original function are not visible here.  */
25469 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25470 enum machine_mode mode ATTRIBUTE_UNUSED,
25471 int ignore ATTRIBUTE_UNUSED)
25473 const struct builtin_description *d;
25475 enum insn_code icode;
25476 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25477 tree arg0, arg1, arg2;
25478 rtx op0, op1, op2, pat;
25479 enum machine_mode mode0, mode1, mode2;
25480 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25482 /* Determine whether the builtin function is available under the current ISA.
25483 Originally the builtin was not created if it wasn't applicable to the
25484 current ISA based on the command line switches. With function specific
25485 options, we need to check in the context of the function making the call
25486 whether it is supported. */
25487 if (ix86_builtins_isa[fcode].isa
25488 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25490 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25491 NULL, NULL, false);
25494 error ("%qE needs unknown isa option", fndecl);
25497 gcc_assert (opts != NULL);
25498 error ("%qE needs isa option %s", fndecl, opts);
/* Builtins that need hand-written expansion are handled case by case
   below; everything else falls through to the bdesc_* table scans.  */
25506 case IX86_BUILTIN_MASKMOVQ:
25507 case IX86_BUILTIN_MASKMOVDQU:
25508 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25509 ? CODE_FOR_mmx_maskmovq
25510 : CODE_FOR_sse2_maskmovdqu);
25511 /* Note the arg order is different from the operand order. */
25512 arg1 = CALL_EXPR_ARG (exp, 0);
25513 arg2 = CALL_EXPR_ARG (exp, 1);
25514 arg0 = CALL_EXPR_ARG (exp, 2);
25515 op0 = expand_normal (arg0);
25516 op1 = expand_normal (arg1);
25517 op2 = expand_normal (arg2);
25518 mode0 = insn_data[icode].operand[0].mode;
25519 mode1 = insn_data[icode].operand[1].mode;
25520 mode2 = insn_data[icode].operand[2].mode;
/* The destination pointer becomes a MEM in the data mode.  */
25522 op0 = force_reg (Pmode, op0);
25523 op0 = gen_rtx_MEM (mode1, op0);
25525 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25526 op0 = copy_to_mode_reg (mode0, op0);
25527 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25528 op1 = copy_to_mode_reg (mode1, op1);
25529 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25530 op2 = copy_to_mode_reg (mode2, op2);
25531 pat = GEN_FCN (icode) (op0, op1, op2);
/* LDMXCSR/STMXCSR go through a stack slot since the insns take a
   memory operand.  */
25537 case IX86_BUILTIN_LDMXCSR:
25538 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25539 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25540 emit_move_insn (target, op0);
25541 emit_insn (gen_sse_ldmxcsr (target));
25544 case IX86_BUILTIN_STMXCSR:
25545 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25546 emit_insn (gen_sse_stmxcsr (target));
25547 return copy_to_mode_reg (SImode, target);
25549 case IX86_BUILTIN_CLFLUSH:
25550 arg0 = CALL_EXPR_ARG (exp, 0);
25551 op0 = expand_normal (arg0);
25552 icode = CODE_FOR_sse2_clflush;
25553 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25554 op0 = copy_to_mode_reg (Pmode, op0);
25556 emit_insn (gen_sse2_clflush (op0));
25559 case IX86_BUILTIN_MONITOR:
25560 arg0 = CALL_EXPR_ARG (exp, 0);
25561 arg1 = CALL_EXPR_ARG (exp, 1);
25562 arg2 = CALL_EXPR_ARG (exp, 2);
25563 op0 = expand_normal (arg0);
25564 op1 = expand_normal (arg1);
25565 op2 = expand_normal (arg2);
25567 op0 = copy_to_mode_reg (Pmode, op0);
25569 op1 = copy_to_mode_reg (SImode, op1);
25571 op2 = copy_to_mode_reg (SImode, op2);
/* ix86_gen_monitor abstracts over 32/64-bit monitor patterns.  */
25572 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25575 case IX86_BUILTIN_MWAIT:
25576 arg0 = CALL_EXPR_ARG (exp, 0);
25577 arg1 = CALL_EXPR_ARG (exp, 1);
25578 op0 = expand_normal (arg0);
25579 op1 = expand_normal (arg1);
25581 op0 = copy_to_mode_reg (SImode, op0);
25583 op1 = copy_to_mode_reg (SImode, op1);
25584 emit_insn (gen_sse3_mwait (op0, op1));
/* vec_init / vec_ext / vec_set builtins delegate to their helpers.  */
25587 case IX86_BUILTIN_VEC_INIT_V2SI:
25588 case IX86_BUILTIN_VEC_INIT_V4HI:
25589 case IX86_BUILTIN_VEC_INIT_V8QI:
25590 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25592 case IX86_BUILTIN_VEC_EXT_V2DF:
25593 case IX86_BUILTIN_VEC_EXT_V2DI:
25594 case IX86_BUILTIN_VEC_EXT_V4SF:
25595 case IX86_BUILTIN_VEC_EXT_V4SI:
25596 case IX86_BUILTIN_VEC_EXT_V8HI:
25597 case IX86_BUILTIN_VEC_EXT_V2SI:
25598 case IX86_BUILTIN_VEC_EXT_V4HI:
25599 case IX86_BUILTIN_VEC_EXT_V16QI:
25600 return ix86_expand_vec_ext_builtin (exp, target);
25602 case IX86_BUILTIN_VEC_SET_V2DI:
25603 case IX86_BUILTIN_VEC_SET_V4SF:
25604 case IX86_BUILTIN_VEC_SET_V4SI:
25605 case IX86_BUILTIN_VEC_SET_V8HI:
25606 case IX86_BUILTIN_VEC_SET_V4HI:
25607 case IX86_BUILTIN_VEC_SET_V16QI:
25608 return ix86_expand_vec_set_builtin (exp);
/* __float128 infinity: materialize the constant from memory.  */
25610 case IX86_BUILTIN_INFQ:
25611 case IX86_BUILTIN_HUGE_VALQ:
25613 REAL_VALUE_TYPE inf;
25617 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25619 tmp = validize_mem (force_const_mem (mode, tmp));
25622 target = gen_reg_rtx (mode);
25624 emit_move_insn (target, tmp);
/* Not a special case: scan the builtin description tables and
   dispatch to the generic expanders.  */
25632 for (i = 0, d = bdesc_special_args;
25633 i < ARRAY_SIZE (bdesc_special_args);
25635 if (d->code == fcode)
25636 return ix86_expand_special_args_builtin (d, exp, target);
25638 for (i = 0, d = bdesc_args;
25639 i < ARRAY_SIZE (bdesc_args);
25641 if (d->code == fcode)
25644 case IX86_BUILTIN_FABSQ:
25645 case IX86_BUILTIN_COPYSIGNQ:
25647 /* Emit a normal call if SSE2 isn't available. */
25648 return expand_call (exp, target, ignore);
25650 return ix86_expand_args_builtin (d, exp, target);
25653 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25654 if (d->code == fcode)
25655 return ix86_expand_sse_comi (d, exp, target);
25657 for (i = 0, d = bdesc_pcmpestr;
25658 i < ARRAY_SIZE (bdesc_pcmpestr);
25660 if (d->code == fcode)
25661 return ix86_expand_sse_pcmpestr (d, exp, target);
25663 for (i = 0, d = bdesc_pcmpistr;
25664 i < ARRAY_SIZE (bdesc_pcmpistr);
25666 if (d->code == fcode)
25667 return ix86_expand_sse_pcmpistr (d, exp, target);
25669 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25670 if (d->code == fcode)
25671 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25672 (enum multi_arg_type)d->flag,
/* Every valid fcode must have matched one of the tables above.  */
25675 gcc_unreachable ();
25678 /* Returns a function decl for a vectorized version of the builtin function
25679 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25680 if it is not available. */
25683 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25686 enum machine_mode in_mode, out_mode;
/* Only vector-to-vector mappings are supported.  */
25689 if (TREE_CODE (type_out) != VECTOR_TYPE
25690 || TREE_CODE (type_in) != VECTOR_TYPE)
25693 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25694 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25695 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25696 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Map a handful of scalar math builtins to their SSE vector
   counterparts when the element mode/width combination matches.  */
25700 case BUILT_IN_SQRT:
25701 if (out_mode == DFmode && out_n == 2
25702 && in_mode == DFmode && in_n == 2)
25703 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25706 case BUILT_IN_SQRTF:
25707 if (out_mode == SFmode && out_n == 4
25708 && in_mode == SFmode && in_n == 4)
25709 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25712 case BUILT_IN_LRINT:
25713 if (out_mode == SImode && out_n == 4
25714 && in_mode == DFmode && in_n == 2)
25715 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25718 case BUILT_IN_LRINTF:
25719 if (out_mode == SImode && out_n == 4
25720 && in_mode == SFmode && in_n == 4)
25721 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25724 case BUILT_IN_COPYSIGN:
25725 if (out_mode == DFmode && out_n == 2
25726 && in_mode == DFmode && in_n == 2)
25727 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25730 case BUILT_IN_COPYSIGNF:
25731 if (out_mode == SFmode && out_n == 4
25732 && in_mode == SFmode && in_n == 4)
25733 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25740 /* Dispatch to a handler for a vectorization library. */
25741 if (ix86_veclib_handler)
25742 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
25748 /* Handler for an SVML-style interface to
25749 a library with vectorized intrinsics. */
/* NOTE(review): elided listing -- the declaration of `name`, some mode
   checks and the final return are not visible here.  */
25752 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25755 tree fntype, new_fndecl, args;
25758 enum machine_mode el_mode, in_mode;
25761 /* The SVML is suitable for unsafe math only. */
25762 if (!flag_unsafe_math_optimizations)
25765 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25766 n = TYPE_VECTOR_SUBPARTS (type_out);
25767 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25768 in_n = TYPE_VECTOR_SUBPARTS (type_in);
/* Input and output element modes must agree.  */
25769 if (el_mode != in_mode
/* Double-precision variants: require V2DF.  */
25777 case BUILT_IN_LOG10:
25779 case BUILT_IN_TANH:
25781 case BUILT_IN_ATAN:
25782 case BUILT_IN_ATAN2:
25783 case BUILT_IN_ATANH:
25784 case BUILT_IN_CBRT:
25785 case BUILT_IN_SINH:
25787 case BUILT_IN_ASINH:
25788 case BUILT_IN_ASIN:
25789 case BUILT_IN_COSH:
25791 case BUILT_IN_ACOSH:
25792 case BUILT_IN_ACOS:
25793 if (el_mode != DFmode || n != 2)
/* Single-precision variants: require V4SF.  */
25797 case BUILT_IN_EXPF:
25798 case BUILT_IN_LOGF:
25799 case BUILT_IN_LOG10F:
25800 case BUILT_IN_POWF:
25801 case BUILT_IN_TANHF:
25802 case BUILT_IN_TANF:
25803 case BUILT_IN_ATANF:
25804 case BUILT_IN_ATAN2F:
25805 case BUILT_IN_ATANHF:
25806 case BUILT_IN_CBRTF:
25807 case BUILT_IN_SINHF:
25808 case BUILT_IN_SINF:
25809 case BUILT_IN_ASINHF:
25810 case BUILT_IN_ASINF:
25811 case BUILT_IN_COSHF:
25812 case BUILT_IN_COSF:
25813 case BUILT_IN_ACOSHF:
25814 case BUILT_IN_ACOSF:
25815 if (el_mode != SFmode || n != 4)
/* Build the SVML entry-point name from the builtin's own name.
   bname+10 skips the "__builtin_" prefix; log/logf get special-cased
   because the library spells them "Ln".  */
25823 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25825 if (fn == BUILT_IN_LOGF)
25826 strcpy (name, "vmlsLn4");
25827 else if (fn == BUILT_IN_LOG)
25828 strcpy (name, "vmldLn2");
25831 sprintf (name, "vmls%s", bname+10);
25832 name[strlen (name)-1] = '4';
25835 sprintf (name, "vmld%s2", bname+10);
25837 /* Convert to uppercase. */
/* Count the builtin's arguments to pick a 1- or 2-argument fntype.  */
25841 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25842 args = TREE_CHAIN (args))
25846 fntype = build_function_type_list (type_out, type_in, NULL);
25848 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25850 /* Build a function declaration for the vectorized function. */
25851 new_fndecl = build_decl (BUILTINS_LOCATION,
25852 FUNCTION_DECL, get_identifier (name), fntype);
25853 TREE_PUBLIC (new_fndecl) = 1;
25854 DECL_EXTERNAL (new_fndecl) = 1;
25855 DECL_IS_NOVOPS (new_fndecl) = 1;
25856 TREE_READONLY (new_fndecl) = 1;
25861 /* Handler for an ACML-style interface to
25862 a library with vectorized intrinsics. */
25865 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
/* "__vr.._" is a template: the two dots are overwritten with the
   vector-width/type code and the function name is appended at +7.  */
25867 char name[20] = "__vr.._";
25868 tree fntype, new_fndecl, args;
25871 enum machine_mode el_mode, in_mode;
25874 /* The ACML is 64bits only and suitable for unsafe math only as
25875 it does not correctly support parts of IEEE with the required
25876 precision such as denormals. */
25878 || !flag_unsafe_math_optimizations)
25881 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25882 n = TYPE_VECTOR_SUBPARTS (type_out);
25883 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25884 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25885 if (el_mode != in_mode
/* Double-precision group: requires DFmode elements.  */
25895 case BUILT_IN_LOG2:
25896 case BUILT_IN_LOG10:
25899 if (el_mode != DFmode
/* Single-precision group: requires SFmode elements.  */
25904 case BUILT_IN_SINF:
25905 case BUILT_IN_COSF:
25906 case BUILT_IN_EXPF:
25907 case BUILT_IN_POWF:
25908 case BUILT_IN_LOGF:
25909 case BUILT_IN_LOG2F:
25910 case BUILT_IN_LOG10F:
25913 if (el_mode != SFmode
/* Append the builtin name (past "__builtin_") to the template.  */
25922 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25923 sprintf (name + 7, "%s", bname+10);
25926 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25927 args = TREE_CHAIN (args))
25931 fntype = build_function_type_list (type_out, type_in, NULL);
25933 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25935 /* Build a function declaration for the vectorized function. */
25936 new_fndecl = build_decl (BUILTINS_LOCATION,
25937 FUNCTION_DECL, get_identifier (name), fntype);
25938 TREE_PUBLIC (new_fndecl) = 1;
25939 DECL_EXTERNAL (new_fndecl) = 1;
25940 DECL_IS_NOVOPS (new_fndecl) = 1;
25941 TREE_READONLY (new_fndecl) = 1;
25947 /* Returns a decl of a function that implements conversion of an integer vector
25948 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25949 side of the conversion.
25950 Return NULL_TREE if it is not available. */
25953 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25955 if (TREE_CODE (type) != VECTOR_TYPE)
/* int -> float direction (FLOAT_EXPR case label elided in this
   listing): choose signed vs. unsigned cvt builtin.  */
25961 switch (TYPE_MODE (type))
25964 return TYPE_UNSIGNED (type)
25965 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25966 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
/* float -> int (truncating) direction.  */
25971 case FIX_TRUNC_EXPR:
25972 switch (TYPE_MODE (type))
25975 return TYPE_UNSIGNED (type)
25977 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25987 /* Returns a code for a target-specific builtin that implements
25988 reciprocal of the function, or NULL_TREE if not available. */
25991 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25992 bool sqrt ATTRIBUTE_UNUSED)
/* Reciprocal approximations are only valid under -mrecip with SSE
   math and unsafe/finite math flags; never when optimizing for size.  */
25994 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25995 && flag_finite_math_only && !flag_trapping_math
25996 && flag_unsafe_math_optimizations))
26000 /* Machine dependent builtins. */
26003 /* Vectorized version of sqrt to rsqrt conversion. */
26004 case IX86_BUILTIN_SQRTPS_NR:
26005 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
26011 /* Normal builtins. */
26014 /* Sqrt to rsqrt conversion. */
26015 case BUILT_IN_SQRTF:
26016 return ix86_builtins[IX86_BUILTIN_RSQRTF];
26023 /* Store OPERAND to the memory after reload is completed. This means
26024 that we can't easily use assign_stack_local. */
/* NOTE(review): elided listing -- the switch heads and some case labels
   of this function are not visible here.  */
26026 ix86_force_to_memory (enum machine_mode mode, rtx operand)
26030 gcc_assert (reload_completed);
/* With a red zone we can store below the stack pointer without
   adjusting it.  */
26031 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
26033 result = gen_rtx_MEM (mode,
26034 gen_rtx_PLUS (Pmode,
26036 GEN_INT (-RED_ZONE_SIZE)));
26037 emit_move_insn (result, operand);
/* Otherwise push the value: 64-bit path pushes one DImode word.  */
26039 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
26045 operand = gen_lowpart (DImode, operand);
26049 gen_rtx_SET (VOIDmode,
26050 gen_rtx_MEM (DImode,
26051 gen_rtx_PRE_DEC (DImode,
26052 stack_pointer_rtx)),
26056 gcc_unreachable ();
26058 result = gen_rtx_MEM (mode, stack_pointer_rtx);
/* 32-bit path: DImode is pushed as two SImode halves.  */
26067 split_di (&operand, 1, operands, operands + 1);
26069 gen_rtx_SET (VOIDmode,
26070 gen_rtx_MEM (SImode,
26071 gen_rtx_PRE_DEC (Pmode,
26072 stack_pointer_rtx)),
26075 gen_rtx_SET (VOIDmode,
26076 gen_rtx_MEM (SImode,
26077 gen_rtx_PRE_DEC (Pmode,
26078 stack_pointer_rtx)),
26083 /* Store HImodes as SImodes. */
26084 operand = gen_lowpart (SImode, operand);
26088 gen_rtx_SET (VOIDmode,
26089 gen_rtx_MEM (GET_MODE (operand),
26090 gen_rtx_PRE_DEC (SImode,
26091 stack_pointer_rtx)),
26095 gcc_unreachable ();
26097 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26102 /* Free operand from the memory. */
26104 ix86_free_from_memory (enum machine_mode mode)
/* Only needed when ix86_force_to_memory actually pushed (no red zone,
   or the MS ABI which has none).  */
26106 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
26110 if (mode == DImode || TARGET_64BIT)
26114 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26115 to pop or add instruction if registers are available. */
26116 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26117 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26122 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26123 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26125 static const enum reg_class *
26126 i386_ira_cover_classes (void)
/* Two static orderings: listing SSE_REGS before FLOAT_REGS makes IRA
   pick SSE on cost ties, and vice versa.  */
26128 static const enum reg_class sse_fpmath_classes[] = {
26129 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26131 static const enum reg_class no_sse_fpmath_classes[] = {
26132 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26135 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26138 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26139 QImode must go into class Q_REGS.
26140 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26141 movdf to do mem-to-mem moves through integer regs. */
26143 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26145 enum machine_mode mode = GET_MODE (x);
26147 /* We're only allowed to return a subclass of CLASS. Many of the
26148 following checks fail for NO_REGS, so eliminate that early. */
26149 if (regclass == NO_REGS)
26152 /* All classes can load zeros. */
26153 if (x == CONST0_RTX (mode))
26156 /* Force constants into memory if we are loading a (nonzero) constant into
26157 an MMX or SSE register. This is because there are no MMX/SSE instructions
26158 to load from a constant. */
26160 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26163 /* Prefer SSE regs only, if we can use them for math. */
26164 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26165 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26167 /* Floating-point constants need more complex checks. */
26168 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26170 /* General regs can load everything. */
26171 if (reg_class_subset_p (regclass, GENERAL_REGS))
26174 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26175 zero above. We only want to wind up preferring 80387 registers if
26176 we plan on doing computation with them. */
26178 && standard_80387_constant_p (x))
26180 /* Limit class to non-sse. */
26181 if (regclass == FLOAT_SSE_REGS)
26183 if (regclass == FP_TOP_SSE_REGS)
26185 if (regclass == FP_SECOND_SSE_REGS)
26186 return FP_SECOND_REG;
26187 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26194 /* Generally when we see PLUS here, it's the function invariant
26195 (plus soft-fp const_int). Which can only be computed into general
26197 if (GET_CODE (x) == PLUS)
26198 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26200 /* QImode constants are easy to load, but non-constant QImode data
26201 must go into Q_REGS. */
26202 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26204 if (reg_class_subset_p (regclass, Q_REGS))
26206 if (reg_class_subset_p (Q_REGS, regclass))
26214 /* Discourage putting floating-point values in SSE registers unless
26215 SSE math is being used, and likewise for the 387 registers. */
26217 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26219 enum machine_mode mode = GET_MODE (x);
26221 /* Restrict the output reload class to the register bank that we are doing
26222 math on. If we would like not to return a subset of CLASS, reject this
26223 alternative: if reload cannot do this, it will still use its choice. */
26224 mode = GET_MODE (x);
26225 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26226 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
/* x87 modes: strip the SSE part from mixed classes, keep pure
   float classes, reject everything else.  */
26228 if (X87_FLOAT_MODE_P (mode))
26230 if (regclass == FP_TOP_SSE_REGS)
26232 else if (regclass == FP_SECOND_SSE_REGS)
26233 return FP_SECOND_REG;
26235 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
/* Implement the target secondary-reload hook.  NOTE(review): elided
   listing -- the computation of `regno` before line 26260 and the
   final return are not visible here.  */
26241 static enum reg_class
26242 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
26243 enum machine_mode mode,
26244 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26246 /* QImode spills from non-QI registers require
26247 intermediate register on 32bit targets. */
26248 if (!in_p && mode == QImode && !TARGET_64BIT
26249 && (rclass == GENERAL_REGS
26250 || rclass == LEGACY_REGS
26251 || rclass == INDEX_REGS))
/* Resolve pseudos and subregs to a hard register number.  */
26260 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26261 regno = true_regnum (x);
26263 /* Return Q_REGS if the operand is in memory. */
26271 /* If we are copying between general and FP registers, we need a memory
26272 location. The same is true for SSE and MMX registers.
26274 To optimize register_move_cost performance, allow inline variant.
26276 The macro can't work reliably when one of the CLASSES is class containing
26277 registers from multiple units (SSE, MMX, integer). We avoid this by never
26278 combining those units in single alternative in the machine description.
26279 Ensure that this constraint holds to avoid unexpected surprises.
26281 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26282 enforce these sanity checks. */
/* NOTE(review): this extract elides some original lines (return statements
   of several branches); tokens kept verbatim.  */
26285 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26286 enum machine_mode mode, int strict)
/* Sanity check: mixed-unit classes are not supported; under !STRICT we only
   assert, since REGISTER_MOVE_COST may legitimately probe such classes.  */
26288 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26289 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26290 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26291 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26292 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26293 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26295 gcc_assert (!strict);
/* x87 <-> anything else always goes through memory.  */
26299 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26302 /* ??? This is a lie. We do have moves between mmx/general, and for
26303 mmx/sse2. But by saying we need secondary memory we discourage the
26304 register allocator from using the mmx registers unless needed. */
26305 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26308 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26310 /* SSE1 doesn't have any direct moves from other classes. */
26314 /* If the target says that inter-unit moves are more expensive
26315 than moving through memory, then don't generate them. */
26316 if (!TARGET_INTER_UNIT_MOVES)
26319 /* Between SSE and general, we have moves no larger than word size. */
26320 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
/* Out-of-line wrapper used by the SECONDARY_MEMORY_NEEDED target macro;
   simply forwards to the inline variant above.  */
26328 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26329 enum machine_mode mode, int strict)
26331 return inline_secondary_memory_needed (class1, class2, mode, strict);
26334 /* Return true if the registers in CLASS cannot represent the change from
26335 modes FROM to TO. */
/* Implements CANNOT_CHANGE_MODE_CLASS.  NOTE(review): extract elides some
   lines (e.g. the trivial FROM == TO early-out and some returns).  */
26338 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26339 enum reg_class regclass)
26344 /* x87 registers can't do subreg at all, as all values are reformatted
26345 to extended precision. */
26346 if (MAYBE_FLOAT_CLASS_P (regclass))
26349 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26351 /* Vector registers do not support QI or HImode loads. If we don't
26352 disallow a change to these modes, reload will assume it's ok to
26353 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26354 the vec_dupv4hi pattern. */
26355 if (GET_MODE_SIZE (from) < 4)
26358 /* Vector registers do not support subreg with nonzero offsets, which
26359 are otherwise valid for integer registers. Since we can't see
26360 whether we have a nonzero offset from here, prohibit all
26361 nonparadoxical subregs changing size. */
26362 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26369 /* Return the cost of moving data of mode M between a
26370 register and memory. A value of 2 is the default; this cost is
26371 relative to those in `REGISTER_MOVE_COST'.
26373 This function is used extensively by register_move_cost that is used to
26374 build tables at startup. Make it inline in this case.
26375 When IN is 2, return maximum of in and out move cost.
26377 If moving between registers and memory is more expensive than
26378 between two registers, you should define this macro to express the
26381 Model also increased moving costs of QImode registers in non
26385 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
/* NOTE(review): extract elides many lines here (the IN parameter, local
   `index`/`cost` declarations, and most switch case labels); code verbatim.
   Structure: per-unit cost tables selected by register class, indexed by a
   size-derived `index`; IN == 2 returns max(load, store).  */
26389 if (FLOAT_CLASS_P (regclass))
26407 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26408 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26410 if (SSE_CLASS_P (regclass))
26413 switch (GET_MODE_SIZE (mode))
26428 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26429 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26431 if (MMX_CLASS_P (regclass))
26434 switch (GET_MODE_SIZE (mode))
26446 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26447 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
/* Integer registers: 1-byte moves are special-cased for partial-register
   stalls (movzbl vs. plain byte load).  */
26449 switch (GET_MODE_SIZE (mode))
26452 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26455 return ix86_cost->int_store[0];
26456 if (TARGET_PARTIAL_REG_DEPENDENCY
26457 && optimize_function_for_speed_p (cfun))
26458 cost = ix86_cost->movzbl_load;
26460 cost = ix86_cost->int_load[0];
26462 return MAX (cost, ix86_cost->int_store[0]);
26468 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4)
26470 return ix86_cost->movzbl_load;
26472 return ix86_cost->int_store[0] + 4;
26477 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26478 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26480 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26481 if (mode == TFmode)
26484 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26486 cost = ix86_cost->int_load[2];
26488 cost = ix86_cost->int_store[2];
/* Wide modes: scale the word-move cost by the number of words.  */
26489 return (cost * (((int) GET_MODE_SIZE (mode)
26490 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
/* Out-of-line wrapper for the MEMORY_MOVE_COST target macro; forwards to
   the inline variant above.  */
26495 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
26497 return inline_memory_move_cost (mode, regclass, in);
26501 /* Return the cost of moving data from a register in class CLASS1 to
26502 one in class CLASS2.
26504 It is not required that the cost always equal 2 when FROM is the same as TO;
26505 on some machines it is expensive to move between registers if they are not
26506 general registers. */
/* NOTE(review): extract elides some lines (local `cost` declaration, a few
   returns); code kept verbatim.  */
26509 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
26510 enum reg_class class2)
26512 /* In case we require secondary memory, compute cost of the store followed
26513 by load. In order to avoid bad register allocation choices, we need
26514 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26516 if (inline_secondary_memory_needed (class1, class2, mode, 0))
/* IN == 2 below requests max(load, store) from inline_memory_move_cost.  */
26520 cost += inline_memory_move_cost (mode, class1, 2);
26521 cost += inline_memory_move_cost (mode, class2, 2);
26523 /* In case of copying from general_purpose_register we may emit multiple
26524 stores followed by single load causing memory size mismatch stall.
26525 Count this as arbitrarily high cost of 20. */
26526 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26529 /* In the case of FP/MMX moves, the registers actually overlap, and we
26530 have to switch modes in order to treat them differently. */
26531 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26532 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26538 /* Moves between SSE/MMX and integer unit are expensive. */
26539 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26540 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26542 /* ??? By keeping returned value relatively high, we limit the number
26543 of moves between integer and MMX/SSE registers for all targets.
26544 Additionally, high value prevents problem with x86_modes_tieable_p(),
26545 where integer modes in MMX/SSE registers are not tieable
26546 because of missing QImode and HImode moves to, from or between
26547 MMX/SSE registers. */
26548 return MAX (8, ix86_cost->mmxsse_to_integer);
/* Intra-unit moves: use the per-unit tuned costs.  */
26550 if (MAYBE_FLOAT_CLASS_P (class1))
26551 return ix86_cost->fp_move;
26552 if (MAYBE_SSE_CLASS_P (class1))
26553 return ix86_cost->sse_move;
26554 if (MAYBE_MMX_CLASS_P (class1))
26555 return ix86_cost->mmx_move;
26559 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
/* Implements HARD_REGNO_MODE_OK.  Checked per register bank: flags, x87,
   SSE, MMX, then general-purpose.  NOTE(review): extract elides some
   return-0/return-1 lines; code kept verbatim.  */
26562 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26564 /* Flags and only flags can only hold CCmode values. */
26565 if (CC_REGNO_P (regno))
26566 return GET_MODE_CLASS (mode) == MODE_CC;
26567 if (GET_MODE_CLASS (mode) == MODE_CC
26568 || GET_MODE_CLASS (mode) == MODE_RANDOM
26569 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26571 if (FP_REGNO_P (regno))
26572 return VALID_FP_MODE_P (mode);
26573 if (SSE_REGNO_P (regno))
26575 /* We implement the move patterns for all vector modes into and
26576 out of SSE registers, even when no operation instructions
26577 are available. OImode move is available only when AVX is
26579 return ((TARGET_AVX && mode == OImode)
26580 || VALID_AVX256_REG_MODE (mode)
26581 || VALID_SSE_REG_MODE (mode)
26582 || VALID_SSE2_REG_MODE (mode)
26583 || VALID_MMX_REG_MODE (mode)
26584 || VALID_MMX_REG_MODE_3DNOW (mode));
26586 if (MMX_REGNO_P (regno))
26588 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26589 so if the register is available at all, then we can move data of
26590 the given mode into or out of it. */
26591 return (VALID_MMX_REG_MODE (mode)
26592 || VALID_MMX_REG_MODE_3DNOW (mode));
26595 if (mode == QImode)
26597 /* Take care for QImode values - they can be in non-QI regs,
26598 but then they do cause partial register stalls. */
26599 if (regno <= BX_REG || TARGET_64BIT)
26601 if (!TARGET_PARTIAL_REG_STALL)
/* Late in compilation, allow QImode in any GPR anyway.  */
26603 return reload_in_progress || reload_completed;
26605 /* We handle both integer and floats in the general purpose registers. */
26606 else if (VALID_INT_MODE_P (mode))
26608 else if (VALID_FP_MODE_P (mode))
26610 else if (VALID_DFP_MODE_P (mode))
26612 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26613 on to use that value in smaller contexts, this can easily force a
26614 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26615 supporting DImode, allow it. */
26616 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26622 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26623 tieable integer mode. */
/* NOTE(review): extract elides the switch statement and its case labels;
   only two of the return arms are visible.  */
26626 ix86_tieable_integer_mode_p (enum machine_mode mode)
26635 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26638 return TARGET_64BIT;
26645 /* Return true if MODE1 is accessible in a register that can hold MODE2
26646 without copying. That is, all register classes that can hold MODE2
26647 can also hold MODE1. */
/* Implements MODES_TIEABLE_P.  NOTE(review): extract elides some return
   lines; code kept verbatim.  */
26650 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26652 if (mode1 == mode2)
26655 if (ix86_tieable_integer_mode_p (mode1)
26656 && ix86_tieable_integer_mode_p (mode2))
26659 /* MODE2 being XFmode implies fp stack or general regs, which means we
26660 can tie any smaller floating point modes to it. Note that we do not
26661 tie this with TFmode. */
26662 if (mode2 == XFmode)
26663 return mode1 == SFmode || mode1 == DFmode;
26665 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26666 that we can tie it with SFmode. */
26667 if (mode2 == DFmode)
26668 return mode1 == SFmode;
26670 /* If MODE2 is only appropriate for an SSE register, then tie with
26671 any other mode acceptable to SSE registers. */
26672 if (GET_MODE_SIZE (mode2) == 16
26673 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26674 return (GET_MODE_SIZE (mode1) == 16
26675 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26677 /* If MODE2 is appropriate for an MMX register, then tie
26678 with any other mode acceptable to MMX registers. */
26679 if (GET_MODE_SIZE (mode2) == 8
26680 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26681 return (GET_MODE_SIZE (mode1) == 8
26682 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26687 /* Compute a (partial) cost for rtx X. Return true if the complete
26688 cost has been computed, and false if subexpressions should be
26689 scanned. In either case, *TOTAL contains the cost result. */
/* Implements TARGET_RTX_COSTS.  Dispatches on the rtx code of X; SPEED
   selects the size-vs-speed cost table.  NOTE(review): this extract elides
   the enclosing switch, most case labels, braces and breaks; the visible
   code is kept verbatim and annotated with the apparent case it belongs
   to — confirm against the full source.  */
26692 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26694 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26695 enum machine_mode mode = GET_MODE (x);
26696 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
/* Integer/symbolic constant costs.  */
26704 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26706 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26708 else if (flag_pic && SYMBOLIC_CONST (x)
/* NOTE(review): BUG — `!GET_CODE (x) != LABEL_REF` compares the negation
   of the code (0/1) against LABEL_REF, so this subcondition is effectively
   always true.  Almost certainly meant `GET_CODE (x) != LABEL_REF`
   (this was a long-standing upstream GCC bug, fixed later).  Cannot be
   safely patched here because the surrounding lines are elided.  */
26710 || (!GET_CODE (x) != LABEL_REF
26711 && (GET_CODE (x) != SYMBOL_REF
26712 || !SYMBOL_REF_LOCAL_P (x)))))
/* Floating-point constant costs (presumably CONST_DOUBLE case).  */
26719 if (mode == VOIDmode)
26722 switch (standard_80387_constant_p (x))
26727 default: /* Other constants */
26732 /* Start with (MEM (SYMBOL_REF)), since that's where
26733 it'll probably end up. Add a penalty for size. */
26734 *total = (COSTS_N_INSNS (1)
26735 + (flag_pic != 0 && !TARGET_64BIT)
26736 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26742 /* The zero extensions is often completely free on x86_64, so make
26743 it as cheap as possible. */
26744 if (TARGET_64BIT && mode == DImode
26745 && GET_MODE (XEXP (x, 0)) == SImode)
26747 else if (TARGET_ZERO_EXTEND_WITH_AND)
26748 *total = cost->add;
26750 *total = cost->movzx;
/* SIGN_EXTEND case.  */
26754 *total = cost->movsx;
/* Shift cases (ASHIFT and relatives).  */
26758 if (CONST_INT_P (XEXP (x, 1))
26759 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26761 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26764 *total = cost->add;
/* Small left shifts may be cheaper as LEA.  */
26767 if ((value == 2 || value == 3)
26768 && cost->lea <= cost->shift_const)
26770 *total = cost->lea;
/* 32-bit DImode shifts are synthesized from 32-bit operations.  */
26780 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26782 if (CONST_INT_P (XEXP (x, 1)))
26784 if (INTVAL (XEXP (x, 1)) > 32)
26785 *total = cost->shift_const + COSTS_N_INSNS (2);
26787 *total = cost->shift_const * 2;
26791 if (GET_CODE (XEXP (x, 1)) == AND)
26792 *total = cost->shift_var * 2;
26794 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26799 if (CONST_INT_P (XEXP (x, 1)))
26800 *total = cost->shift_const;
26802 *total = cost->shift_var;
/* MULT case: FP multiply first, then integer multiply below.  */
26807 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26809 /* ??? SSE scalar cost should be used here. */
26810 *total = cost->fmul;
26813 else if (X87_FLOAT_MODE_P (mode))
26815 *total = cost->fmul;
26818 else if (FLOAT_MODE_P (mode))
26820 /* ??? SSE vector cost should be used here. */
26821 *total = cost->fmul;
26826 rtx op0 = XEXP (x, 0);
26827 rtx op1 = XEXP (x, 1);
/* Integer multiply: estimate cost by population count of a constant
   multiplier (nbits set bits -> nbits * mult_bit).  */
26829 if (CONST_INT_P (XEXP (x, 1)))
26831 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26832 for (nbits = 0; value != 0; value &= value - 1)
26836 /* This is arbitrary. */
26839 /* Compute costs correctly for widening multiplication. */
26840 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26841 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26842 == GET_MODE_SIZE (mode))
26844 int is_mulwiden = 0;
26845 enum machine_mode inner_mode = GET_MODE (op0);
26847 if (GET_CODE (op0) == GET_CODE (op1))
26848 is_mulwiden = 1, op1 = XEXP (op1, 0);
26849 else if (CONST_INT_P (op1))
26851 if (GET_CODE (op0) == SIGN_EXTEND)
26852 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26855 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
/* Widening multiply: cost the narrow inner operands instead.  */
26859 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26862 *total = (cost->mult_init[MODE_INDEX (mode)]
26863 + nbits * cost->mult_bit
26864 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
/* DIV/MOD cases.  */
26873 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26874 /* ??? SSE cost should be used here. */
26875 *total = cost->fdiv;
26876 else if (X87_FLOAT_MODE_P (mode))
26877 *total = cost->fdiv;
26878 else if (FLOAT_MODE_P (mode))
26879 /* ??? SSE vector cost should be used here. */
26880 *total = cost->fdiv;
26882 *total = cost->divide[MODE_INDEX (mode)];
/* PLUS case: recognize address-like forms that fold into a single LEA.  */
26886 if (GET_MODE_CLASS (mode) == MODE_INT
26887 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
/* (plus (plus (mult reg const) reg) const) -> LEA with scale.  */
26889 if (GET_CODE (XEXP (x, 0)) == PLUS
26890 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26891 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26892 && CONSTANT_P (XEXP (x, 1)))
26894 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26895 if (val == 2 || val == 4 || val == 8)
26897 *total = cost->lea;
26898 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26899 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26900 outer_code, speed);
26901 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26905 else if (GET_CODE (XEXP (x, 0)) == MULT
26906 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26908 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26909 if (val == 2 || val == 4 || val == 8)
26911 *total = cost->lea;
26912 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26913 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26917 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26919 *total = cost->lea;
26920 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26921 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26922 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
/* PLUS/MINUS FP handling.  */
26929 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26931 /* ??? SSE cost should be used here. */
26932 *total = cost->fadd;
26935 else if (X87_FLOAT_MODE_P (mode))
26937 *total = cost->fadd;
26940 else if (FLOAT_MODE_P (mode))
26942 /* ??? SSE vector cost should be used here. */
26943 *total = cost->fadd;
/* Logical ops (AND/IOR/XOR, presumably): doubled on 32-bit DImode; the
   shift accounts for non-DImode operands costed once only.  */
26951 if (!TARGET_64BIT && mode == DImode)
26953 *total = (cost->add * 2
26954 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26955 << (GET_MODE (XEXP (x, 0)) != DImode))
26956 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26957 << (GET_MODE (XEXP (x, 1)) != DImode)));
/* NEG case.  */
26963 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26965 /* ??? SSE cost should be used here. */
26966 *total = cost->fchs;
26969 else if (X87_FLOAT_MODE_P (mode))
26971 *total = cost->fchs;
26974 else if (FLOAT_MODE_P (mode))
26976 /* ??? SSE vector cost should be used here. */
26977 *total = cost->fchs;
/* NOT case.  */
26983 if (!TARGET_64BIT && mode == DImode)
26984 *total = cost->add * 2;
26986 *total = cost->add;
/* COMPARE case: single-bit test against zero is a test[bwl].  */
26990 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26991 && XEXP (XEXP (x, 0), 1) == const1_rtx
26992 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26993 && XEXP (x, 1) == const0_rtx)
26995 /* This kind of construct is implemented using test[bwl].
26996 Treat it as if we had an AND. */
26997 *total = (cost->add
26998 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26999 + rtx_cost (const1_rtx, outer_code, speed));
/* FLOAT_EXTEND (or similar): free only under SSE math.  */
27005 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
/* ABS case.  */
27010 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27011 /* ??? SSE cost should be used here. */
27012 *total = cost->fabs;
27013 else if (X87_FLOAT_MODE_P (mode))
27014 *total = cost->fabs;
27015 else if (FLOAT_MODE_P (mode))
27016 /* ??? SSE vector cost should be used here. */
27017 *total = cost->fabs;
/* SQRT case.  */
27021 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27022 /* ??? SSE cost should be used here. */
27023 *total = cost->fsqrt;
27024 else if (X87_FLOAT_MODE_P (mode))
27025 *total = cost->fsqrt;
27026 else if (FLOAT_MODE_P (mode))
27027 /* ??? SSE vector cost should be used here. */
27028 *total = cost->fsqrt;
/* UNSPEC case: thread-pointer reads are cheap.  */
27032 if (XINT (x, 1) == UNSPEC_TP)
/* Counter used to generate unique local labels for Mach-O stubs.  */
27043 static int current_machopic_label_num;
27045 /* Given a symbol name and its associated stub, write out the
27046 definition of the stub. */
/* Darwin/Mach-O only (32-bit: asserted below).  Emits the lazy-binding stub,
   the binder, and the lazy pointer for SYMB into their sections.
   NOTE(review): extract elides some lines (e.g. the MACHOPIC_ATT_STUB /
   pure-indirection branches); code kept verbatim.  */
27049 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27051 unsigned int length;
27052 char *binder_name, *symbol_name, lazy_ptr_name[32];
27053 int label = ++current_machopic_label_num;
27055 /* For 64-bit we shouldn't get here. */
27056 gcc_assert (!TARGET_64BIT);
27058 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27059 symb = (*targetm.strip_name_encoding) (symb);
27061 length = strlen (stub);
27062 binder_name = XALLOCAVEC (char, length + 32);
27063 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27065 length = strlen (symb);
27066 symbol_name = XALLOCAVEC (char, length + 32);
27067 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27069 sprintf (lazy_ptr_name, "L%d$lz", label);
/* PIC vs non-PIC stub sections (condition line elided in extract).  */
27072 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27074 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27076 fprintf (file, "%s:\n", stub);
27077 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
/* PIC stub: materialize PC in %eax, load lazy pointer, jump through it.  */
27081 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27082 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27083 fprintf (file, "\tjmp\t*%%edx\n");
27086 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27088 fprintf (file, "%s:\n", binder_name);
27092 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27093 fputs ("\tpushl\t%eax\n", file);
27096 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27098 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
/* Lazy pointer initially points at the binder; dyld patches it on first use.  */
27100 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27101 fprintf (file, "%s:\n", lazy_ptr_name);
27102 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27103 fprintf (file, ASM_LONG "%s\n", binder_name);
/* TARGET_ASM_FILE_END for Darwin/x86: delegates to the generic Darwin
   end-of-file handling (stub emission etc.).  */
27107 darwin_x86_file_end (void)
27109 darwin_file_end ();
27114 /* Order the registers for register allocator. */
27117 x86_order_regs_for_local_alloc (void)
27122 /* First allocate the local general purpose registers. */
27123 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27124 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27125 reg_alloc_order [pos++] = i;
27127 /* Global general purpose registers. */
27128 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27129 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27130 reg_alloc_order [pos++] = i;
27132 /* x87 registers come first in case we are doing FP math
27134 if (!TARGET_SSE_MATH)
27135 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27136 reg_alloc_order [pos++] = i;
27138 /* SSE registers. */
27139 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27140 reg_alloc_order [pos++] = i;
27141 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27142 reg_alloc_order [pos++] = i;
27144 /* x87 registers. */
27145 if (TARGET_SSE_MATH)
27146 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27147 reg_alloc_order [pos++] = i;
27149 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27150 reg_alloc_order [pos++] = i;
27152 /* Initialize the rest of array as we do not allocate some registers
27154 while (pos < FIRST_PSEUDO_REGISTER)
27155 reg_alloc_order [pos++] = 0;
27158 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27159 struct attribute_spec.handler. */
/* Validates placement (function/method/field/typedef), rejects use on
   non-64-bit targets, and diagnoses the mutually exclusive ms_abi/sysv_abi
   pair.  NOTE(review): extract elides some lines (the TARGET_64BIT check
   condition and several returns); code kept verbatim.  */
27161 ix86_handle_abi_attribute (tree *node, tree name,
27162 tree args ATTRIBUTE_UNUSED,
27163 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27165 if (TREE_CODE (*node) != FUNCTION_TYPE
27166 && TREE_CODE (*node) != METHOD_TYPE
27167 && TREE_CODE (*node) != FIELD_DECL
27168 && TREE_CODE (*node) != TYPE_DECL)
27170 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27172 *no_add_attrs = true;
27177 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27179 *no_add_attrs = true;
27183 /* Can combine regparm with all attributes but fastcall. */
27184 if (is_attribute_p ("ms_abi", name))
27186 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27188 error ("ms_abi and sysv_abi attributes are not compatible");
27193 else if (is_attribute_p ("sysv_abi", name))
27195 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27197 error ("ms_abi and sysv_abi attributes are not compatible");
27206 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27207 struct attribute_spec.handler. */
/* Accepts the attribute only on struct/union types (or TYPE_DECLs thereof)
   and rejects combining ms_struct with gcc_struct.  NOTE(review): extract
   elides some lines; code kept verbatim.  */
27209 ix86_handle_struct_attribute (tree *node, tree name,
27210 tree args ATTRIBUTE_UNUSED,
27211 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
/* TYPE points at the type to check: the decl's type for TYPE_DECLs,
   otherwise *node itself (assignment line elided in extract).  */
27214 if (DECL_P (*node))
27216 if (TREE_CODE (*node) == TYPE_DECL)
27217 type = &TREE_TYPE (*node);
27222 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27223 || TREE_CODE (*type) == UNION_TYPE)))
27225 warning (OPT_Wattributes, "%qE attribute ignored",
27227 *no_add_attrs = true;
27230 else if ((is_attribute_p ("ms_struct", name)
27231 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27232 || ((is_attribute_p ("gcc_struct", name)
27233 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27235 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27237 *no_add_attrs = true;
/* TARGET_MS_BITFIELD_LAYOUT_P: use MS bitfield layout when the target
   default says so and "gcc_struct" does not override, or when "ms_struct"
   is present explicitly.  Note the operator layout: the && binds inside the
   first parenthesized term; `|| lookup_attribute ("ms_struct", ...)` is a
   separate top-level alternative.  */
27244 ix86_ms_bitfield_layout_p (const_tree record_type)
27246 return (TARGET_MS_BITFIELD_LAYOUT &&
27247 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27248 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
27251 /* Returns an expression indicating where the this parameter is
27252 located on entry to the FUNCTION. */
/* Returns a REG for register-passed `this` (64-bit: first integer parm reg
   of the function's ABI; 32-bit with regparm/fastcall: ECX/EDX family) or a
   MEM at the incoming stack slot otherwise.  AGGR indexes past a hidden
   aggregate-return pointer.  NOTE(review): extract elides some lines
   (nregs/regno handling); code kept verbatim.  */
27255 x86_this_parameter (tree function)
27257 tree type = TREE_TYPE (function);
27258 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27263 const int *parm_regs;
27265 if (ix86_function_type_abi (type) == MS_ABI)
27266 parm_regs = x86_64_ms_abi_int_parameter_registers;
27268 parm_regs = x86_64_int_parameter_registers;
27269 return gen_rtx_REG (DImode, parm_regs[aggr]);
27272 nregs = ix86_function_regparm (type, function);
27274 if (nregs > 0 && !stdarg_p (type))
27278 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27279 regno = aggr ? DX_REG : CX_REG;
/* `this` did not fit in a register: it lives on the stack.  */
27287 return gen_rtx_MEM (SImode,
27288 plus_constant (stack_pointer_rtx, 4));
27291 return gen_rtx_REG (SImode, regno);
27294 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27297 /* Determine whether x86_output_mi_thunk can succeed. */
/* On 32-bit the thunk needs a scratch register: OK with fewer than three
   regparm args; otherwise a vcall offset or PIC reference to a non-local
   function makes it fail.  NOTE(review): extract elides the return lines.  */
27300 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27301 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27302 HOST_WIDE_INT vcall_offset, const_tree function)
27304 /* 64-bit can handle anything. */
27308 /* For 32-bit, everything's fine if we have one free register. */
27309 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27312 /* Need a free register for vcall_offset. */
27316 /* Need a free register for GOT references. */
27317 if (flag_pic && !(*targetm.binds_local_p) (function))
27320 /* Otherwise ok. */
27324 /* Output the assembler code for a thunk function. THUNK_DECL is the
27325 declaration for the thunk function itself, FUNCTION is the decl for
27326 the target function. DELTA is an immediate constant offset to be
27327 added to THIS. If VCALL_OFFSET is nonzero, the word at
27328 *(*this + vcall_offset) should be added to THIS. */
/* Implements TARGET_ASM_OUTPUT_MI_THUNK by printing assembly directly.
   NOTE(review): extract elides many lines (locals xops/tmp, TARGET_64BIT
   conditions, Mach-O branches); code kept verbatim.  */
27331 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
27332 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27333 HOST_WIDE_INT vcall_offset, tree function)
27336 rtx this_param = x86_this_parameter (function);
27339 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27340 pull it in now and let DELTA benefit. */
27341 if (REG_P (this_param))
27342 this_reg = this_param;
27343 else if (vcall_offset)
27345 /* Put the this parameter into %eax. */
27346 xops[0] = this_param;
27347 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27348 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27351 this_reg = NULL_RTX;
27353 /* Adjust the this parameter by a fixed constant. */
27356 xops[0] = GEN_INT (delta);
27357 xops[1] = this_reg ? this_reg : this_param;
/* 64-bit: a delta outside the signed-32 immediate range must be
   materialized in a scratch register (%r10) first.  */
27360 if (!x86_64_general_operand (xops[0], DImode))
27362 tmp = gen_rtx_REG (DImode, R10_REG);
27364 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27366 xops[1] = this_param;
27368 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27371 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27374 /* Adjust the this parameter by a value stored in the vtable. */
27378 tmp = gen_rtx_REG (DImode, R10_REG);
/* 32-bit scratch selection: ECX unless fastcall already uses it.  */
27381 int tmp_regno = CX_REG;
27382 if (lookup_attribute ("fastcall",
27383 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27384 tmp_regno = AX_REG;
27385 tmp = gen_rtx_REG (SImode, tmp_regno);
/* Load the vtable pointer (*this) into the scratch register.  */
27388 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27390 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27392 /* Adjust the this parameter. */
27393 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27394 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27396 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27397 xops[0] = GEN_INT (vcall_offset);
27399 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27400 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27402 xops[1] = this_reg;
27403 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27406 /* If necessary, drop THIS back to its stack slot. */
27407 if (this_reg && this_reg != this_param)
27409 xops[0] = this_reg;
27410 xops[1] = this_param;
27411 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
/* Tail-jump to the real function.  */
27414 xops[0] = XEXP (DECL_RTL (function), 0);
27417 if (!flag_pic || (*targetm.binds_local_p) (function))
27418 output_asm_insn ("jmp\t%P0", xops);
27419 /* All thunks should be in the same object as their target,
27420 and thus binds_local_p should be true. */
27421 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27422 gcc_unreachable ();
/* 64-bit PIC: indirect jump through the GOT (RIP-relative).  */
27425 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27426 tmp = gen_rtx_CONST (Pmode, tmp);
27427 tmp = gen_rtx_MEM (QImode, tmp);
27429 output_asm_insn ("jmp\t%A0", xops);
27434 if (!flag_pic || (*targetm.binds_local_p) (function))
27435 output_asm_insn ("jmp\t%P0", xops);
/* Mach-O PIC: jump through the symbol stub.  */
27440 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27441 tmp = (gen_rtx_SYMBOL_REF
27443 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27444 tmp = gen_rtx_MEM (QImode, tmp);
27446 output_asm_insn ("jmp\t%0", xops);
27449 #endif /* TARGET_MACHO */
/* 32-bit ELF PIC: set up the GOT pointer in %ecx, then jump via GOT.  */
27451 tmp = gen_rtx_REG (SImode, CX_REG);
27452 output_set_got (tmp, NULL_RTX);
27455 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27456 output_asm_insn ("jmp\t{*}%1", xops);
/* TARGET_ASM_FILE_START: emit standard preamble plus the optional .version,
   __fltused global, and Intel-syntax directives.  NOTE(review): extract
   elides the TARGET_MACHO conditional guarding darwin_file_start.  */
27462 x86_file_start (void)
27464 default_file_start ();
27466 darwin_file_start ();
27468 if (X86_FILE_START_VERSION_DIRECTIVE)
27469 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27470 if (X86_FILE_START_FLTUSED)
27471 fputs ("\t.global\t__fltused\n", asm_out_file);
27472 if (ix86_asm_dialect == ASM_INTEL)
27473 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
/* ADJUST_FIELD_ALIGN helper: on 32-bit without -malign-double, cap the
   alignment of double/integer-class fields at 32 bits for ABI
   compatibility; otherwise keep COMPUTED.  NOTE(review): extract elides the
   `return computed` lines.  */
27477 x86_field_alignment (tree field, int computed)
27479 enum machine_mode mode;
27480 tree type = TREE_TYPE (field);
27482 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
/* Alignment decision is based on the element mode for arrays.  */
27484 mode = TYPE_MODE (strip_array_types (type));
27485 if (mode == DFmode || mode == DCmode
27486 || GET_MODE_CLASS (mode) == MODE_INT
27487 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27488 return MIN (32, computed);
27492 /* Output assembler code to FILE to increment profiler label # LABELNO
27493 for profiling a function entry. */
/* Emits the mcount call: 64-bit (lea counter into %r11, call mcount,
   via GOTPCREL under SysV PIC), 32-bit PIC (GOTOFF counter + call via GOT),
   or plain 32-bit.  NOTE(review): extract elides the TARGET_64BIT /
   flag_pic branch structure; code kept verbatim.  */
27495 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27499 #ifndef NO_PROFILE_COUNTERS
27500 fprintf (file, "\tleaq\t" LPREFIX "P%d@(%%rip),%%r11\n", labelno);
27503 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27504 fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
27506 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27510 #ifndef NO_PROFILE_COUNTERS
27511 fprintf (file, "\tleal\t" LPREFIX "P%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27514 fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
27518 #ifndef NO_PROFILE_COUNTERS
27519 fprintf (file, "\tmovl\t$" LPREFIX "P%d,%%" PROFILE_COUNT_REGISTER "\n",
27522 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
27526 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27527 /* We don't have exact information about the insn sizes, but we may assume
27528 quite safely that we are informed about all 1 byte insns and memory
27529 address sizes. This is enough to eliminate unnecessary padding in
/* Returns a conservative lower bound (in bytes) on INSN's encoded length,
   used by the K8 jump-padding pass below.  NOTE(review): extract elides
   several return statements and case labels; code kept verbatim.  */
27533 min_insn_size (rtx insn)
27537 if (!INSN_P (insn) || !active_insn_p (insn))
27540 /* Discard alignments we've emit and jump instructions. */
27541 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27542 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27544 if (JUMP_TABLE_DATA_P (insn))
27547 /* Important case - calls are always 5 bytes.
27548 It is common to have many calls in the row. */
27550 && symbolic_reference_mentioned_p (PATTERN (insn))
27551 && !SIBLING_CALL_P (insn))
27553 len = get_attr_length (insn);
27557 /* For normal instructions we rely on get_attr_length being exact,
27558 with a few exceptions. */
27559 if (!JUMP_P (insn))
27561 enum attr_type type = get_attr_type (insn);
/* Inline asm length is an estimate, not exact.  */
27566 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27567 || asm_noperands (PATTERN (insn)) >= 0)
27574 /* Otherwise trust get_attr_length. */
/* Jumps: only the address part below 4 bytes with a symbolic reference is
   suspect (may be relaxed), so adjust conservatively.  */
27578 l = get_attr_length_address (insn);
27579 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27588 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
/* Scans the insn stream maintaining a sliding window [START, INSN] whose
   byte size (per min_insn_size estimates) is tracked in NBYTES and whose
   jump count in NJUMPS.  Whenever a 4th jump would land in the same
   16-byte window, a gen_pad insn is emitted before INSN to push it out.
   Labels with sufficient alignment reset the window.  Interior lines are
   elided in this extract — the two near-identical window-shrinking loops
   below are the original's, kept byte-identical. */
27592 ix86_avoid_jump_mispredicts (void)
27594 rtx insn, start = get_insns ();
27595 int nbytes = 0, njumps = 0;
27598 /* Look for all minimal intervals of instructions containing 4 jumps.
27599    The intervals are bounded by START and INSN.  NBYTES is the total
27600    size of instructions in the interval including INSN and not including
27601    START.  When the NBYTES is smaller than 16 bytes, it is possible
27602    that the end of START and INSN ends up in the same 16byte page.
27604    The smallest offset in the page INSN can start is the case where START
27605    ends on the offset 0.  Offset of INSN is then NBYTES - sizeof (INSN).
27606    We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
27608 for (insn = start; insn; insn = NEXT_INSN (insn))
27612 if (LABEL_P (insn))
27614 int align = label_to_alignment (insn);
27615 int max_skip = label_to_max_skip (insn);
27619 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27620    already in the current 16 byte page, because otherwise
27621    ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27622    bytes to reach 16 byte boundary.  */
27624 || (align <= 3 && max_skip != (1 << align) - 1))
27627 fprintf (dump_file, "Label %i with max_skip %i\n",
27628 INSN_UID (insn), max_skip);
27631 while (nbytes + max_skip >= 16)
27633 start = NEXT_INSN (start);
27634 if ((JUMP_P (start)
27635 && GET_CODE (PATTERN (start)) != ADDR_VEC
27636 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27638 njumps--, isjump = 1;
27641 nbytes -= min_insn_size (start);
27647 min_size = min_insn_size (insn);
27648 nbytes += min_size;
27650 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27651 INSN_UID (insn), min_size);
27653 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27654 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27662 start = NEXT_INSN (start);
27663 if ((JUMP_P (start)
27664 && GET_CODE (PATTERN (start)) != ADDR_VEC
27665 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27667 njumps--, isjump = 1;
27670 nbytes -= min_insn_size (start);
27672 gcc_assert (njumps >= 0);
27674 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27675 INSN_UID (start), INSN_UID (insn), nbytes);
27677 if (njumps == 3 && isjump && nbytes < 16)
27679 int padsize = 15 - nbytes + min_insn_size (insn);
27682 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27683 INSN_UID (insn), padsize);
27684 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27690 /* AMD Athlon works faster
27691    when RET is not destination of conditional jump or directly preceded
27692    by other jump instruction.  We avoid the penalty by inserting NOP just
27693    before the RET instructions in such cases.  */
/* Walks each predecessor edge of the exit block; when the block ends in a
   bare RETURN that is reached through a branch (label immediately before
   it, a non-fallthru incoming edge, or a preceding conditional jump), the
   RET is replaced with the long form (gen_return_internal_long).  Size-
   optimized blocks are skipped.  Interior lines elided in this extract. */
27695 ix86_pad_returns (void)
27700 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27702 basic_block bb = e->src;
27703 rtx ret = BB_END (bb);
27705 bool replace = false;
27707 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27708 || optimize_bb_for_size_p (bb))
27710 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27711 if (active_insn_p (prev) || LABEL_P (prev))
27713 if (prev && LABEL_P (prev))
27718 FOR_EACH_EDGE (e, ei, bb->preds)
27719 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27720 && !(e->flags & EDGE_FALLTHRU))
27725 prev = prev_active_insn (ret);
27727 && ((JUMP_P (prev) && any_condjump_p (prev))
27730 /* Empty functions get branch mispredict even when the jump destination
27731    is not visible to us.  */
27732 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27737 emit_jump_insn_before (gen_return_internal_long (), ret);
27743 /* Implement machine specific optimizations.  We implement padding of returns
27744    for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window.  */
/* NOTE(review): the function-name line is elided in this extract;
   presumably this is the machine reorg pass driver — confirm against the
   full file.  Both sub-passes run only when optimizing for speed. */
27748 if (optimize && optimize_function_for_speed_p (cfun))
27750 if (TARGET_PAD_RETURNS)
27751 ix86_pad_returns ();
27752 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27753 if (TARGET_FOUR_JUMP_LIMIT)
27754 ix86_avoid_jump_mispredicts ();
27759 /* Return nonzero when QImode register that must be represented via REX prefix
/* Scans the cached recog operands of INSN for a hard register with a
   number above BX_REG, i.e. a QImode register only encodable with a REX
   prefix.  Return-value lines are elided in this extract. */
27762 x86_extended_QIreg_mentioned_p (rtx insn)
27765 extract_insn_cached (insn);
27766 for (i = 0; i < recog_data.n_operands; i++)
27767 if (REG_P (recog_data.operand[i])
27768 && REGNO (recog_data.operand[i]) > BX_REG)
27773 /* Return nonzero when P points to register encoded via REX prefix.
27774    Called via for_each_rtx.  */
/* Leaf predicate for the for_each_rtx walk in
   x86_extended_reg_mentioned_p: true for REX integer or REX SSE
   register numbers.  The REG_P guard line is elided in this extract. */
27776 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27778 unsigned int regno;
27781 regno = REGNO (*p);
27782 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27785 /* Return true when INSN mentions register that must be encoded using REX
/* Walks either the insn's PATTERN (for real insns) or the rtx itself
   with extended_reg_mentioned_1 as the per-node predicate. */
27788 x86_extended_reg_mentioned_p (rtx insn)
27790 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27791 extended_reg_mentioned_1, NULL);
27794 /* Generate an unsigned DImode/SImode to FP conversion.  This is the same code
27795    optabs would emit if we didn't have TFmode patterns.  */
/* Non-negative inputs take the signed expand_float path; negative inputs
   branch to NEGLAB, halve the value with (in >> 1) | (in & 1) so it fits
   the signed range while preserving rounding, convert, then double the
   result (f0 + f0).  Some label/flow lines are elided in this extract. */
27798 x86_emit_floatuns (rtx operands[2])
27800 rtx neglab, donelab, i0, i1, f0, in, out;
27801 enum machine_mode mode, inmode;
27803 inmode = GET_MODE (operands[1]);
27804 gcc_assert (inmode == SImode || inmode == DImode);
27807 in = force_reg (inmode, operands[1]);
27808 mode = GET_MODE (out);
27809 neglab = gen_label_rtx ();
27810 donelab = gen_label_rtx ();
27811 f0 = gen_reg_rtx (mode);
27813 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27815 expand_float (out, in, 0);
27817 emit_jump_insn (gen_jump (donelab));
27820 emit_label (neglab);
27822 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27824 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27826 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27828 expand_float (f0, i0, 0);
27830 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27832 emit_label (donelab);
27835 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
27836    with all elements equal to VAR.  Return true if successful.  */
/* The elided switch dispatches on MODE.  Visible strategies: a direct
   VEC_DUPLICATE set; an SSE/3DNow! TRUNCATE-based HImode duplicate; the
   SSE2 punpcklwd/punpcklbw + pshufd broadcast sequences for V8HI/V16QI;
   widening the scalar (shift+IOR) and recursing in the wider mode; and a
   VEC_CONCAT of a half-mode duplicate.  Case labels and returns are
   elided in this extract — keep the sequences byte-identical. */
27839 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27840 rtx target, rtx val)
27842 enum machine_mode hmode, smode, wsmode, wvmode;
27857 val = force_reg (GET_MODE_INNER (mode), val);
27858 x = gen_rtx_VEC_DUPLICATE (mode, val);
27859 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27865 if (TARGET_SSE || TARGET_3DNOW_A)
27867 val = gen_lowpart (SImode, val);
27868 x = gen_rtx_TRUNCATE (HImode, val);
27869 x = gen_rtx_VEC_DUPLICATE (mode, x);
27870 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27892 /* Extend HImode to SImode using a paradoxical SUBREG.  */
27893 tmp1 = gen_reg_rtx (SImode);
27894 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27895 /* Insert the SImode value as low element of V4SImode vector. */
27896 tmp2 = gen_reg_rtx (V4SImode);
27897 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27898 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27899 CONST0_RTX (V4SImode),
27901 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27902 /* Cast the V4SImode vector back to a V8HImode vector.  */
27903 tmp1 = gen_reg_rtx (V8HImode);
27904 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27905 /* Duplicate the low short through the whole low SImode word.  */
27906 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27907 /* Cast the V8HImode vector back to a V4SImode vector.  */
27908 tmp2 = gen_reg_rtx (V4SImode);
27909 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27910 /* Replicate the low element of the V4SImode vector.  */
27911 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27912 /* Cast the V2SImode back to V8HImode, and store in target.  */
27913 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27924 /* Extend QImode to SImode using a paradoxical SUBREG.  */
27925 tmp1 = gen_reg_rtx (SImode);
27926 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27927 /* Insert the SImode value as low element of V4SImode vector. */
27928 tmp2 = gen_reg_rtx (V4SImode);
27929 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27930 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27931 CONST0_RTX (V4SImode),
27933 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27934 /* Cast the V4SImode vector back to a V16QImode vector.  */
27935 tmp1 = gen_reg_rtx (V16QImode);
27936 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27937 /* Duplicate the low byte through the whole low SImode word.  */
27938 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27939 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27940 /* Cast the V16QImode vector back to a V4SImode vector.  */
27941 tmp2 = gen_reg_rtx (V4SImode);
27942 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27943 /* Replicate the low element of the V4SImode vector.  */
27944 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27945 /* Cast the V2SImode back to V16QImode, and store in target.  */
27946 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27954 /* Replicate the value once into the next wider mode and recurse.  */
27955 val = convert_modes (wsmode, smode, val, true);
27956 x = expand_simple_binop (wsmode, ASHIFT, val,
27957 GEN_INT (GET_MODE_BITSIZE (smode)),
27958 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27959 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27961 x = gen_reg_rtx (wvmode);
27962 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27963 gcc_unreachable ();
27964 emit_move_insn (target, gen_lowpart (mode, x));
27987 rtx tmp = gen_reg_rtx (hmode);
27988 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27989 emit_insn (gen_rtx_SET (VOIDmode, target,
27990 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27999 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
28000    whose ONE_VAR element is VAR, and other elements are zero.  Return true
/* The elided switch selects USE_VECTOR_SET per mode/ISA (SSE4.1, SSE2,
   SSE/3DNow!, AVX).  Fallbacks visible below: zero the target and use
   ix86_expand_vector_set; VEC_CONCAT with zero; VEC_MERGE of a duplicate
   into a zero vector followed by pshufd/shufps shuffles to move the
   element into position; or zero-extend to SImode and recurse in a
   V4SI/V2SI mode.  Case labels and returns are elided in this extract. */
28004 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28005 rtx target, rtx var, int one_var)
28007 enum machine_mode vsimode;
28010 bool use_vector_set = false;
28015 /* For SSE4.1, we normally use vector set.  But if the second
28016    element is zero and inter-unit moves are OK, we use movq
28018 use_vector_set = (TARGET_64BIT
28020 && !(TARGET_INTER_UNIT_MOVES
28026 use_vector_set = TARGET_SSE4_1;
28029 use_vector_set = TARGET_SSE2;
28032 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28039 use_vector_set = TARGET_AVX;
28042 /* Use ix86_expand_vector_set in 64bit mode only.  */
28043 use_vector_set = TARGET_AVX && TARGET_64BIT;
28049 if (use_vector_set)
28051 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28052 var = force_reg (GET_MODE_INNER (mode), var);
28053 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28069 var = force_reg (GET_MODE_INNER (mode), var);
28070 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28071 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28076 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28077 new_target = gen_reg_rtx (mode);
28079 new_target = target;
28080 var = force_reg (GET_MODE_INNER (mode), var);
28081 x = gen_rtx_VEC_DUPLICATE (mode, var);
28082 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28083 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28086 /* We need to shuffle the value to the correct position, so
28087    create a new pseudo to store the intermediate result.  */
28089 /* With SSE2, we can use the integer shuffle insns.  */
28090 if (mode != V4SFmode && TARGET_SSE2)
28092 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28094 GEN_INT (one_var == 1 ? 0 : 1),
28095 GEN_INT (one_var == 2 ? 0 : 1),
28096 GEN_INT (one_var == 3 ? 0 : 1)));
28097 if (target != new_target)
28098 emit_move_insn (target, new_target);
28102 /* Otherwise convert the intermediate result to V4SFmode and
28103    use the SSE1 shuffle instructions.  */
28104 if (mode != V4SFmode)
28106 tmp = gen_reg_rtx (V4SFmode);
28107 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28112 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28114 GEN_INT (one_var == 1 ? 0 : 1),
28115 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28116 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28118 if (mode != V4SFmode)
28119 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28120 else if (tmp != target)
28121 emit_move_insn (target, tmp);
28123 else if (target != new_target)
28124 emit_move_insn (target, new_target);
28129 vsimode = V4SImode;
28135 vsimode = V2SImode;
28141 /* Zero extend the variable element to SImode and recurse.  */
28142 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28144 x = gen_reg_rtx (vsimode);
28145 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28147 gcc_unreachable ();
28149 emit_move_insn (target, gen_lowpart (mode, x));
28157 /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
28158    consisting of the values in VALS.  It is known that all elements
28159    except ONE_VAR are constants.  Return true if successful.  */
/* Builds CONST_VEC with the variable slot zeroed, loads it, then patches
   the variable element in.  The QImode path has no single-byte insert:
   it pairs the variable byte with its adjacent constant (one_var ^ 1),
   shifts/ORs them into an HImode value, and does an HImode set at index
   one_var >> 1.  Switch labels are elided in this extract. */
28162 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28163 rtx target, rtx vals, int one_var)
28165 rtx var = XVECEXP (vals, 0, one_var);
28166 enum machine_mode wmode;
28169 const_vec = copy_rtx (vals);
28170 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28171 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28179 /* For the two element vectors, it's just as easy to use
28180    the general case.  */
28184 /* Use ix86_expand_vector_set in 64bit mode only.  */
28207 /* There's no way to set one QImode entry easily.  Combine
28208    the variable value with its adjacent constant value, and
28209    promote to an HImode set.  */
28210 x = XVECEXP (vals, 0, one_var ^ 1);
28213 var = convert_modes (HImode, QImode, var, true);
28214 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28215 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28216 x = GEN_INT (INTVAL (x) & 0xff);
28220 var = convert_modes (HImode, QImode, var, true);
28221 x = gen_int_mode (INTVAL (x) << 8, HImode);
28223 if (x != const0_rtx)
28224 var = expand_simple_binop (HImode, IOR, var, x, var,
28225 1, OPTAB_LIB_WIDEN);
28227 x = gen_reg_rtx (wmode);
28228 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28229 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28231 emit_move_insn (target, gen_lowpart (mode, x));
28238 emit_move_insn (target, const_vec);
28239 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28243 /* A subroutine of ix86_expand_vector_init_general.  Use vector
28244    concatenate to handle the most general case: all values variable,
28245    and none identical.  */
/* For n == 2, forces both operands into registers and emits a single
   VEC_CONCAT.  For larger n (case labels elided) it recursively builds
   half-width vectors in FIRST[]/SECOND[] and concatenates them; inputs
   are processed backward to help the register allocator (PR 36222). */
28248 ix86_expand_vector_init_concat (enum machine_mode mode,
28249 rtx target, rtx *ops, int n)
28251 enum machine_mode cmode, hmode = VOIDmode;
28252 rtx first[8], second[4];
28292 gcc_unreachable ();
28295 if (!register_operand (ops[1], cmode))
28296 ops[1] = force_reg (cmode, ops[1]);
28297 if (!register_operand (ops[0], cmode))
28298 ops[0] = force_reg (cmode, ops[0]);
28299 emit_insn (gen_rtx_SET (VOIDmode, target,
28300 gen_rtx_VEC_CONCAT (mode, ops[0],
28320 gcc_unreachable ();
28336 gcc_unreachable ();
28341 /* FIXME: We process inputs backward to help RA.  PR 36222. */
28344 for (; i > 0; i -= 2, j--)
28346 first[j] = gen_reg_rtx (cmode);
28347 v = gen_rtvec (2, ops[i - 1], ops[i]);
28348 ix86_expand_vector_init (false, first[j],
28349 gen_rtx_PARALLEL (cmode, v));
28355 gcc_assert (hmode != VOIDmode);
28356 for (i = j = 0; i < n; i += 2, j++)
28358 second[j] = gen_reg_rtx (hmode);
28359 ix86_expand_vector_init_concat (hmode, second [j],
28363 ix86_expand_vector_init_concat (mode, target, second, n);
28366 ix86_expand_vector_init_concat (mode, target, first, n);
28370 gcc_unreachable ();
28374 /* A subroutine of ix86_expand_vector_init_general.  Use vector
28375    interleave to handle the most general case: all values variable,
28376    and none identical.  */
/* Supports V8HI and V16QI (switch labels elided).  Each loop iteration
   loads an odd element into the low lane of a V4SI vector via VEC_MERGE,
   inserts the matching even element with gen_load_even, then successive
   punpckl-low interleaves in FIRST_IMODE/SECOND_IMODE (and THIRD_IMODE
   for the byte case) merge the partial vectors into TARGET. */
28379 ix86_expand_vector_init_interleave (enum machine_mode mode,
28380 rtx target, rtx *ops, int n)
28382 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28385 rtx (*gen_load_even) (rtx, rtx, rtx);
28386 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28387 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28392 gen_load_even = gen_vec_setv8hi;
28393 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28394 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28395 inner_mode = HImode;
28396 first_imode = V4SImode;
28397 second_imode = V2DImode;
28398 third_imode = VOIDmode;
28401 gen_load_even = gen_vec_setv16qi;
28402 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28403 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28404 inner_mode = QImode;
28405 first_imode = V8HImode;
28406 second_imode = V4SImode;
28407 third_imode = V2DImode;
28410 gcc_unreachable ();
28413 for (i = 0; i < n; i++)
28415 /* Extend the odd elment to SImode using a paradoxical SUBREG.  */
28416 op0 = gen_reg_rtx (SImode);
28417 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28419 /* Insert the SImode value as low element of V4SImode vector. */
28420 op1 = gen_reg_rtx (V4SImode);
28421 op0 = gen_rtx_VEC_MERGE (V4SImode,
28422 gen_rtx_VEC_DUPLICATE (V4SImode,
28424 CONST0_RTX (V4SImode),
28426 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28428 /* Cast the V4SImode vector back to a vector in orignal mode.  */
28429 op0 = gen_reg_rtx (mode);
28430 emit_move_insn (op0, gen_lowpart (mode, op1));
28432 /* Load even elements into the second positon.  */
28433 emit_insn ((*gen_load_even) (op0,
28434 force_reg (inner_mode,
28438 /* Cast vector to FIRST_IMODE vector.  */
28439 ops[i] = gen_reg_rtx (first_imode);
28440 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28443 /* Interleave low FIRST_IMODE vectors.  */
28444 for (i = j = 0; i < n; i += 2, j++)
28446 op0 = gen_reg_rtx (first_imode);
28447 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
28449 /* Cast FIRST_IMODE vector to SECOND_IMODE vector.  */
28450 ops[j] = gen_reg_rtx (second_imode);
28451 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28454 /* Interleave low SECOND_IMODE vectors.  */
28455 switch (second_imode)
28458 for (i = j = 0; i < n / 2; i += 2, j++)
28460 op0 = gen_reg_rtx (second_imode);
28461 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
28464 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28466 ops[j] = gen_reg_rtx (third_imode);
28467 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28469 second_imode = V2DImode;
28470 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28474 op0 = gen_reg_rtx (second_imode);
28475 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
28478 /* Cast the SECOND_IMODE vector back to a vector on original
28480 emit_insn (gen_rtx_SET (VOIDmode, target,
28481 gen_lowpart (mode, op0)));
28485 gcc_unreachable ();
28489 /* A subroutine of ix86_expand_vector_init.  Handle the most general case:
28490    all values variable, and none identical.  */
/* Mode dispatch (labels elided): concat-based construction via
   ix86_expand_vector_init_concat; AVX 256-bit modes built as two
   half-mode interleaves then VEC_CONCAT; V8HI/V16QI interleave directly
   when SSE4.1 and inter-unit moves allow; otherwise the word-building
   fallback packs elements into word_mode registers with shift/IOR and
   moves them in via low/high parts (or recurses through V4SImode). */
28493 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28494 rtx target, rtx vals)
28496 rtx ops[32], op0, op1;
28497 enum machine_mode half_mode = VOIDmode;
28504 if (!mmx_ok && !TARGET_SSE)
28516 n = GET_MODE_NUNITS (mode);
28517 for (i = 0; i < n; i++)
28518 ops[i] = XVECEXP (vals, 0, i);
28519 ix86_expand_vector_init_concat (mode, target, ops, n);
28523 half_mode = V16QImode;
28527 half_mode = V8HImode;
28531 n = GET_MODE_NUNITS (mode);
28532 for (i = 0; i < n; i++)
28533 ops[i] = XVECEXP (vals, 0, i);
28534 op0 = gen_reg_rtx (half_mode);
28535 op1 = gen_reg_rtx (half_mode);
28536 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28538 ix86_expand_vector_init_interleave (half_mode, op1,
28539 &ops [n >> 1], n >> 2);
28540 emit_insn (gen_rtx_SET (VOIDmode, target,
28541 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28545 if (!TARGET_SSE4_1)
28553 /* Don't use ix86_expand_vector_init_interleave if we can't
28554    move from GPR to SSE register directly.  */
28555 if (!TARGET_INTER_UNIT_MOVES)
28558 n = GET_MODE_NUNITS (mode);
28559 for (i = 0; i < n; i++)
28560 ops[i] = XVECEXP (vals, 0, i);
28561 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28569 gcc_unreachable ();
28573 int i, j, n_elts, n_words, n_elt_per_word;
28574 enum machine_mode inner_mode;
28575 rtx words[4], shift;
28577 inner_mode = GET_MODE_INNER (mode);
28578 n_elts = GET_MODE_NUNITS (mode);
28579 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28580 n_elt_per_word = n_elts / n_words;
28581 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28583 for (i = 0; i < n_words; ++i)
28585 rtx word = NULL_RTX;
28587 for (j = 0; j < n_elt_per_word; ++j)
28589 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28590 elt = convert_modes (word_mode, inner_mode, elt, true);
28596 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28597 word, 1, OPTAB_LIB_WIDEN);
28598 word = expand_simple_binop (word_mode, IOR, word, elt,
28599 word, 1, OPTAB_LIB_WIDEN);
28607 emit_move_insn (target, gen_lowpart (mode, words[0]));
28608 else if (n_words == 2)
28610 rtx tmp = gen_reg_rtx (mode);
28611 emit_clobber (tmp);
28612 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28613 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28614 emit_move_insn (target, tmp);
28616 else if (n_words == 4)
28618 rtx tmp = gen_reg_rtx (V4SImode);
28619 gcc_assert (word_mode == SImode);
28620 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28621 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28622 emit_move_insn (target, gen_lowpart (mode, tmp));
28625 gcc_unreachable ();
28629 /* Initialize vector TARGET via VALS.  Suppress the use of MMX
28630    instructions unless MMX_OK is true.  */
/* Entry point: classifies VALS (N_VAR variable elements, ALL_SAME,
   ALL_CONST_ZERO) and tries, in order: constant-pool load for an
   all-constant vector, broadcast for identical elements, the
   one-nonzero / one-var specializations, then the fully general
   expander.  Some flow lines are elided in this extract. */
28633 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28635 enum machine_mode mode = GET_MODE (target);
28636 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28637 int n_elts = GET_MODE_NUNITS (mode);
28638 int n_var = 0, one_var = -1;
28639 bool all_same = true, all_const_zero = true;
28643 for (i = 0; i < n_elts; ++i)
28645 x = XVECEXP (vals, 0, i);
28646 if (!(CONST_INT_P (x)
28647 || GET_CODE (x) == CONST_DOUBLE
28648 || GET_CODE (x) == CONST_FIXED))
28649 n_var++, one_var = i;
28650 else if (x != CONST0_RTX (inner_mode))
28651 all_const_zero = false;
28652 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28656 /* Constants are best loaded from the constant pool.  */
28659 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28663 /* If all values are identical, broadcast the value.  */
28665 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28666 XVECEXP (vals, 0, 0)))
28669 /* Values where only one field is non-constant are best loaded from
28670    the pool and overwritten via move later.  */
28674 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28675 XVECEXP (vals, 0, one_var),
28679 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28683 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
/* Store scalar VAL into element ELT of vector TARGET.  Per-mode
   strategies (switch labels elided): VEC_CONCAT for 2-element vectors,
   SSE4.1 vec_merge inserts, unpcklps/shufps shuffle dances for V4SF,
   pshufd swap-insert-swap for V4SI, recursion through V4SFmode for
   SSE1, AVX 256-bit via extract-half / set-in-half / insert-half using
   the gen_extract/gen_insert tables, and a stack-temp spill fallback. */
28687 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28689 enum machine_mode mode = GET_MODE (target);
28690 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28691 enum machine_mode half_mode;
28692 bool use_vec_merge = false;
28694 static rtx (*gen_extract[6][2]) (rtx, rtx)
28696 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28697 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28698 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28699 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28700 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28701 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28703 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28705 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28706 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28707 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28708 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28709 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28710 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28720 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28721 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28723 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28725 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28726 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28732 use_vec_merge = TARGET_SSE4_1;
28740 /* For the two element vectors, we implement a VEC_CONCAT with
28741    the extraction of the other element.  */
28743 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28744 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28747 op0 = val, op1 = tmp;
28749 op0 = tmp, op1 = val;
28751 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28752 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28757 use_vec_merge = TARGET_SSE4_1;
28764 use_vec_merge = true;
28768 /* tmp = target = A B C D */
28769 tmp = copy_to_reg (target);
28770 /* target = A A B B */
28771 emit_insn (gen_sse_unpcklps (target, target, target));
28772 /* target = X A B B */
28773 ix86_expand_vector_set (false, target, val, 0);
28774 /* target = A X C D */
28775 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28776 const1_rtx, const0_rtx,
28777 GEN_INT (2+4), GEN_INT (3+4)));
28781 /* tmp = target = A B C D */
28782 tmp = copy_to_reg (target);
28783 /* tmp = X B C D */
28784 ix86_expand_vector_set (false, tmp, val, 0);
28785 /* target = A B X D */
28786 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28787 const0_rtx, const1_rtx,
28788 GEN_INT (0+4), GEN_INT (3+4)));
28792 /* tmp = target = A B C D */
28793 tmp = copy_to_reg (target);
28794 /* tmp = X B C D */
28795 ix86_expand_vector_set (false, tmp, val, 0);
28796 /* target = A B X D */
28797 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28798 const0_rtx, const1_rtx,
28799 GEN_INT (2+4), GEN_INT (0+4)));
28803 gcc_unreachable ();
28808 use_vec_merge = TARGET_SSE4_1;
28812 /* Element 0 handled by vec_merge below.  */
28815 use_vec_merge = true;
28821 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28822    store into element 0, then shuffle them back.  */
28826 order[0] = GEN_INT (elt);
28827 order[1] = const1_rtx;
28828 order[2] = const2_rtx;
28829 order[3] = GEN_INT (3);
28830 order[elt] = const0_rtx;
28832 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28833 order[1], order[2], order[3]));
28835 ix86_expand_vector_set (false, target, val, 0);
28837 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28838 order[1], order[2], order[3]));
28842 /* For SSE1, we have to reuse the V4SF code.  */
28843 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28844 gen_lowpart (SFmode, val), elt);
28849 use_vec_merge = TARGET_SSE2;
28852 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28856 use_vec_merge = TARGET_SSE4_1;
28863 half_mode = V16QImode;
28869 half_mode = V8HImode;
28875 half_mode = V4SImode;
28881 half_mode = V2DImode;
28887 half_mode = V4SFmode;
28893 half_mode = V2DFmode;
28899 /* Compute offset.  */
28903 gcc_assert (i <= 1);
28905 /* Extract the half.  */
28906 tmp = gen_reg_rtx (half_mode);
28907 emit_insn ((*gen_extract[j][i]) (tmp, target));
28909 /* Put val in tmp at elt.  */
28910 ix86_expand_vector_set (false, tmp, val, elt);
28913 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28922 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28923 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28924 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28928 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28930 emit_move_insn (mem, target);
28932 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28933 emit_move_insn (tmp, val);
28935 emit_move_insn (target, mem);
/* Extract element ELT of vector VEC into scalar TARGET.  Mirrors
   ix86_expand_vector_set: VEC_SELECT / SSE4.1 extracts where available,
   shufps/unpckhps or pshufd/punpckhdq shuffles to move the wanted lane
   to position 0, SSE1 recursion through V4SFmode, zero-extension hint
   for sub-SI integer elements, and a stack-temp fallback.  Switch
   labels are elided in this extract. */
28940 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28942 enum machine_mode mode = GET_MODE (vec);
28943 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28944 bool use_vec_extr = false;
28957 use_vec_extr = true;
28961 use_vec_extr = TARGET_SSE4_1;
28973 tmp = gen_reg_rtx (mode);
28974 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28975 GEN_INT (elt), GEN_INT (elt),
28976 GEN_INT (elt+4), GEN_INT (elt+4)));
28980 tmp = gen_reg_rtx (mode);
28981 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28985 gcc_unreachable ();
28988 use_vec_extr = true;
28993 use_vec_extr = TARGET_SSE4_1;
29007 tmp = gen_reg_rtx (mode);
29008 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29009 GEN_INT (elt), GEN_INT (elt),
29010 GEN_INT (elt), GEN_INT (elt)));
29014 tmp = gen_reg_rtx (mode);
29015 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
29019 gcc_unreachable ();
29022 use_vec_extr = true;
29027 /* For SSE1, we have to reuse the V4SF code.  */
29028 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29029 gen_lowpart (V4SFmode, vec), elt);
29035 use_vec_extr = TARGET_SSE2;
29038 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29042 use_vec_extr = TARGET_SSE4_1;
29046 /* ??? Could extract the appropriate HImode element and shift.  */
29053 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29054 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29056 /* Let the rtl optimizers know about the zero extension performed.  */
29057 if (inner_mode == QImode || inner_mode == HImode)
29059 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29060 target = gen_lowpart (SImode, target);
29063 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29067 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29069 emit_move_insn (mem, vec);
29071 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29072 emit_move_insn (target, tmp);
29076 /* Expand a vector reduction on V4SFmode for SSE1.  FN is the binary
29077    pattern to reduce; DEST is the destination; IN is the input vector.  */
/* Classic log2 reduction: movhlps folds the high pair onto the low pair,
   FN combines them, shufps brings element 1 alongside element 0, and a
   final FN produces the reduced value in DEST. */
29080 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29082 rtx tmp1, tmp2, tmp3;
29084 tmp1 = gen_reg_rtx (V4SFmode);
29085 tmp2 = gen_reg_rtx (V4SFmode);
29086 tmp3 = gen_reg_rtx (V4SFmode);
29088 emit_insn (gen_sse_movhlps (tmp1, in, in));
29089 emit_insn (fn (tmp2, tmp1, in));
29091 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29092 const1_rtx, const1_rtx,
29093 GEN_INT (1+4), GEN_INT (1+4)));
29094 emit_insn (fn (dest, tmp2, tmp3));
29097 /* Target hook for scalar_mode_supported_p.  */
/* Decimal-float and TFmode get explicit answers (values elided in this
   extract); everything else defers to the default hook. */
29099 ix86_scalar_mode_supported_p (enum machine_mode mode)
29101 if (DECIMAL_FLOAT_MODE_P (mode))
29103 else if (mode == TFmode)
29106 return default_scalar_mode_supported_p (mode);
29109 /* Implements target hook vector_mode_supported_p.  */
/* A vector mode is supported when any enabled ISA level (SSE, SSE2,
   AVX-256, MMX, 3DNow!) validates it; return lines are elided. */
29111 ix86_vector_mode_supported_p (enum machine_mode mode)
29113 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29115 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29117 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29119 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29121 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29126 /* Target hook for c_mode_for_suffix.  */
/* Maps a C constant suffix character to a machine mode; the body is
   elided in this extract. */
29127 static enum machine_mode
29128 ix86_c_mode_for_suffix (char suffix)
29138 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29140    We do this in the new i386 backend to maintain source compatibility
29141    with the old cc0-based compiler.  */
/* Prepends implicit "flags" and "fpsr" clobbers (and, per the elided
   tail, presumably the original clobber list) to every inline asm. */
29144 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29145 tree inputs ATTRIBUTE_UNUSED,
29148 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29150 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29155 /* Implements target vector targetm.asm.encode_section_info.  This
29156    is not used by netware.  */
/* After the default encoding, marks static/external variables that live
   in the large data section with SYMBOL_FLAG_FAR_ADDR. */
29158 static void ATTRIBUTE_UNUSED
29159 ix86_encode_section_info (tree decl, rtx rtl, int first)
29161 default_encode_section_info (decl, rtl, first);
29163 if (TREE_CODE (decl) == VAR_DECL
29164 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29165 && ix86_in_large_data_p (decl))
29166 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29169 /* Worker function for REVERSE_CONDITION.  */
/* FP compare modes must use the maybe-unordered reversal so NaN
   ordering is preserved; integer modes use the plain reversal. */
29172 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29174 return (mode != CCFPmode && mode != CCFPUmode
29175 ? reverse_condition (code)
29176 : reverse_condition_maybe_unordered (code));
29179 /* Output code to perform an x87 FP register move, from OPERANDS[1]
/* Returns the assembler template for an x87 move.  Chooses popping
   (fstp) vs. non-popping (fst/fld) forms based on whether the source
   register dies in INSN, and works around XFmode having no non-popping
   store by storing-then-reloading.  */
29183 output_387_reg_move (rtx insn, rtx *operands)
29185 if (REG_P (operands[0]))
29187 if (REG_P (operands[1])
29188 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29190 if (REGNO (operands[0]) == FIRST_STACK_REG)
29191 return output_387_ffreep (operands, 0);
29192 return "fstp\t%y0";
29194 if (STACK_TOP_P (operands[0]))
29195 return "fld%Z1\t%y1";
29198 else if (MEM_P (operands[0]))
29200 gcc_assert (REG_P (operands[1]));
29201 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29202 return "fstp%Z0\t%y0";
29205 /* There is no non-popping store to memory for XFmode.
29206 So if we need one, follow the store with a load. */
29207 if (GET_MODE (operands[0]) == XFmode)
29208 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29210 return "fst%Z0\t%y0";
29217 /* Output code to perform a conditional jump to LABEL, if C2 flag in
29218 FP status register is set. */
/* Emits: fnstsw into REG, then either SAHF + unordered test on CCmode
   flags (when SAHF is available/profitable) or a testb of bit 0x04 of
   the status word, followed by a conditional jump to LABEL.  The jump
   is predicted unlikely (10%).  */
29221 ix86_emit_fp_unordered_jump (rtx label)
29223 rtx reg = gen_reg_rtx (HImode);
29226 emit_insn (gen_x86_fnstsw_1 (reg));
29228 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29230 emit_insn (gen_x86_sahf_1 (reg));
29232 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29233 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29237 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29239 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29240 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29243 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29244 gen_rtx_LABEL_REF (VOIDmode, label),
29246 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29248 emit_jump_insn (temp);
29249 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29252 /* Output code to perform a log1p XFmode calculation. */
/* Computes OP0 = log1p(OP1) with x87 insns.  For |op1| below
   ~0.2929 (= 1 - sqrt(2)/2) uses fyl2xp1 directly for accuracy;
   otherwise falls back to fyl2x(1 + op1).  Both paths scale by ln2
   loaded via fldln2 (standard constant slot 4).  */
29254 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29256 rtx label1 = gen_label_rtx ();
29257 rtx label2 = gen_label_rtx ();
29259 rtx tmp = gen_reg_rtx (XFmode);
29260 rtx tmp2 = gen_reg_rtx (XFmode);
29263 emit_insn (gen_absxf2 (tmp, op1));
29264 test = gen_rtx_GE (VOIDmode, tmp,
29265 CONST_DOUBLE_FROM_REAL_VALUE (
29266 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29268 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29270 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29271 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29272 emit_jump (label2);
29274 emit_label (label1);
29275 emit_move_insn (tmp, CONST1_RTX (XFmode));
29276 emit_insn (gen_addxf3 (tmp, op1, tmp));
29277 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29278 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29280 emit_label (label2);
29283 /* Output code to perform a Newton-Raphson approximation of a single precision
29284 floating point divide.  See https://en.wikipedia.org/wiki/Division_algorithm
   (Newton-Raphson division).  */
/* Emits RES = A / B as a * rcp(b) * (2 - b * rcp(b)): one refinement
   step on the hardware reciprocal estimate.  MODE may be scalar SFmode
   or an SF vector mode; the 2.0 constant is broadcast for vectors.  */
29286 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29288 rtx x0, x1, e0, e1, two;
29290 x0 = gen_reg_rtx (mode);
29291 e0 = gen_reg_rtx (mode);
29292 e1 = gen_reg_rtx (mode);
29293 x1 = gen_reg_rtx (mode);
29295 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29297 if (VECTOR_MODE_P (mode))
29298 two = ix86_build_const_vector (SFmode, true, two);
29300 two = force_reg (mode, two);
29302 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29304 /* x0 = rcp(b) estimate */
29305 emit_insn (gen_rtx_SET (VOIDmode, x0,
29306 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29309 emit_insn (gen_rtx_SET (VOIDmode, e0,
29310 gen_rtx_MULT (mode, x0, b)));
29312 emit_insn (gen_rtx_SET (VOIDmode, e1,
29313 gen_rtx_MINUS (mode, two, e0)));
29315 emit_insn (gen_rtx_SET (VOIDmode, x1,
29316 gen_rtx_MULT (mode, x0, e1)));
29318 emit_insn (gen_rtx_SET (VOIDmode, res,
29319 gen_rtx_MULT (mode, a, x1)));
29322 /* Output code to perform a Newton-Raphson approximation of a
29323 single precision floating point [reciprocal] square root. */
/* Emits RES = sqrt(A) or rsqrt(A) (choice controlled by a parameter
   elided from this listing) from the rsqrtss estimate with one
   Newton-Raphson step:
     rsqrt(a) = -0.5 * x0 * (a*x0*x0 - 3),  x0 = rsqrtss(a).
   For sqrt, a == 0 is masked out of x0 so 0 * inf does not yield NaN. */
29325 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29328 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29331 x0 = gen_reg_rtx (mode);
29332 e0 = gen_reg_rtx (mode);
29333 e1 = gen_reg_rtx (mode);
29334 e2 = gen_reg_rtx (mode);
29335 e3 = gen_reg_rtx (mode);
29337 real_from_integer (&r, VOIDmode, -3, -1, 0);
29338 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29340 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29341 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29343 if (VECTOR_MODE_P (mode))
29345 mthree = ix86_build_const_vector (SFmode, true, mthree);
29346 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29349 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29350 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29352 /* x0 = rsqrt(a) estimate */
29353 emit_insn (gen_rtx_SET (VOIDmode, x0,
29354 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29357 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
29362 zero = gen_reg_rtx (mode);
29363 mask = gen_reg_rtx (mode);
29365 zero = force_reg (mode, CONST0_RTX(mode));
29366 emit_insn (gen_rtx_SET (VOIDmode, mask,
29367 gen_rtx_NE (mode, zero, a)));
29369 emit_insn (gen_rtx_SET (VOIDmode, x0,
29370 gen_rtx_AND (mode, x0, mask)));
29374 emit_insn (gen_rtx_SET (VOIDmode, e0,
29375 gen_rtx_MULT (mode, x0, a)));
29377 emit_insn (gen_rtx_SET (VOIDmode, e1,
29378 gen_rtx_MULT (mode, e0, x0)));
29381 mthree = force_reg (mode, mthree);
29382 emit_insn (gen_rtx_SET (VOIDmode, e2,
29383 gen_rtx_PLUS (mode, e1, mthree)));
29385 mhalf = force_reg (mode, mhalf);
29387 /* e3 = -.5 * x0 (rsqrt path) */
29388 emit_insn (gen_rtx_SET (VOIDmode, e3,
29389 gen_rtx_MULT (mode, x0, mhalf)));
29391 /* e3 = -.5 * e0 (sqrt path; the selecting branch is elided here) */
29392 emit_insn (gen_rtx_SET (VOIDmode, e3,
29393 gen_rtx_MULT (mode, e0, mhalf)));
29394 /* ret = e2 * e3 */
29395 emit_insn (gen_rtx_SET (VOIDmode, res,
29396 gen_rtx_MULT (mode, e2, e3)));
29399 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
/* Emits .eh_frame section directives with the "@unwind" marker that
   Binutils 2.15 requires on Solaris; everything else goes through the
   generic ELF path.  */
29401 static void ATTRIBUTE_UNUSED
29402 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29405 /* With Binutils 2.15, the "@unwind" marker must be specified on
29406 every occurrence of the ".eh_frame" section, not just the first
29409 && strcmp (name, ".eh_frame") == 0)
29411 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29412 flags & SECTION_WRITE ? "aw" : "a");
29415 default_elf_asm_named_section (name, flags, decl);
29418 /* Return the mangling of TYPE if it is an extended fundamental type. */
/* C++ mangling hook: "g" for __float128, "e" for long double/__float80;
   NULL (elided here) for anything else so the default mangling
   applies.  Only void/bool/integer/real candidates are considered.  */
29420 static const char *
29421 ix86_mangle_type (const_tree type)
29423 type = TYPE_MAIN_VARIANT (type);
29425 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29426 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29429 switch (TYPE_MODE (type))
29432 /* __float128 is "g". */
29435 /* "long double" or __float80 is "e". */
29442 /* For 32-bit code we can save PIC register setup by using
29443 __stack_chk_fail_local hidden function instead of calling
29444 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29445 register, so it is better to call __stack_chk_fail directly. */
/* TARGET_STACK_PROTECT_FAIL hook: pick the external vs. hidden-local
   stack-smash handler depending on whether we are 64-bit.  */
29448 ix86_stack_protect_fail (void)
29450 return TARGET_64BIT
29451 ? default_external_stack_protect_fail ()
29452 : default_hidden_stack_protect_fail ();
29455 /* Select a format to encode pointers in exception handling data. CODE
29456 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29457 true if the symbol may be affected by dynamic relocations.
29459 ??? All x86 object file formats are capable of representing this.
29460 After all, the relocation needed is the same as for the call insn.
29461 Whether or not a particular assembler allows us to enter such, I
29462 guess we'll have to see. */
/* PIC path (condition elided from this listing) prefers pc-relative
   sdata4/sdata8 with an indirect bit for global symbols; non-PIC uses
   udata4 for small/medium code models, absolute pointers otherwise. */
29464 asm_preferred_eh_data_format (int code, int global)
29468 int type = DW_EH_PE_sdata8;
29470 || ix86_cmodel == CM_SMALL_PIC
29471 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29472 type = DW_EH_PE_sdata4;
29473 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29475 if (ix86_cmodel == CM_SMALL
29476 || (ix86_cmodel == CM_MEDIUM && code))
29477 return DW_EH_PE_udata4;
29478 return DW_EH_PE_absptr;
29481 /* Expand copysign from SIGN to the positive value ABS_VALUE
29482 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
/* the sign bit.  Computes RESULT = ABS_VALUE | (SIGN & signbit-mask).
   ABS_VALUE must already be non-negative so plain OR suffices.  When
   MASK is NULL a sign-bit mask is built (reduced to scalar mode via a
   vec_select when MODE is scalar).  */
29485 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29487 enum machine_mode mode = GET_MODE (sign);
29488 rtx sgn = gen_reg_rtx (mode);
29489 if (mask == NULL_RTX)
29491 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29492 if (!VECTOR_MODE_P (mode))
29494 /* We need to generate a scalar mode mask in this case. */
29495 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29496 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29497 mask = gen_reg_rtx (mode);
29498 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29502 mask = gen_rtx_NOT (mode, mask);
29503 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29504 gen_rtx_AND (mode, mask, sign)));
29505 emit_insn (gen_rtx_SET (VOIDmode, result,
29506 gen_rtx_IOR (mode, abs_value, sgn)));
29509 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29510 mask for masking out the sign-bit is stored in *SMASK, if that is
/* non-null (return statement elided from this listing).  Implements
   fabs as AND with the inverted sign-bit mask, reducing the mask to
   scalar mode via vec_select when OP0 is scalar.  */
29513 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29515 enum machine_mode mode = GET_MODE (op0);
29518 xa = gen_reg_rtx (mode);
29519 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29520 if (!VECTOR_MODE_P (mode))
29522 /* We need to generate a scalar mode mask in this case. */
29523 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29524 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29525 mask = gen_reg_rtx (mode);
29526 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29528 emit_insn (gen_rtx_SET (VOIDmode, xa,
29529 gen_rtx_AND (mode, op0, mask)));
29537 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29538 swapping the operands if SWAP_OPERANDS is true. The expanded
29539 code is a forward jump to a newly created label in case the
29540 comparison is true. The generated label rtx is returned. */
/* Uses a CCFPUmode compare so unordered (NaN) results are honored by
   CODE; JUMP_LABEL is set so later passes see the CFG edge.  */
29542 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29543 bool swap_operands)
29554 label = gen_label_rtx ();
29555 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29556 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29557 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29558 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29559 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29560 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29561 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29562 JUMP_LABEL (tmp) = label;
29567 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29568 using comparison code CODE. Operands are swapped for the comparison if
29569 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
/* Emits cmpsd (DFmode) or cmpss (SFmode), producing an all-ones /
   all-zeros mask register.  (Swap handling and the final return are
   elided from this listing.)  */
29571 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29572 bool swap_operands)
29574 enum machine_mode mode = GET_MODE (op0);
29575 rtx mask = gen_reg_rtx (mode);
29584 if (mode == DFmode)
29585 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29586 gen_rtx_fmt_ee (code, mode, op0, op1)));
29588 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29589 gen_rtx_fmt_ee (code, mode, op0, op1)));
29594 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29595 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
/* 2**52 for DFmode, 2**23 for SFmode: adding then subtracting this
   constant rounds a nonnegative value to integer in FP arithmetic.  */
29597 ix86_gen_TWO52 (enum machine_mode mode)
29599 REAL_VALUE_TYPE TWO52r;
29602 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29603 TWO52 = const_double_from_real_value (TWO52r, mode);
29604 TWO52 = force_reg (mode, TWO52);
29609 /* Expand SSE sequence for computing lround from OP1 storing
/* into OP0.  Adds copysign(nextafter(0.5, 0.0), op1) before the
   fix-to-integer conversion; using the value just below 0.5 avoids
   rounding 0.49999... up incorrectly after the addition.  */
29612 ix86_expand_lround (rtx op0, rtx op1)
29614 /* C code for the stuff we're doing below:
29615 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29618 enum machine_mode mode = GET_MODE (op1);
29619 const struct real_format *fmt;
29620 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29623 /* load nextafter (0.5, 0.0) */
29624 fmt = REAL_MODE_FORMAT (mode);
29625 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29626 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29628 /* adj = copysign (0.5, op1) */
29629 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29630 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29632 /* adj = op1 + adj */
29633 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29635 /* op0 = (imode)adj */
29636 expand_fix (op0, adj, 0);
29639 /* Expand SSE2 sequence for computing lround from OPERAND1 storing
/* into OPERAND0 -- actually lfloor/lceil selected by DO_FLOOR.
   Truncates to integer, converts back, then adjusts by 1 when the
   round-trip value lies on the wrong side of OP1.  The UNLE compare
   (operands swapped for ceil) jumps past the adjustment.  */
29642 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29644 /* C code for the stuff we're doing below (for do_floor):
29646 xi -= (double)xi > op1 ? 1 : 0;
29649 enum machine_mode fmode = GET_MODE (op1);
29650 enum machine_mode imode = GET_MODE (op0);
29651 rtx ireg, freg, label, tmp;
29653 /* reg = (long)op1 */
29654 ireg = gen_reg_rtx (imode);
29655 expand_fix (ireg, op1, 0);
29657 /* freg = (double)reg */
29658 freg = gen_reg_rtx (fmode);
29659 expand_float (freg, ireg, 0);
29661 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29662 label = ix86_expand_sse_compare_and_jump (UNLE,
29663 freg, op1, !do_floor);
29664 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29665 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29666 emit_move_insn (ireg, tmp);
29668 emit_label (label);
29669 LABEL_NUSES (label) = 1;
29671 emit_move_insn (op0, ireg);
29674 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29675 result in OPERAND0. */
/* Rounds via the add/subtract-TWO52 trick on |x|, then restores the
   sign with copysign.  Values with |x| >= 2**52 are already integral
   and skip the arithmetic via the UNLE branch.  */
29677 ix86_expand_rint (rtx operand0, rtx operand1)
29679 /* C code for the stuff we're doing below:
29680 xa = fabs (operand1);
29681 if (!isless (xa, 2**52))
29683 xa = xa + 2**52 - 2**52;
29684 return copysign (xa, operand1);
29686 enum machine_mode mode = GET_MODE (operand0);
29687 rtx res, xa, label, TWO52, mask;
29689 res = gen_reg_rtx (mode);
29690 emit_move_insn (res, operand1);
29692 /* xa = abs (operand1) */
29693 xa = ix86_expand_sse_fabs (res, &mask);
29695 /* if (!isless (xa, TWO52)) goto label; */
29696 TWO52 = ix86_gen_TWO52 (mode);
29697 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29699 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29700 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29702 ix86_sse_copysign_to_positive (res, xa, res, mask);
29704 emit_label (label);
29705 LABEL_NUSES (label) = 1;
29707 emit_move_insn (operand0, res);
29710 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0, variant that avoids DImode fix/float (usable on
   32-bit targets).  Rounds with the TWO52 trick, restores the sign,
   then corrects by +/-1.0 where the rounded value overshot.  The
   subtraction of a signed one (dconst1 vs dconstm1) preserves -0.0.  */
29713 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29715 /* C code for the stuff we expand below.
29716 double xa = fabs (x), x2;
29717 if (!isless (xa, TWO52))
29719 xa = xa + TWO52 - TWO52;
29720 x2 = copysign (xa, x);
29729 enum machine_mode mode = GET_MODE (operand0);
29730 rtx xa, TWO52, tmp, label, one, res, mask;
29732 TWO52 = ix86_gen_TWO52 (mode);
29734 /* Temporary for holding the result, initialized to the input
29735 operand to ease control flow. */
29736 res = gen_reg_rtx (mode);
29737 emit_move_insn (res, operand1);
29739 /* xa = abs (operand1) */
29740 xa = ix86_expand_sse_fabs (res, &mask);
29742 /* if (!isless (xa, TWO52)) goto label; */
29743 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29745 /* xa = xa + TWO52 - TWO52; */
29746 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29747 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29749 /* xa = copysign (xa, operand1) */
29750 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29752 /* generate 1.0 or -1.0 */
29753 one = force_reg (mode,
29754 const_double_from_real_value (do_floor
29755 ? dconst1 : dconstm1, mode));
29757 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29758 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29759 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29760 gen_rtx_AND (mode, one, tmp)));
29761 /* We always need to subtract here to preserve signed zero. */
29762 tmp = expand_simple_binop (mode, MINUS,
29763 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29764 emit_move_insn (res, tmp);
29766 emit_label (label);
29767 LABEL_NUSES (label) = 1;
29769 emit_move_insn (operand0, res);
29772 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
/* into OPERAND0.  Variant that truncates through an integer register
   (DImode for doubles, so 64-bit only for DFmode), then compensates by
   +/-1 and optionally restores signed zero via copysign.  */
29775 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29777 /* C code for the stuff we expand below.
29778 double xa = fabs (x), x2;
29779 if (!isless (xa, TWO52))
29781 x2 = (double)(long)x;
29788 if (HONOR_SIGNED_ZEROS (mode))
29789 return copysign (x2, x);
29792 enum machine_mode mode = GET_MODE (operand0);
29793 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29795 TWO52 = ix86_gen_TWO52 (mode);
29797 /* Temporary for holding the result, initialized to the input
29798 operand to ease control flow. */
29799 res = gen_reg_rtx (mode);
29800 emit_move_insn (res, operand1);
29802 /* xa = abs (operand1) */
29803 xa = ix86_expand_sse_fabs (res, &mask);
29805 /* if (!isless (xa, TWO52)) goto label; */
29806 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29808 /* xa = (double)(long)x */
29809 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29810 expand_fix (xi, res, 0);
29811 expand_float (xa, xi, 0);
29814 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29816 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29817 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29818 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29819 gen_rtx_AND (mode, one, tmp)));
29820 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29821 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29822 emit_move_insn (res, tmp);
29824 if (HONOR_SIGNED_ZEROS (mode))
29825 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29827 emit_label (label);
29828 LABEL_NUSES (label) = 1;
29830 emit_move_insn (operand0, res);
29833 /* Expand SSE sequence for computing round from OPERAND1 storing
29834 into OPERAND0. Sequence that works without relying on DImode truncation
29835 via cvttsd2siq that is only available on 64bit targets. */
/* Round-half-away-from-zero on |x|: first round-to-nearest-even via
   the TWO52 trick, then nudge by +/-1 when the rounding error dxa
   crosses +/-0.5, and finally restore the sign of the input so
   -0.0 stays -0.0.  */
29837 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29839 /* C code for the stuff we expand below.
29840 double xa = fabs (x), xa2, x2;
29841 if (!isless (xa, TWO52))
29843 Using the absolute value and copying back sign makes
29844 -0.0 -> -0.0 correct.
29845 xa2 = xa + TWO52 - TWO52;
29850 else if (dxa > 0.5)
29852 x2 = copysign (xa2, x);
29855 enum machine_mode mode = GET_MODE (operand0);
29856 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29858 TWO52 = ix86_gen_TWO52 (mode);
29860 /* Temporary for holding the result, initialized to the input
29861 operand to ease control flow. */
29862 res = gen_reg_rtx (mode);
29863 emit_move_insn (res, operand1);
29865 /* xa = abs (operand1) */
29866 xa = ix86_expand_sse_fabs (res, &mask);
29868 /* if (!isless (xa, TWO52)) goto label; */
29869 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29871 /* xa2 = xa + TWO52 - TWO52; */
29872 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29873 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29875 /* dxa = xa2 - xa; */
29876 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29878 /* generate 0.5, 1.0 and -0.5 */
29879 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29880 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29881 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29885 tmp = gen_reg_rtx (mode);
29886 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29887 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29888 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29889 gen_rtx_AND (mode, one, tmp)));
29890 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29891 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29892 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29893 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29894 gen_rtx_AND (mode, one, tmp)));
29895 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29897 /* res = copysign (xa2, operand1) */
29898 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29900 emit_label (label);
29901 LABEL_NUSES (label) = 1;
29903 emit_move_insn (operand0, res);
29906 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0: round-trip through an integer register truncates
   toward zero; copysign afterwards keeps -0.0 when signed zeros are
   honored.  Large values (>= 2**52/2**23) are already integral.  */
29909 ix86_expand_trunc (rtx operand0, rtx operand1)
29911 /* C code for SSE variant we expand below.
29912 double xa = fabs (x), x2;
29913 if (!isless (xa, TWO52))
29915 x2 = (double)(long)x;
29916 if (HONOR_SIGNED_ZEROS (mode))
29917 return copysign (x2, x);
29920 enum machine_mode mode = GET_MODE (operand0);
29921 rtx xa, xi, TWO52, label, res, mask;
29923 TWO52 = ix86_gen_TWO52 (mode);
29925 /* Temporary for holding the result, initialized to the input
29926 operand to ease control flow. */
29927 res = gen_reg_rtx (mode);
29928 emit_move_insn (res, operand1);
29930 /* xa = abs (operand1) */
29931 xa = ix86_expand_sse_fabs (res, &mask);
29933 /* if (!isless (xa, TWO52)) goto label; */
29934 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29936 /* x = (double)(long)x */
29937 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29938 expand_fix (xi, res, 0);
29939 expand_float (res, xi, 0);
29941 if (HONOR_SIGNED_ZEROS (mode))
29942 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29944 emit_label (label);
29945 LABEL_NUSES (label) = 1;
29947 emit_move_insn (operand0, res);
29950 /* Expand SSE sequence for computing trunc from OPERAND1 storing
/* into OPERAND0, 32-bit-safe variant: uses the TWO52 trick instead of
   a DImode conversion, subtracting 1 where round-to-nearest overshot
   |x|, then restores the sign.  */
29953 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29955 enum machine_mode mode = GET_MODE (operand0);
29956 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29958 /* C code for SSE variant we expand below.
29959 double xa = fabs (x), x2;
29960 if (!isless (xa, TWO52))
29962 xa2 = xa + TWO52 - TWO52;
29966 x2 = copysign (xa2, x);
29970 TWO52 = ix86_gen_TWO52 (mode);
29972 /* Temporary for holding the result, initialized to the input
29973 operand to ease control flow. */
29974 res = gen_reg_rtx (mode);
29975 emit_move_insn (res, operand1);
29977 /* xa = abs (operand1) */
29978 xa = ix86_expand_sse_fabs (res, &smask);
29980 /* if (!isless (xa, TWO52)) goto label; */
29981 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29983 /* res = xa + TWO52 - TWO52; */
29984 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29985 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29986 emit_move_insn (res, tmp);
29989 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29991 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29992 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29993 emit_insn (gen_rtx_SET (VOIDmode, mask,
29994 gen_rtx_AND (mode, mask, one)));
29995 tmp = expand_simple_binop (mode, MINUS,
29996 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29997 emit_move_insn (res, tmp);
29999 /* res = copysign (res, operand1) */
30000 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30002 emit_label (label);
30003 LABEL_NUSES (label) = 1;
30005 emit_move_insn (operand0, res);
30008 /* Expand SSE sequence for computing round from OPERAND1 storing
/* into OPERAND0: adds nextafter(0.5, 0.0) to |x|, truncates through
   an integer register, and restores the sign.  The predecessor-of-0.5
   constant prevents values just below 0.5 from rounding up.  */
30011 ix86_expand_round (rtx operand0, rtx operand1)
30013 /* C code for the stuff we're doing below:
30014 double xa = fabs (x);
30015 if (!isless (xa, TWO52))
30017 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30018 return copysign (xa, x);
30020 enum machine_mode mode = GET_MODE (operand0);
30021 rtx res, TWO52, xa, label, xi, half, mask;
30022 const struct real_format *fmt;
30023 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30025 /* Temporary for holding the result, initialized to the input
30026 operand to ease control flow. */
30027 res = gen_reg_rtx (mode);
30028 emit_move_insn (res, operand1);
30030 TWO52 = ix86_gen_TWO52 (mode);
30031 xa = ix86_expand_sse_fabs (res, &mask);
30032 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30034 /* load nextafter (0.5, 0.0) */
30035 fmt = REAL_MODE_FORMAT (mode);
30036 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30037 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30039 /* xa = xa + 0.5 */
30040 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30041 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30043 /* xa = (double)(int64_t)xa */
30044 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30045 expand_fix (xi, xa, 0);
30046 expand_float (xa, xi, 0);
30048 /* res = copysign (xa, operand1) */
30049 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30051 emit_label (label);
30052 LABEL_NUSES (label) = 1;
30054 emit_move_insn (operand0, res);
30058 /* Validate whether a SSE5 instruction is valid or not.
30059 OPERANDS is the array of operands.
30060 NUM is the number of operands.
30061 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
30062 NUM_MEMORY is the maximum number of memory operands to accept.
30063 when COMMUTATIVE is set, operand 1 and 2 can be swapped. */
/* Returns whether the operand combination is encodable: SSE5 allows at
   most one memory operand, in specific positions that depend on the
   instruction format.  mem_mask has bit i set when operands[i] is a
   memory operand; mem_count is the total.  */
30066 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
30067 bool uses_oc0, int num_memory, bool commutative)
30073 /* Count the number of memory arguments */
30076 for (i = 0; i < num; i++)
30078 enum machine_mode mode = GET_MODE (operands[i]);
30079 if (register_operand (operands[i], mode))
30082 else if (memory_operand (operands[i], mode))
30084 mem_mask |= (1 << i);
30090 rtx pattern = PATTERN (insn);
30092 /* allow 0 for pcmov */
30093 if (GET_CODE (pattern) != SET
30094 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
30096 || operands[i] != CONST0_RTX (mode))
30101 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
30102 a memory operation. */
30103 if (num_memory < 0)
30105 num_memory = -num_memory;
30106 if ((mem_mask & (1 << (num-1))) != 0)
30108 mem_mask &= ~(1 << (num-1));
30113 /* If there were no memory operations, allow the insn */
30117 /* Do not allow the destination register to be a memory operand. */
30118 else if (mem_mask & (1 << 0))
30121 /* If there are too many memory operations, disallow the instruction. While
30122 the hardware only allows 1 memory reference, before register allocation
30123 for some insns, we allow two memory operations sometimes in order to allow
30124 code like the following to be optimized:
30126 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
30128 or similar cases that are vectorized into using the fmaddss
30130 else if (mem_count > num_memory)
30133 /* Don't allow more than one memory operation if not optimizing. */
30134 else if (mem_count > 1 && !optimize)
30137 else if (num == 4 && mem_count == 1)
30139 /* formats (destination is the first argument), example fmaddss:
30140 xmm1, xmm1, xmm2, xmm3/mem
30141 xmm1, xmm1, xmm2/mem, xmm3
30142 xmm1, xmm2, xmm3/mem, xmm1
30143 xmm1, xmm2/mem, xmm3, xmm1 */
30145 return ((mem_mask == (1 << 1))
30146 || (mem_mask == (1 << 2))
30147 || (mem_mask == (1 << 3)));
30149 /* format, example pmacsdd:
30150 xmm1, xmm2, xmm3/mem, xmm1 */
30152 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
30154 return (mem_mask == (1 << 2));
30157 else if (num == 4 && num_memory == 2)
30159 /* If there are two memory operations, we can load one of the memory ops
30160 into the destination register. This is for optimizing the
30161 multiply/add ops, which the combiner has optimized both the multiply
30162 and the add insns to have a memory operation. We have to be careful
30163 that the destination doesn't overlap with the inputs. */
30164 rtx op0 = operands[0];
30166 if (reg_mentioned_p (op0, operands[1])
30167 || reg_mentioned_p (op0, operands[2])
30168 || reg_mentioned_p (op0, operands[3]))
30171 /* formats (destination is the first argument), example fmaddss:
30172 xmm1, xmm1, xmm2, xmm3/mem
30173 xmm1, xmm1, xmm2/mem, xmm3
30174 xmm1, xmm2, xmm3/mem, xmm1
30175 xmm1, xmm2/mem, xmm3, xmm1
30177 For the oc0 case, we will load either operands[1] or operands[3] into
30178 operands[0], so any combination of 2 memory operands is ok. */
30182 /* format, example pmacsdd:
30183 xmm1, xmm2, xmm3/mem, xmm1
30185 For the integer multiply/add instructions be more restrictive and
30186 require operands[2] and operands[3] to be the memory operands. */
/* FIX: the second disjunct previously read `|| ((1 << 2) | (1 << 3))`,
   a nonzero constant that made the whole condition always true and
   accepted invalid memory-operand combinations.  Compare mem_mask on
   both sides, matching the comment above.  */
30188 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
30190 return (mem_mask == ((1 << 2) | (1 << 3)));
30193 else if (num == 3 && num_memory == 1)
30195 /* formats, example protb:
30196 xmm1, xmm2, xmm3/mem
30197 xmm1, xmm2/mem, xmm3 */
30199 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
30201 /* format, example comeq:
30202 xmm1, xmm2, xmm3/mem */
30204 return (mem_mask == (1 << 2));
30208 gcc_unreachable ();
30214 /* Fixup an SSE5 instruction that has 2 memory input references into a form the
30215 hardware will allow by using the destination register to load one of the
30216 memory operations. Presently this is used by the multiply/add routines to
30217 allow 2 memory references. */
/* Loads operands[1] or operands[3] into the (register) destination and
   replaces that operand with op0 (the replacement assignments are
   elided from this listing).  Aborts if op0 is itself memory or
   overlaps any input.  */
30220 ix86_expand_sse5_multiple_memory (rtx operands[],
30222 enum machine_mode mode)
30224 rtx op0 = operands[0];
30226 || memory_operand (op0, mode)
30227 || reg_mentioned_p (op0, operands[1])
30228 || reg_mentioned_p (op0, operands[2])
30229 || reg_mentioned_p (op0, operands[3]))
30230 gcc_unreachable ();
30232 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
30233 the destination register. */
30234 if (memory_operand (operands[1], mode))
30236 emit_move_insn (op0, operands[1]);
30239 else if (memory_operand (operands[3], mode))
30241 emit_move_insn (op0, operands[3]);
30245 gcc_unreachable ();
30251 /* Table of valid machine attributes. */
/* Each row: name, min/max args, and whether the attribute applies to
   declarations, types, or function types; NULL row terminates.  The
   conditional rows are compiled in only for DLL-capable or subtarget
   configurations.  */
30252 static const struct attribute_spec ix86_attribute_table[] =
30254 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30255 /* Stdcall attribute says callee is responsible for popping arguments
30256 if they are not variable. */
30257 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30258 /* Fastcall attribute says callee is responsible for popping arguments
30259 if they are not variable. */
30260 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30261 /* Cdecl attribute says the callee is a normal C declaration */
30262 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30263 /* Regparm attribute specifies how many integer arguments are to be
30264 passed in registers. */
30265 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30266 /* Sseregparm attribute says we are using x86_64 calling conventions
30267 for FP arguments. */
30268 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30269 /* force_align_arg_pointer says this function realigns the stack at entry. */
30270 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30271 false, true, true, ix86_handle_cconv_attribute },
30272 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30273 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30274 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30275 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30277 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30278 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30279 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30280 SUBTARGET_ATTRIBUTE_TABLE,
30282 /* ms_abi and sysv_abi calling convention function attributes. */
30283 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30284 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30286 { NULL, 0, 0, false, false, false, NULL }
30289 /* Implement targetm.vectorize.builtin_vectorization_cost. */
/* Declared "static int x86_builtin_vectorization_cost (bool)" near the top
   of the file; the definition's return-type line (original line 30290) and
   the function braces are absent from this extract.  */
30291 x86_builtin_vectorization_cost (bool runtime_test)
30293 /* If the branch of the runtime test is taken - i.e. - the vectorized
30294 version is skipped - this incurs a misprediction cost (because the
30295 vectorized version is expected to be the fall-through). So we subtract
30296 the latency of a mispredicted branch from the costs that are incured
30297 when the vectorized version is executed.
30299 TODO: The values in individual target tables have to be tuned or new
30300 fields may be needed. For eg. on K8, the default branch path is the
30301 not-taken path. If the taken path is predicted correctly, the minimum
30302 penalty of going down the taken-path is 1 cycle. If the taken-path is
30303 not predicted correctly, then the minimum penalty is 10 cycles. */
/* Negative value: the misprediction latency is credited back against the
   cost of the vectorized version.  NOTE(review): the non-runtime_test path
   (original lines 30304-30306) is missing from this view -- presumably it
   returns 0; verify against the complete i386.c.  */
30307 return (-(ix86_cost->cond_taken_branch_cost));
30313 /* This function returns the calling abi specific va_list type node.
30314 It returns the FNDECL specific va_list type. */
/* Implements targetm.fn_abi_va_list (hooked below as TARGET_FN_ABI_VA_LIST).
   NOTE(review): the return-type line and the guard preceding the first
   return (original lines 30315-30319, presumably an "if (!TARGET_64BIT)"
   test) are absent from this extract -- confirm against upstream.  */
30317 ix86_fn_abi_va_list (tree fndecl)
/* Default (32-bit) case: the single generic va_list type.  */
30320 return va_list_type_node;
30321 gcc_assert (fndecl != NULL_TREE);
/* On 64-bit, pick the va_list flavor matching the function's own ABI.  */
30323 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
30324 return ms_va_list_type_node;
30326 return sysv_va_list_type_node;
30329 /* Returns the canonical va_list type specified by TYPE. If there
30330 is no valid TYPE provided, it return NULL_TREE. */
/* Implements targetm.canonical_va_list_type (hooked below as
   TARGET_CANONICAL_VA_LIST_TYPE).  Tries, in order, the generic va_list,
   the SYSV va_list and the MS va_list, returning the first whose main
   variant matches TYPE; otherwise defers to std_canonical_va_list_type.
   NOTE(review): the return-type line, braces, and the lines initializing
   the local "htype" from TYPE (original lines ~30331-30344) are absent
   from this extract -- verify structure against the complete i386.c.  */
30333 ix86_canonical_va_list_type (tree type)
30337 /* Resolve references and pointers to va_list type. */
30338 if (INDIRECT_REF_P (type))
30339 type = TREE_TYPE (type);
30340 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
30341 type = TREE_TYPE (type);
/* Candidate 1: the generic __builtin_va_list type.  */
30345 wtype = va_list_type_node;
30346 gcc_assert (wtype != NULL_TREE);
30348 if (TREE_CODE (wtype) == ARRAY_TYPE)
30350 /* If va_list is an array type, the argument may have decayed
30351 to a pointer type, e.g. by being passed to another function.
30352 In that case, unwrap both types so that we can compare the
30353 underlying records. */
30354 if (TREE_CODE (htype) == ARRAY_TYPE
30355 || POINTER_TYPE_P (htype))
30357 wtype = TREE_TYPE (wtype);
30358 htype = TREE_TYPE (htype);
30361 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30362 return va_list_type_node;
/* Candidate 2: the SYSV (x86_64) va_list type; same decay handling.  */
30363 wtype = sysv_va_list_type_node;
30364 gcc_assert (wtype != NULL_TREE);
30366 if (TREE_CODE (wtype) == ARRAY_TYPE)
30368 /* If va_list is an array type, the argument may have decayed
30369 to a pointer type, e.g. by being passed to another function.
30370 In that case, unwrap both types so that we can compare the
30371 underlying records. */
30372 if (TREE_CODE (htype) == ARRAY_TYPE
30373 || POINTER_TYPE_P (htype))
30375 wtype = TREE_TYPE (wtype);
30376 htype = TREE_TYPE (htype);
30379 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30380 return sysv_va_list_type_node;
/* Candidate 3: the Microsoft-ABI va_list type; same decay handling.  */
30381 wtype = ms_va_list_type_node;
30382 gcc_assert (wtype != NULL_TREE);
30384 if (TREE_CODE (wtype) == ARRAY_TYPE)
30386 /* If va_list is an array type, the argument may have decayed
30387 to a pointer type, e.g. by being passed to another function.
30388 In that case, unwrap both types so that we can compare the
30389 underlying records. */
30390 if (TREE_CODE (htype) == ARRAY_TYPE
30391 || POINTER_TYPE_P (htype))
30393 wtype = TREE_TYPE (wtype);
30394 htype = TREE_TYPE (htype);
30397 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
30398 return ms_va_list_type_node;
/* No target-specific match: fall back to the language-independent
   canonicalization.  */
30401 return std_canonical_va_list_type (type);
30404 /* Iterate through the target-specific builtin types for va_list.
30405 IDX denotes the iterator, *PTREE is set to the result type of
30406 the va_list builtin, and *PNAME to its internal type.
30407 Returns zero if there is no element for this index, otherwise
30408 IDX should be increased upon the next call.
30409 Note, do not iterate a base builtin's name like __builtin_va_list.
30410 Used from c_common_nodes_and_builtins. */
/* NOTE(review): the return-type line and the switch/case scaffolding with
   the return statements (original lines ~30411-30431) are absent from this
   extract.  Presumably idx 0 yields the MS entry and idx 1 the SYSV entry,
   with 64-bit-only availability -- confirm against the complete i386.c.  */
30413 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
/* Entry for the Microsoft-ABI va_list builtin.  */
30419 *ptree = ms_va_list_type_node;
30420 *pname = "__builtin_ms_va_list";
/* Entry for the SYSV-ABI va_list builtin.  */
30423 *ptree = sysv_va_list_type_node;
30424 *pname = "__builtin_sysv_va_list";
30432 /* Initialize the GCC target structure. */
/* Each #define below overrides the corresponding default hook supplied by
   target-def.h before targetm is instantiated at the bottom of the file.
   NOTE(review): several conditional-compilation lines (#endif/#else, e.g.
   around original lines 30444-30445, 30470-30473, 30527, 30531-30532,
   30620-30621) are absent from this extract -- verify the #if structure
   against the complete i386.c before editing.  */
30433 #undef TARGET_RETURN_IN_MEMORY
30434 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
30436 #undef TARGET_LEGITIMIZE_ADDRESS
30437 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
30439 #undef TARGET_ATTRIBUTE_TABLE
30440 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
30441 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30442 # undef TARGET_MERGE_DECL_ATTRIBUTES
30443 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
30446 #undef TARGET_COMP_TYPE_ATTRIBUTES
30447 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
/* Builtin-function expansion and vectorizer builtin hooks.  */
30449 #undef TARGET_INIT_BUILTINS
30450 #define TARGET_INIT_BUILTINS ix86_init_builtins
30451 #undef TARGET_EXPAND_BUILTIN
30452 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
30454 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
30455 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
30456 ix86_builtin_vectorized_function
30458 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
30459 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
30461 #undef TARGET_BUILTIN_RECIPROCAL
30462 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
30464 #undef TARGET_ASM_FUNCTION_EPILOGUE
30465 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
30467 #undef TARGET_ENCODE_SECTION_INFO
30468 #ifndef SUBTARGET_ENCODE_SECTION_INFO
30469 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
30471 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
/* Assembler output syntax: parentheses and data directives.  */
30474 #undef TARGET_ASM_OPEN_PAREN
30475 #define TARGET_ASM_OPEN_PAREN ""
30476 #undef TARGET_ASM_CLOSE_PAREN
30477 #define TARGET_ASM_CLOSE_PAREN ""
30479 #undef TARGET_ASM_BYTE_OP
30480 #define TARGET_ASM_BYTE_OP ASM_BYTE
30482 #undef TARGET_ASM_ALIGNED_HI_OP
30483 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
30484 #undef TARGET_ASM_ALIGNED_SI_OP
30485 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
30487 #undef TARGET_ASM_ALIGNED_DI_OP
30488 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
/* x86 has no alignment requirement on data, so the unaligned directives
   are just the aligned ones.  */
30491 #undef TARGET_ASM_UNALIGNED_HI_OP
30492 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
30493 #undef TARGET_ASM_UNALIGNED_SI_OP
30494 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
30495 #undef TARGET_ASM_UNALIGNED_DI_OP
30496 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
/* Instruction-scheduling hooks.  */
30498 #undef TARGET_SCHED_ADJUST_COST
30499 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
30500 #undef TARGET_SCHED_ISSUE_RATE
30501 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
30502 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
30503 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
30504 ia32_multipass_dfa_lookahead
30506 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
30507 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
30510 #undef TARGET_HAVE_TLS
30511 #define TARGET_HAVE_TLS true
30513 #undef TARGET_CANNOT_FORCE_CONST_MEM
30514 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
30515 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
30516 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
30518 #undef TARGET_DELEGITIMIZE_ADDRESS
30519 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
30521 #undef TARGET_MS_BITFIELD_LAYOUT_P
30522 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
/* TARGET_BINDS_LOCAL_P is defined twice here under different (elided)
   conditionals: Darwin first, then PE/dllimport targets.  */
30525 #undef TARGET_BINDS_LOCAL_P
30526 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
30528 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30529 #undef TARGET_BINDS_LOCAL_P
30530 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
30533 #undef TARGET_ASM_OUTPUT_MI_THUNK
30534 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
30535 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
30536 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
30538 #undef TARGET_ASM_FILE_START
30539 #define TARGET_ASM_FILE_START x86_file_start
/* NOTE(review): the first line of this flag expression (original line
   30543) is absent from this extract; only the trailing OR terms and the
   closing parenthesis survive.  */
30541 #undef TARGET_DEFAULT_TARGET_FLAGS
30542 #define TARGET_DEFAULT_TARGET_FLAGS \
30544 | TARGET_SUBTARGET_DEFAULT \
30545 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
30547 #undef TARGET_HANDLE_OPTION
30548 #define TARGET_HANDLE_OPTION ix86_handle_option
/* RTL cost and condition-code hooks.  */
30550 #undef TARGET_RTX_COSTS
30551 #define TARGET_RTX_COSTS ix86_rtx_costs
30552 #undef TARGET_ADDRESS_COST
30553 #define TARGET_ADDRESS_COST ix86_address_cost
30555 #undef TARGET_FIXED_CONDITION_CODE_REGS
30556 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
30557 #undef TARGET_CC_MODES_COMPATIBLE
30558 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
30560 #undef TARGET_MACHINE_DEPENDENT_REORG
30561 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
30563 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
30564 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
/* va_list construction/canonicalization hooks (implementations above).  */
30566 #undef TARGET_BUILD_BUILTIN_VA_LIST
30567 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
30569 #undef TARGET_FN_ABI_VA_LIST
30570 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
30572 #undef TARGET_CANONICAL_VA_LIST_TYPE
30573 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
30575 #undef TARGET_EXPAND_BUILTIN_VA_START
30576 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
30578 #undef TARGET_MD_ASM_CLOBBERS
30579 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
/* Argument-passing and stack-layout hooks.  */
30581 #undef TARGET_PROMOTE_PROTOTYPES
30582 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
30583 #undef TARGET_STRUCT_VALUE_RTX
30584 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
30585 #undef TARGET_SETUP_INCOMING_VARARGS
30586 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
30587 #undef TARGET_MUST_PASS_IN_STACK
30588 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
30589 #undef TARGET_PASS_BY_REFERENCE
30590 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
30591 #undef TARGET_INTERNAL_ARG_POINTER
30592 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
30593 #undef TARGET_UPDATE_STACK_BOUNDARY
30594 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
30595 #undef TARGET_GET_DRAP_RTX
30596 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
30597 #undef TARGET_STRICT_ARGUMENT_NAMING
30598 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
30600 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
30601 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
30603 #undef TARGET_SCALAR_MODE_SUPPORTED_P
30604 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
30606 #undef TARGET_VECTOR_MODE_SUPPORTED_P
30607 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
30609 #undef TARGET_C_MODE_FOR_SUFFIX
30610 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
30613 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
30614 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
30617 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30618 #undef TARGET_INSERT_ATTRIBUTES
30619 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30622 #undef TARGET_MANGLE_TYPE
30623 #define TARGET_MANGLE_TYPE ix86_mangle_type
30625 #undef TARGET_STACK_PROTECT_FAIL
30626 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30628 #undef TARGET_FUNCTION_VALUE
30629 #define TARGET_FUNCTION_VALUE ix86_function_value
30631 #undef TARGET_SECONDARY_RELOAD
30632 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30634 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30635 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
/* Per-function target-option (attribute((target))) hooks.  */
30637 #undef TARGET_SET_CURRENT_FUNCTION
30638 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30640 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30641 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30643 #undef TARGET_OPTION_SAVE
30644 #define TARGET_OPTION_SAVE ix86_function_specific_save
30646 #undef TARGET_OPTION_RESTORE
30647 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30649 #undef TARGET_OPTION_PRINT
30650 #define TARGET_OPTION_PRINT ix86_function_specific_print
30652 #undef TARGET_CAN_INLINE_P
30653 #define TARGET_CAN_INLINE_P ix86_can_inline_p
30655 #undef TARGET_EXPAND_TO_RTL_HOOK
30656 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30658 #undef TARGET_LEGITIMATE_ADDRESS_P
30659 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
30661 #undef TARGET_IRA_COVER_CLASSES
30662 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
30664 #undef TARGET_FRAME_POINTER_REQUIRED
30665 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
/* Instantiate the target hook vector from the macros configured above.  */
30667 struct gcc_target targetm = TARGET_INITIALIZER;
/* Garbage-collector roots generated by gengtype for this file.  */
30669 #include "gt-i386.h"