/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)			\
  ((mode) == QImode ? 0				\
   : (mode) == HImode ? 1			\
   : (mode) == SImode ? 2			\
   : (mode) == DImode ? 3			\
   : 4)
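
/* Illustrative use only (a sketch, not a quotation of the lookup sites):
   the index selects the per-mode entry of the five-element cost arrays
   in the tables below, e.g.

     int mul_start_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   Here "mult_init" names the "cost of starting multiply" array; treat the
   exact field name as an assumption made for this example.  */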
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
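
/* Worked example of the scale above: an add is 2 bytes, so
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1); i.e. when tuning for size,
   a 2-byte instruction costs exactly as much as one add costs when tuning
   for speed, keeping the two scales comparable.  */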
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
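
/* Reading the stringop descriptors below (an informal summary, not a
   definition): each descriptor is

     {alg_for_unknown_size, {{max_size_1, alg_1}, ..., {-1, last_alg}}}

   i.e. an algorithm for dynamically sized blocks plus a table of
   {maximum block size, algorithm} pairs, terminated by a -1 entry that
   covers all larger sizes.  Each cost table carries one such descriptor
   pair for memcpy and one for memset, with 32-bit and 64-bit variants;
   DUMMY_STRINGOP_ALGS is a placeholder for a variant a table never
   uses.  */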
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),			/* cost of an add instruction */
  COSTS_N_BYTES (3),			/* cost of a lea instruction */
  COSTS_N_BYTES (2),			/* variable shift costs */
  COSTS_N_BYTES (3),			/* constant shift costs */
  {COSTS_N_BYTES (3),			/* cost of starting multiply for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),			/* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),			/* HI */
   COSTS_N_BYTES (3),			/* SI */
   COSTS_N_BYTES (3),			/* DI */
   COSTS_N_BYTES (5)},			/* other */
  COSTS_N_BYTES (3),			/* cost of movsx */
  COSTS_N_BYTES (3),			/* cost of movzx */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_BYTES (2),			/* cost of FADD and FSUB insns. */
  COSTS_N_BYTES (2),			/* cost of FMUL instruction. */
  COSTS_N_BYTES (2),			/* cost of FDIV instruction. */
  COSTS_N_BYTES (2),			/* cost of FABS instruction. */
  COSTS_N_BYTES (2),			/* cost of FCHS instruction. */
  COSTS_N_BYTES (2),			/* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  1,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {	/* 386 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (6),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (6),			/* HI */
   COSTS_N_INSNS (6),			/* SI */
   COSTS_N_INSNS (6),			/* DI */
   COSTS_N_INSNS (6)},			/* other */
  COSTS_N_INSNS (1),			/* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (23),			/* SI */
   COSTS_N_INSNS (23),			/* DI */
   COSTS_N_INSNS (23)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of l1 cache */
  0,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (23),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (27),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (88),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (22),			/* cost of FABS instruction. */
  COSTS_N_INSNS (24),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (122),			/* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs i486_cost = {	/* 486 specific costs */
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (3),			/* variable shift costs */
  COSTS_N_INSNS (2),			/* constant shift costs */
  {COSTS_N_INSNS (12),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (12),			/* HI */
   COSTS_N_INSNS (12),			/* SI */
   COSTS_N_INSNS (12),			/* DI */
   COSTS_N_INSNS (12)},			/* other */
  1,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),			/* HI */
   COSTS_N_INSNS (40),			/* SI */
   COSTS_N_INSNS (40),			/* DI */
   COSTS_N_INSNS (40)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  15,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  4,					/* size of l1 cache.  486 has 8kB cache
					   shared for code and data, so 4kB is
					   not really precise.  */
  4,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (16),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (73),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (3),			/* cost of FABS instruction. */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (83),			/* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (11),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (11),			/* HI */
   COSTS_N_INSNS (11),			/* SI */
   COSTS_N_INSNS (11),			/* DI */
   COSTS_N_INSNS (11)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),			/* HI */
   COSTS_N_INSNS (25),			/* SI */
   COSTS_N_INSNS (25),			/* DI */
   COSTS_N_INSNS (25)},			/* other */
  COSTS_N_INSNS (3),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache. */
  8,					/* size of l2 cache */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (39),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (1),			/* cost of FABS instruction. */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (70),			/* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (4),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (4),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (4)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),			/* HI */
   COSTS_N_INSNS (17),			/* SI */
   COSTS_N_INSNS (17),			/* DI */
   COSTS_N_INSNS (17)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache. */
  256,					/* size of l2 cache */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (2),			/* cost of FABS instruction. */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (2),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (7),			/* SI */
   COSTS_N_INSNS (7),			/* DI */
   COSTS_N_INSNS (7)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),			/* HI */
   COSTS_N_INSNS (39),			/* SI */
   COSTS_N_INSNS (39),			/* DI */
   COSTS_N_INSNS (39)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  1,					/* cost for loading QImode using movzbl */
  {1, 1, 1},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {1, 1, 1},				/* cost of storing integer registers */
  1,					/* cost of reg,reg fld/fst */
  {1, 1, 1},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 6, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */

  1,					/* cost of moving MMX register */
  {1, 1},				/* cost of loading MMX registers
					   in SImode and DImode */
  {1, 1},				/* cost of storing MMX registers
					   in SImode and DImode */
  1,					/* cost of moving SSE register */
  {1, 1, 1},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {1, 1, 1},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  1,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache. */
  128,					/* size of l2 cache. */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (11),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (47),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (1),			/* cost of FABS instruction. */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (54),			/* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),			/* HI */
   COSTS_N_INSNS (18),			/* SI */
   COSTS_N_INSNS (18),			/* DI */
   COSTS_N_INSNS (18)},			/* other */
  COSTS_N_INSNS (2),			/* cost of movsx */
  COSTS_N_INSNS (2),			/* cost of movzx */
  8,					/* "large" insn */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache. */
  32,					/* size of l2 cache.  Some models
					   have integrated l2 cache, but
					   optimizing for k6 is not important
					   enough to worry about that.  */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  COSTS_N_INSNS (2),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (2),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (56),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (2),			/* cost of FABS instruction. */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (56),			/* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (5),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (5),			/* HI */
   COSTS_N_INSNS (5),			/* SI */
   COSTS_N_INSNS (5),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache. */
  256,					/* size of l2 cache. */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (24),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (2),			/* cost of FABS instruction. */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 3, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of l1 cache. */
  512,					/* size of l2 cache. */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (2),			/* cost of FABS instruction. */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction. */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall
     can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  5,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  3,					/* vec_unalign_load_cost.  */
  3,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  2,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (2),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (5)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),			/* HI */
   COSTS_N_INSNS (51),			/* SI */
   COSTS_N_INSNS (83),			/* DI */
   COSTS_N_INSNS (83)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
					/* On K8:
					     MOVD reg64, xmmreg  Double  FSTORE 4
					     MOVD reg32, xmmreg  Double  FSTORE 4
					   On AMDFAM10:
					     MOVD reg64, xmmreg  Double  FADD 3
					     MOVD reg32, xmmreg  Double  FADD 3  */
  64,					/* size of l1 cache. */
  512,					/* size of l2 cache. */
  64,					/* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,					/* number of parallel prefetches */
  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (19),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (2),			/* cost of FABS instruction. */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (35),			/* cost of FSQRT instruction. */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,					/* scalar_stmt_cost.  */
  2,					/* scalar_load_cost.  */
  2,					/* scalar_store_cost.  */
  6,					/* vec_stmt_cost.  */
  0,					/* vec_to_scalar_cost.  */
  2,					/* scalar_to_vec_cost.  */
  2,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  2,					/* vec_store_cost.  */
  2,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (3),			/* cost of a lea instruction */
  COSTS_N_INSNS (4),			/* variable shift costs */
  COSTS_N_INSNS (4),			/* constant shift costs */
  {COSTS_N_INSNS (15),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (15),			/* HI */
   COSTS_N_INSNS (15),			/* SI */
   COSTS_N_INSNS (15),			/* DI */
   COSTS_N_INSNS (15)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),			/* HI */
   COSTS_N_INSNS (56),			/* SI */
   COSTS_N_INSNS (56),			/* DI */
   COSTS_N_INSNS (56)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache. */
  256,					/* size of l2 cache. */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  COSTS_N_INSNS (5),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (7),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (43),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (2),			/* cost of FABS instruction. */
  COSTS_N_INSNS (2),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (43),			/* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1),			/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (10),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (10),			/* HI */
   COSTS_N_INSNS (10),			/* SI */
   COSTS_N_INSNS (10),			/* DI */
   COSTS_N_INSNS (10)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),			/* HI */
   COSTS_N_INSNS (66),			/* SI */
   COSTS_N_INSNS (66),			/* DI */
   COSTS_N_INSNS (66)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  16,					/* "large" insn */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  3,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  6,					/* cost of moving MMX register */
  {12, 12},				/* cost of loading MMX registers
					   in SImode and DImode */
  {12, 12},				/* cost of storing MMX registers
					   in SImode and DImode */
  6,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {12, 12, 12},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  8,					/* MMX or SSE register to integer */
  8,					/* size of l1 cache. */
  1024,					/* size of l2 cache. */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  COSTS_N_INSNS (6),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (40),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (3),			/* cost of FABS instruction. */
  COSTS_N_INSNS (3),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (44),			/* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (3),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (3),			/* DI */
   COSTS_N_INSNS (3)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),			/* HI */
   COSTS_N_INSNS (22),			/* SI */
   COSTS_N_INSNS (22),			/* DI */
   COSTS_N_INSNS (22)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  2,					/* cost for loading QImode using movzbl */
  {6, 6, 6},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {6, 6},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {6, 6, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 4},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  2,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache. */
  2048,					/* size of l2 cache. */
  128,					/* size of prefetch block */
  8,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (3),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (32),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (1),			/* cost of FABS instruction. */
  COSTS_N_INSNS (1),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (58),			/* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache. */
  256,					/* size of l2 cache. */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (8),			/* cost of FABS instruction. */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache. */
  512,					/* size of l2 cache. */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (8),			/* cost of FABS instruction. */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction. */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */

struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),			/* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,		/* cost of a lea instruction */
  COSTS_N_INSNS (1),			/* variable shift costs */
  COSTS_N_INSNS (1),			/* constant shift costs */
  {COSTS_N_INSNS (3),			/* cost of starting multiply for QI */
   COSTS_N_INSNS (4),			/* HI */
   COSTS_N_INSNS (3),			/* SI */
   COSTS_N_INSNS (4),			/* DI */
   COSTS_N_INSNS (2)},			/* other */
  0,					/* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),			/* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),			/* HI */
   COSTS_N_INSNS (42),			/* SI */
   COSTS_N_INSNS (74),			/* DI */
   COSTS_N_INSNS (74)},			/* other */
  COSTS_N_INSNS (1),			/* cost of movsx */
  COSTS_N_INSNS (1),			/* cost of movzx */
  8,					/* "large" insn */
  17,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {4, 4, 4},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {12, 12, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers
					   in SFmode, DFmode and XFmode */
  2,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {8, 8, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {8, 8, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  32,					/* size of l1 cache. */
  256,					/* size of l2 cache. */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* Branch cost */
  COSTS_N_INSNS (8),			/* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),			/* cost of FMUL instruction. */
  COSTS_N_INSNS (20),			/* cost of FDIV instruction. */
  COSTS_N_INSNS (8),			/* cost of FABS instruction. */
  COSTS_N_INSNS (8),			/* cost of FCHS instruction. */
  COSTS_N_INSNS (40),			/* cost of FSQRT instruction. */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar_load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  2,					/* vec_unalign_load_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
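
/* Sketch of how the active table is consumed (a hypothetical excerpt; the
   real lookups live in the rtx-cost and expansion code later in this
   file):

     case PLUS:
       *total = ix86_cost->add;
       break;

   Retuning via -mtune simply repoints ix86_cost at another table.  */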
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
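
/* How the masks are consumed (a sketch, assuming the option-override code
   elsewhere in this file): the active processor contributes one bit, and
   each initializer below is tested against it, roughly

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
*/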
1288 /* Feature tests against the various tunings. */
1289 unsigned char ix86_tune_features[X86_TUNE_LAST];
1291 /* Feature tests against the various tunings used to create ix86_tune_features
1292 based on the processor mask. */
1293 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1294 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1295 negatively, so enabling for Generic64 seems like good code size
1296 tradeoff. We can't enable it for 32bit generic because it does not
1297 work well with PPro base chips. */
1298 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1300 /* X86_TUNE_PUSH_MEMORY */
1301 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1302 | m_NOCONA | m_CORE2 | m_GENERIC,
1304 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1307 /* X86_TUNE_UNROLL_STRLEN */
1308 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1309 | m_CORE2 | m_GENERIC,
1311 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1312 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1314 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1315 on simulation result. But after P4 was made, no performance benefit
1316 was observed with branch hints. It also increases the code size.
1317 As a result, icc never generates branch hints. */
1320 /* X86_TUNE_DOUBLE_WITH_ADD */
1323 /* X86_TUNE_USE_SAHF */
1324 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1325 | m_NOCONA | m_CORE2 | m_GENERIC,
1327 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1328 partial dependencies. */
1329 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1330 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1332 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1333 register stalls on Generic32 compilation setting as well. However
1334 in current implementation the partial register stalls are not eliminated
1335 very well - they can be introduced via subregs synthesized by combine
1336 and can happen in caller/callee saving sequences. Because this option
1337 pays back little on PPro based chips and is in conflict with partial reg
1338 dependencies used by Athlon/P4 based chips, it is better to leave it off
1339 for generic32 for now. */
1342 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1343 m_CORE2 | m_GENERIC,
1345 /* X86_TUNE_USE_HIMODE_FIOP */
1346 m_386 | m_486 | m_K6_GEODE,
1348 /* X86_TUNE_USE_SIMODE_FIOP */
1349 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1351 /* X86_TUNE_USE_MOV0 */
1354 /* X86_TUNE_USE_CLTD */
1355 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1357 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1360 /* X86_TUNE_SPLIT_LONG_MOVES */
1363 /* X86_TUNE_READ_MODIFY_WRITE */
1366 /* X86_TUNE_READ_MODIFY */
1369 /* X86_TUNE_PROMOTE_QIMODE */
1370 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1371 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1373 /* X86_TUNE_FAST_PREFIX */
1374 ~(m_PENT | m_486 | m_386),
1376 /* X86_TUNE_SINGLE_STRINGOP */
1377 m_386 | m_PENT4 | m_NOCONA,
1379 /* X86_TUNE_QIMODE_MATH */
1382 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1383 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1384 might be considered for Generic32 if our scheme for avoiding partial
1385 stalls was more effective. */
1388 /* X86_TUNE_PROMOTE_QI_REGS */
1391 /* X86_TUNE_PROMOTE_HI_REGS */
1394 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1395 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1396 | m_CORE2 | m_GENERIC,
1398 /* X86_TUNE_ADD_ESP_8 */
1399 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1400 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1402 /* X86_TUNE_SUB_ESP_4 */
1403 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1406 /* X86_TUNE_SUB_ESP_8 */
1407 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1408 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1410 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1411 for DFmode copies */
1412 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1413 | m_GENERIC | m_GEODE),
1415 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1416 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1418 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1419 conflict here in between PPro/Pentium4 based chips that thread 128bit
1420 SSE registers as single units versus K8 based chips that divide SSE
1421 registers to two 64bit halves. This knob promotes all store destinations
1422 to be 128bit to allow register renaming on 128bit SSE units, but usually
1423 results in one extra microop on 64bit SSE units. Experimental results
1424 shows that disabling this option on P4 brings over 20% SPECfp regression,
1425 while enabling it on K8 brings roughly 2.4% regression that can be partly
1426 masked by careful scheduling of moves. */
1427 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1430 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1433 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1434 are resolved on SSE register parts instead of whole registers, so we may
1435 maintain just lower part of scalar values in proper format leaving the
1436 upper part undefined. */
1439 /* X86_TUNE_SSE_TYPELESS_STORES */
1442 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1443 m_PPRO | m_PENT4 | m_NOCONA,
1445 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1446 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1448 /* X86_TUNE_PROLOGUE_USING_MOVE */
1449 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1451 /* X86_TUNE_EPILOGUE_USING_MOVE */
1452 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1454 /* X86_TUNE_SHIFT1 */
1457 /* X86_TUNE_USE_FFREEP */
1460 /* X86_TUNE_INTER_UNIT_MOVES */
1461 ~(m_AMD_MULTIPLE | m_GENERIC),
1463 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1466 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1467 than 4 branch instructions in the 16 byte window. */
1468 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1471 /* X86_TUNE_SCHEDULE */
1472 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1475 /* X86_TUNE_USE_BT */
1476 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1478 /* X86_TUNE_USE_INCDEC */
1479 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1481 /* X86_TUNE_PAD_RETURNS */
1482 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1484 /* X86_TUNE_EXT_80387_CONSTANTS */
1485 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1486 | m_CORE2 | m_GENERIC,
1488 /* X86_TUNE_SHORTEN_X87_SSE */
1491 /* X86_TUNE_AVOID_VECTOR_DECODE */
1494 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
1495 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1498 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1499 vector path on AMD machines. */
1500 m_K8 | m_GENERIC64 | m_AMDFAM10,
1502 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1504 m_K8 | m_GENERIC64 | m_AMDFAM10,
1506 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1510 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1511 but one byte longer. */
1514 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1515 operand that cannot be represented using a modRM byte. The XOR
1516 replacement is long decoded, so this split helps here as well. */
1519 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
1521 m_AMDFAM10 | m_GENERIC,
1523 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1524 from integer to FP. */
1527 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1528 with a subsequent conditional jump instruction into a single
1529 compare-and-branch uop. */
1532 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1533 will impact LEA instruction selection. */
1537 /* Feature tests against the various architecture variations. */
1538 unsigned char ix86_arch_features[X86_ARCH_LAST];
1540 /* Feature tests against the various architecture variations, used to create
1541 ix86_arch_features based on the processor mask. */
1542 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1543 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1544 ~(m_386 | m_486 | m_PENT | m_K6),
1546 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1549 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1552 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1555 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1559 static const unsigned int x86_accumulate_outgoing_args
1560 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1563 static const unsigned int x86_arch_always_fancy_math_387
1564 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1565 | m_NOCONA | m_CORE2 | m_GENERIC;
1567 static enum stringop_alg stringop_alg = no_stringop;
1569 /* In case the average insn count for a single function invocation is
1570 lower than this constant, emit a fast (but longer) prologue and epilogue. */
1572 #define FAST_PROLOGUE_INSN_COUNT 20
1574 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1575 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1576 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1577 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1579 /* Array of the smallest class containing reg number REGNO, indexed by
1580 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1582 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1584 /* ax, dx, cx, bx */
1585 AREG, DREG, CREG, BREG,
1586 /* si, di, bp, sp */
1587 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1589 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1590 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1593 /* flags, fpsr, fpcr, frame */
1594 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1596 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1599 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1602 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1603 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1604 /* SSE REX registers */
1605 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1609 /* The "default" register map used in 32bit mode. */
1611 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1613 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1614 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1615 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1616 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1617 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1618 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1619 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1622 /* The "default" register map used in 64bit mode. */
1624 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1626 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1627 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1628 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1629 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1630 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1631 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1632 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1635 /* Define the register numbers to be used in Dwarf debugging information.
1636 The SVR4 reference port C compiler uses the following register numbers
1637 in its Dwarf output code:
1638 0 for %eax (gcc regno = 0)
1639 1 for %ecx (gcc regno = 2)
1640 2 for %edx (gcc regno = 1)
1641 3 for %ebx (gcc regno = 3)
1642 4 for %esp (gcc regno = 7)
1643 5 for %ebp (gcc regno = 6)
1644 6 for %esi (gcc regno = 4)
1645 7 for %edi (gcc regno = 5)
1646 The following three DWARF register numbers are never generated by
1647 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1648 believes these numbers have these meanings.
1649 8 for %eip (no gcc equivalent)
1650 9 for %eflags (gcc regno = 17)
1651 10 for %trapno (no gcc equivalent)
1652 It is not at all clear how we should number the FP stack registers
1653 for the x86 architecture. If the version of SDB on x86/svr4 were
1654 a bit less brain dead with respect to floating-point then we would
1655 have a precedent to follow with respect to DWARF register numbers
1656 for x86 FP registers, but the SDB on x86/svr4 is so completely
1657 broken with respect to FP registers that it is hardly worth thinking
1658 of it as something to strive for compatibility with.
1659 The version of x86/svr4 SDB I have at the moment does (partially)
1660 seem to believe that DWARF register number 11 is associated with
1661 the x86 register %st(0), but that's about all. Higher DWARF
1662 register numbers don't seem to be associated with anything in
1663 particular, and even for DWARF regno 11, SDB only seems to under-
1664 stand that it should say that a variable lives in %st(0) (when
1665 asked via an `=' command) if we said it was in DWARF regno 11,
1666 but SDB still prints garbage when asked for the value of the
1667 variable in question (via a `/' command).
1668 (Also note that the labels SDB prints for various FP stack regs
1669 when doing an `x' command are all wrong.)
1670 Note that these problems generally don't affect the native SVR4
1671 C compiler because it doesn't allow the use of -O with -g and
1672 because when it is *not* optimizing, it allocates a memory
1673 location for each floating-point variable, and the memory
1674 location is what gets described in the DWARF AT_location
1675 attribute for the variable in question.
1676 Regardless of the severe mental illness of the x86/svr4 SDB, we
1677 do something sensible here and we use the following DWARF
1678 register numbers. Note that these are all stack-top-relative numbers:
1680 11 for %st(0) (gcc regno = 8)
1681 12 for %st(1) (gcc regno = 9)
1682 13 for %st(2) (gcc regno = 10)
1683 14 for %st(3) (gcc regno = 11)
1684 15 for %st(4) (gcc regno = 12)
1685 16 for %st(5) (gcc regno = 13)
1686 17 for %st(6) (gcc regno = 14)
1687 18 for %st(7) (gcc regno = 15)
1689 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1691 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1692 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1693 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1694 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1695 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1696 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1697 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1700 /* Test and compare insns in i386.md store the information needed to
1701 generate branch and scc insns here. */
1703 rtx ix86_compare_op0 = NULL_RTX;
1704 rtx ix86_compare_op1 = NULL_RTX;
1706 /* Define parameter passing and return registers. */
1708 static int const x86_64_int_parameter_registers[6] =
1710 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1713 static int const x86_64_ms_abi_int_parameter_registers[4] =
1715 CX_REG, DX_REG, R8_REG, R9_REG
1718 static int const x86_64_int_return_registers[4] =
1720 AX_REG, DX_REG, DI_REG, SI_REG
1723 /* Define the structure for the machine field in struct function. */
1725 struct GTY(()) stack_local_entry {
1726 unsigned short mode;
1729 struct stack_local_entry *next;
1732 /* Structure describing stack frame layout.
1733 Stack grows downward:
1739 saved frame pointer if frame_pointer_needed
1740 <- HARD_FRAME_POINTER
1749 [va_arg registers] (
1750 > to_allocate <- FRAME_POINTER
1762 HOST_WIDE_INT frame;
1764 int outgoing_arguments_size;
1767 HOST_WIDE_INT to_allocate;
1768 /* The offsets relative to ARG_POINTER. */
1769 HOST_WIDE_INT frame_pointer_offset;
1770 HOST_WIDE_INT hard_frame_pointer_offset;
1771 HOST_WIDE_INT stack_pointer_offset;
1773 /* When save_regs_using_mov is set, emit prologue using
1774 move instead of push instructions. */
1775 bool save_regs_using_mov;
1778 /* Code model option. */
1779 enum cmodel ix86_cmodel;
1781 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1783 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1785 /* Which unit we are generating floating point math for. */
1786 enum fpmath_unit ix86_fpmath;
1788 /* Which CPU we are scheduling for. */
1789 enum attr_cpu ix86_schedule;
1791 /* Which CPU we are optimizing for. */
1792 enum processor_type ix86_tune;
1794 /* Which instruction set architecture to use. */
1795 enum processor_type ix86_arch;
1797 /* True if the SSE prefetch instruction is not a NOP. */
1798 int x86_prefetch_sse;
1800 /* ix86_regparm_string as a number */
1801 static int ix86_regparm;
1803 /* -mstackrealign option */
1804 extern int ix86_force_align_arg_pointer;
1805 static const char ix86_force_align_arg_pointer_string[]
1806 = "force_align_arg_pointer";
1808 static rtx (*ix86_gen_leave) (void);
1809 static rtx (*ix86_gen_pop1) (rtx);
1810 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1811 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1812 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1813 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1814 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1815 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1817 /* Preferred alignment for stack boundary in bits. */
1818 unsigned int ix86_preferred_stack_boundary;
1820 /* Alignment for incoming stack boundary in bits specified at the command line. */
1822 static unsigned int ix86_user_incoming_stack_boundary;
1824 /* Default alignment for incoming stack boundary in bits. */
1825 static unsigned int ix86_default_incoming_stack_boundary;
1827 /* Alignment for incoming stack boundary in bits. */
1828 unsigned int ix86_incoming_stack_boundary;
1830 /* The ABI used by the target. */
1831 enum calling_abi ix86_abi;
1833 /* Values 1-5: see jump.c */
1834 int ix86_branch_cost;
1836 /* Calling-ABI-specific va_list type nodes. */
1837 static GTY(()) tree sysv_va_list_type_node;
1838 static GTY(()) tree ms_va_list_type_node;
1840 /* Variables which are this size or smaller are put in the data/bss
1841 or ldata/lbss sections. */
1843 int ix86_section_threshold = 65536;
1845 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1846 char internal_label_prefix[16];
1847 int internal_label_prefix_len;
1849 /* Fence to use after loop using movnt. */
1852 /* Register class used for passing a given 64bit part of the argument.
1853 These represent the classes documented by the psABI, with the exception
1854 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1855 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1857 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1858 whenever possible (the upper half does contain padding). */
1859 enum x86_64_reg_class
1862 X86_64_INTEGER_CLASS,
1863 X86_64_INTEGERSI_CLASS,
1870 X86_64_COMPLEX_X87_CLASS,
1874 #define MAX_CLASSES 4
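/* For orientation, a sketch of how the x86-64 psABI classification
   plays out: a double argument is classified SSEDF and passed in an
   SSE register, while a 16-byte struct { long a, b; } occupies two
   eightbytes, both classified X86_64_INTEGER_CLASS and passed in two
   integer registers.  */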
1876 /* Table of constants used by fldpi, fldln2, etc. */
1877 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1878 static bool ext_80387_constants_init = 0;
1881 static struct machine_function * ix86_init_machine_status (void);
1882 static rtx ix86_function_value (const_tree, const_tree, bool);
1883 static rtx ix86_static_chain (const_tree, bool);
1884 static int ix86_function_regparm (const_tree, const_tree);
1885 static void ix86_compute_frame_layout (struct ix86_frame *);
1886 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1888 static void ix86_add_new_builtins (int);
1889 static rtx ix86_expand_vec_perm_builtin (tree);
1891 enum ix86_function_specific_strings
1893 IX86_FUNCTION_SPECIFIC_ARCH,
1894 IX86_FUNCTION_SPECIFIC_TUNE,
1895 IX86_FUNCTION_SPECIFIC_FPMATH,
1896 IX86_FUNCTION_SPECIFIC_MAX
1899 static char *ix86_target_string (int, int, const char *, const char *,
1900 const char *, bool);
1901 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1902 static void ix86_function_specific_save (struct cl_target_option *);
1903 static void ix86_function_specific_restore (struct cl_target_option *);
1904 static void ix86_function_specific_print (FILE *, int,
1905 struct cl_target_option *);
1906 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1907 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1908 static bool ix86_can_inline_p (tree, tree);
1909 static void ix86_set_current_function (tree);
1910 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
1912 static enum calling_abi ix86_function_abi (const_tree);
1915 #ifndef SUBTARGET32_DEFAULT_CPU
1916 #define SUBTARGET32_DEFAULT_CPU "i386"
1919 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
1921 #ifndef DEFAULT_PCC_STRUCT_RETURN
1922 #define DEFAULT_PCC_STRUCT_RETURN 1
1925 /* Whether -mtune= or -march= were specified */
1926 static int ix86_tune_defaulted;
1927 static int ix86_arch_specified;
1929 /* Bit flags that specify the ISA we are compiling for. */
1930 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1932 /* A mask of ix86_isa_flags that includes bit X if X
1933 was set or cleared on the command line. */
1934 static int ix86_isa_flags_explicit;
1936 /* Define a set of ISAs which are available when a given ISA is
1937 enabled. MMX and SSE ISAs are handled separately. */
1939 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1940 #define OPTION_MASK_ISA_3DNOW_SET \
1941 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1943 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1944 #define OPTION_MASK_ISA_SSE2_SET \
1945 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1946 #define OPTION_MASK_ISA_SSE3_SET \
1947 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1948 #define OPTION_MASK_ISA_SSSE3_SET \
1949 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1950 #define OPTION_MASK_ISA_SSE4_1_SET \
1951 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1952 #define OPTION_MASK_ISA_SSE4_2_SET \
1953 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1954 #define OPTION_MASK_ISA_AVX_SET \
1955 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1956 #define OPTION_MASK_ISA_FMA_SET \
1957 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
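/* Illustratively, the SET macros chain, so a single option pulls in all
   of its prerequisites; e.g. OPTION_MASK_ISA_SSE4_2_SET expands to

     OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3
       | OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE

   which is why "-msse4.2" alone enables the whole SSE family below it.  */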
1959 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */
1961 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1963 #define OPTION_MASK_ISA_SSE4A_SET \
1964 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1965 #define OPTION_MASK_ISA_FMA4_SET \
1966 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
1967 | OPTION_MASK_ISA_AVX_SET)
1968 #define OPTION_MASK_ISA_XOP_SET \
1969 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
1970 #define OPTION_MASK_ISA_LWP_SET \
1973 /* AES and PCLMUL need SSE2 because they use xmm registers. */
1974 #define OPTION_MASK_ISA_AES_SET \
1975 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1976 #define OPTION_MASK_ISA_PCLMUL_SET \
1977 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1979 #define OPTION_MASK_ISA_ABM_SET \
1980 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1982 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1983 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1984 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1985 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
1986 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
1988 /* Define a set of ISAs which aren't available when a given ISA is
1989 disabled. MMX and SSE ISAs are handled separately. */
1991 #define OPTION_MASK_ISA_MMX_UNSET \
1992 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1993 #define OPTION_MASK_ISA_3DNOW_UNSET \
1994 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1995 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1997 #define OPTION_MASK_ISA_SSE_UNSET \
1998 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1999 #define OPTION_MASK_ISA_SSE2_UNSET \
2000 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2001 #define OPTION_MASK_ISA_SSE3_UNSET \
2002 (OPTION_MASK_ISA_SSE3 \
2003 | OPTION_MASK_ISA_SSSE3_UNSET \
2004 | OPTION_MASK_ISA_SSE4A_UNSET )
2005 #define OPTION_MASK_ISA_SSSE3_UNSET \
2006 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2007 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2008 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2009 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2010 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2011 #define OPTION_MASK_ISA_AVX_UNSET \
2012 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2013 | OPTION_MASK_ISA_FMA4_UNSET)
2014 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
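/* The UNSET macros chain in the opposite direction: disabling an ISA
   also disables everything built on top of it.  E.g.
   OPTION_MASK_ISA_SSE2_UNSET transitively clears SSE3, SSSE3, SSE4.1,
   SSE4.2, SSE4A, AVX, FMA, FMA4 and XOP, so "-mno-sse2" can never
   leave a dependent ISA enabled.  */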
2016 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same as -mno-sse4.1. */
2018 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2020 #define OPTION_MASK_ISA_SSE4A_UNSET \
2021 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2023 #define OPTION_MASK_ISA_FMA4_UNSET \
2024 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2025 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2026 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2028 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2029 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2030 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2031 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2032 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2033 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2034 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2035 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2037 /* Vectorization library interface and handlers. */
2038 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
2039 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2040 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2042 /* Processor target table, indexed by processor number */
2045 const struct processor_costs *cost; /* Processor costs */
2046 const int align_loop; /* Default alignments. */
2047 const int align_loop_max_skip;
2048 const int align_jump;
2049 const int align_jump_max_skip;
2050 const int align_func;
2053 static const struct ptt processor_target_table[PROCESSOR_max] =
2055 {&i386_cost, 4, 3, 4, 3, 4},
2056 {&i486_cost, 16, 15, 16, 15, 16},
2057 {&pentium_cost, 16, 7, 16, 7, 16},
2058 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2059 {&geode_cost, 0, 0, 0, 0, 0},
2060 {&k6_cost, 32, 7, 32, 7, 32},
2061 {&athlon_cost, 16, 7, 16, 7, 16},
2062 {&pentium4_cost, 0, 0, 0, 0, 0},
2063 {&k8_cost, 16, 7, 16, 7, 16},
2064 {&nocona_cost, 0, 0, 0, 0, 0},
2065 {&core2_cost, 16, 10, 16, 10, 16},
2066 {&generic32_cost, 16, 7, 16, 7, 16},
2067 {&generic64_cost, 16, 10, 16, 10, 16},
2068 {&amdfam10_cost, 32, 24, 32, 7, 32},
2069 {&atom_cost, 16, 7, 16, 7, 16}
2072 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2098 /* Implement TARGET_HANDLE_OPTION. */
2101 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2108 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2109 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2113 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2114 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
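/* Every ISA option below repeats the pattern of the MMX case just
   above.  A minimal sketch for a hypothetical "-mfoo" / "-mno-foo"
   pair (the OPTION_MASK_ISA_FOO_* names are placeholders, not real
   masks):

     if (value)                          (user passed -mfoo)
       {
         ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
         ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
       }
     else                                (user passed -mno-foo)
       {
         ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
         ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
       }

   ix86_isa_flags_explicit records every bit the user touched, so the
   -march defaults applied later never overwrite an explicit choice.  */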
2121 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2122 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2126 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2127 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2137 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2138 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2142 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2143 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2150 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2151 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2155 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2156 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2163 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2164 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2168 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2169 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2176 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2177 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2181 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2182 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2189 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2190 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2194 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2195 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2202 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2203 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2207 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2208 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2215 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2216 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2220 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2221 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2228 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2233 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2234 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2239 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2240 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2244 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2245 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2251 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2252 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2256 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2264 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2265 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2269 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2277 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2278 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2282 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2290 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2291 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2295 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2303 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2304 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2308 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2316 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2317 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2321 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2329 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2330 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2334 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2342 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2343 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2347 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2348 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2355 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2356 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2360 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2361 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2368 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2369 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2373 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2374 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2381 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2382 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2386 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2387 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2394 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2395 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2399 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2400 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2409 /* Return a string that documents the current -m options. The caller is
2410 responsible for freeing the string. */
2413 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2414 const char *fpmath, bool add_nl_p)
2416 struct ix86_target_opts
2418 const char *option; /* option string */
2419 int mask; /* isa mask options */
2422 /* This table is ordered so that options like -msse4.2, which imply
2423 the preceding options, are matched first. */
2424 static struct ix86_target_opts isa_opts[] =
2426 { "-m64", OPTION_MASK_ISA_64BIT },
2427 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2428 { "-mxop", OPTION_MASK_ISA_XOP },
2429 { "-mlwp", OPTION_MASK_ISA_LWP },
2430 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2431 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2432 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2433 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2434 { "-msse3", OPTION_MASK_ISA_SSE3 },
2435 { "-msse2", OPTION_MASK_ISA_SSE2 },
2436 { "-msse", OPTION_MASK_ISA_SSE },
2437 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2438 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2439 { "-mmmx", OPTION_MASK_ISA_MMX },
2440 { "-mabm", OPTION_MASK_ISA_ABM },
2441 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2442 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2443 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2444 { "-maes", OPTION_MASK_ISA_AES },
2445 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2449 static struct ix86_target_opts flag_opts[] =
2451 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2452 { "-m80387", MASK_80387 },
2453 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2454 { "-malign-double", MASK_ALIGN_DOUBLE },
2455 { "-mcld", MASK_CLD },
2456 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2457 { "-mieee-fp", MASK_IEEE_FP },
2458 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2459 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2460 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2461 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2462 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2463 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2464 { "-mno-red-zone", MASK_NO_RED_ZONE },
2465 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2466 { "-mrecip", MASK_RECIP },
2467 { "-mrtd", MASK_RTD },
2468 { "-msseregparm", MASK_SSEREGPARM },
2469 { "-mstack-arg-probe", MASK_STACK_PROBE },
2470 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2473 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2476 char target_other[40];
2485 memset (opts, '\0', sizeof (opts));
2487 /* Add -march= option. */
2490 opts[num][0] = "-march=";
2491 opts[num++][1] = arch;
2494 /* Add -mtune= option. */
2497 opts[num][0] = "-mtune=";
2498 opts[num++][1] = tune;
2501 /* Pick out the options in isa options. */
2502 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2504 if ((isa & isa_opts[i].mask) != 0)
2506 opts[num++][0] = isa_opts[i].option;
2507 isa &= ~ isa_opts[i].mask;
2511 if (isa && add_nl_p)
2513 opts[num++][0] = isa_other;
2514 sprintf (isa_other, "(other isa: 0x%x)", isa);
2517 /* Add flag options. */
2518 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2520 if ((flags & flag_opts[i].mask) != 0)
2522 opts[num++][0] = flag_opts[i].option;
2523 flags &= ~ flag_opts[i].mask;
2527 if (flags && add_nl_p)
2529 opts[num++][0] = target_other;
2530 sprintf (target_other, "(other flags: 0x%x)", flags);
2533 /* Add -fpmath= option. */
2536 opts[num][0] = "-mfpmath=";
2537 opts[num++][1] = fpmath;
2544 gcc_assert (num < ARRAY_SIZE (opts));
2546 /* Size the string. */
2548 sep_len = (add_nl_p) ? 3 : 1;
2549 for (i = 0; i < num; i++)
2552 for (j = 0; j < 2; j++)
2554 len += strlen (opts[i][j]);
2557 /* Build the string. */
2558 ret = ptr = (char *) xmalloc (len);
2561 for (i = 0; i < num; i++)
2565 for (j = 0; j < 2; j++)
2566 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2573 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2581 for (j = 0; j < 2; j++)
2584 memcpy (ptr, opts[i][j], len2[j]);
2586 line_len += len2[j];
2591 gcc_assert (ret + len >= ptr);
2596 /* Function that is callable from the debugger to print the current options. */
2599 ix86_debug_options (void)
2601 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2602 ix86_arch_string, ix86_tune_string,
2603 ix86_fpmath_string, true);
2607 fprintf (stderr, "%s\n\n", opts);
2611 fputs ("<no options>\n\n", stderr);
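/* A sketch of typical output: a compilation with
   "-m64 -msse4.2 -mtune=generic" should print something along the
   lines of

     -march=x86-64 -mtune=generic -m64 -msse4.2 -msse4.1 -mssse3
     -msse3 -msse2 -msse ...

   since each implied ISA bit is set in ix86_isa_flags and listed
   individually (exact contents depend on the configured defaults).  */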
2616 /* Sometimes certain combinations of command options do not make
2617 sense on a particular target machine. You can define a macro
2618 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2619 defined, is executed once just after all the command options have been parsed.
2622 Don't use this macro to turn on various extra optimizations for
2623 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2626 override_options (bool main_args_p)
2629 unsigned int ix86_arch_mask, ix86_tune_mask;
2630 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2635 /* Comes from final.c -- no real reason to change it. */
2636 #define MAX_CODE_ALIGN 16
2644 PTA_PREFETCH_SSE = 1 << 4,
2646 PTA_3DNOW_A = 1 << 6,
2650 PTA_POPCNT = 1 << 10,
2652 PTA_SSE4A = 1 << 12,
2653 PTA_NO_SAHF = 1 << 13,
2654 PTA_SSE4_1 = 1 << 14,
2655 PTA_SSE4_2 = 1 << 15,
2657 PTA_PCLMUL = 1 << 17,
2660 PTA_MOVBE = 1 << 20,
2668 const char *const name; /* processor name or nickname. */
2669 const enum processor_type processor;
2670 const enum attr_cpu schedule;
2671 const unsigned /*enum pta_flags*/ flags;
2673 const processor_alias_table[] =
2675 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2676 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2677 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2678 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2679 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2680 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2681 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2682 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2683 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2684 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2685 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2686 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2687 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2689 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2691 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2692 PTA_MMX | PTA_SSE | PTA_SSE2},
2693 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2694 PTA_MMX |PTA_SSE | PTA_SSE2},
2695 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2696 PTA_MMX | PTA_SSE | PTA_SSE2},
2697 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2698 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2699 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2700 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2701 | PTA_CX16 | PTA_NO_SAHF},
2702 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2703 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2704 | PTA_SSSE3 | PTA_CX16},
2705 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2706 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2707 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2708 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2709 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2710 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2711 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2712 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2713 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2714 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2715 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2716 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2717 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2718 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2719 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2720 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2721 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2722 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2723 {"x86-64", PROCESSOR_K8, CPU_K8,
2724 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2725 {"k8", PROCESSOR_K8, CPU_K8,
2726 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2727 | PTA_SSE2 | PTA_NO_SAHF},
2728 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2729 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2730 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2731 {"opteron", PROCESSOR_K8, CPU_K8,
2732 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2733 | PTA_SSE2 | PTA_NO_SAHF},
2734 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2735 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2736 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2737 {"athlon64", PROCESSOR_K8, CPU_K8,
2738 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2739 | PTA_SSE2 | PTA_NO_SAHF},
2740 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2741 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2742 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2743 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2744 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2745 | PTA_SSE2 | PTA_NO_SAHF},
2746 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2747 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2748 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2749 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2750 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2751 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2752 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2753 0 /* flags are only used for -march switch. */ },
2754 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2755 PTA_64BIT /* flags are only used for -march switch. */ },
2758 int const pta_size = ARRAY_SIZE (processor_alias_table);
2760 /* Set up prefix/suffix so the error messages refer to either the command
2761 line argument or the attribute(target). */
2770 prefix = "option(\"";
2775 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2776 SUBTARGET_OVERRIDE_OPTIONS;
2779 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2780 SUBSUBTARGET_OVERRIDE_OPTIONS;
2783 /* -fPIC is the default for x86_64. */
2784 if (TARGET_MACHO && TARGET_64BIT)
2787 /* Set the default values for switches whose default depends on TARGET_64BIT
2788 in case they weren't overridden by command line options. */
2791 /* Mach-O doesn't support omitting the frame pointer for now. */
2792 if (flag_omit_frame_pointer == 2)
2793 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2794 if (flag_asynchronous_unwind_tables == 2)
2795 flag_asynchronous_unwind_tables = 1;
2796 if (flag_pcc_struct_return == 2)
2797 flag_pcc_struct_return = 0;
2801 if (flag_omit_frame_pointer == 2)
2802 flag_omit_frame_pointer = 0;
2803 if (flag_asynchronous_unwind_tables == 2)
2804 flag_asynchronous_unwind_tables = 0;
2805 if (flag_pcc_struct_return == 2)
2806 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2809 /* Need to check -mtune=generic first. */
2810 if (ix86_tune_string)
2812 if (!strcmp (ix86_tune_string, "generic")
2813 || !strcmp (ix86_tune_string, "i686")
2814 /* As special support for cross compilers we read -mtune=native
2815 as -mtune=generic. With native compilers we won't see the
2816 -mtune=native, as it was changed by the driver. */
2817 || !strcmp (ix86_tune_string, "native"))
2820 ix86_tune_string = "generic64";
2822 ix86_tune_string = "generic32";
2824 /* If this call is for setting the option attribute, allow the
2825 generic32/generic64 that was previously set. */
2826 else if (!main_args_p
2827 && (!strcmp (ix86_tune_string, "generic32")
2828 || !strcmp (ix86_tune_string, "generic64")))
2830 else if (!strncmp (ix86_tune_string, "generic", 7))
2831 error ("bad value (%s) for %stune=%s %s",
2832 ix86_tune_string, prefix, suffix, sw);
2833 else if (!strcmp (ix86_tune_string, "x86-64"))
2834 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2835 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2836 prefix, suffix, prefix, suffix, prefix, suffix);
2840 if (ix86_arch_string)
2841 ix86_tune_string = ix86_arch_string;
2842 if (!ix86_tune_string)
2844 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2845 ix86_tune_defaulted = 1;
2848 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2849 need to use a sensible tune option. */
2850 if (!strcmp (ix86_tune_string, "generic")
2851 || !strcmp (ix86_tune_string, "x86-64")
2852 || !strcmp (ix86_tune_string, "i686"))
2855 ix86_tune_string = "generic64";
2857 ix86_tune_string = "generic32";
2861 if (ix86_stringop_string)
2863 if (!strcmp (ix86_stringop_string, "rep_byte"))
2864 stringop_alg = rep_prefix_1_byte;
2865 else if (!strcmp (ix86_stringop_string, "libcall"))
2866 stringop_alg = libcall;
2867 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2868 stringop_alg = rep_prefix_4_byte;
2869 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2871 /* rep; movq isn't available in 32-bit code. */
2872 stringop_alg = rep_prefix_8_byte;
2873 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2874 stringop_alg = loop_1_byte;
2875 else if (!strcmp (ix86_stringop_string, "loop"))
2876 stringop_alg = loop;
2877 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2878 stringop_alg = unrolled_loop;
2880 error ("bad value (%s) for %sstringop-strategy=%s %s",
2881 ix86_stringop_string, prefix, suffix, sw);
2884 if (!ix86_arch_string)
2885 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
2887 ix86_arch_specified = 1;
2889 /* Validate -mabi= value. */
2890 if (ix86_abi_string)
2892 if (strcmp (ix86_abi_string, "sysv") == 0)
2893 ix86_abi = SYSV_ABI;
2894 else if (strcmp (ix86_abi_string, "ms") == 0)
2897 error ("unknown ABI (%s) for %sabi=%s %s",
2898 ix86_abi_string, prefix, suffix, sw);
2901 ix86_abi = DEFAULT_ABI;
2903 if (ix86_cmodel_string != 0)
2905 if (!strcmp (ix86_cmodel_string, "small"))
2906 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2907 else if (!strcmp (ix86_cmodel_string, "medium"))
2908 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2909 else if (!strcmp (ix86_cmodel_string, "large"))
2910 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2912 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2913 else if (!strcmp (ix86_cmodel_string, "32"))
2914 ix86_cmodel = CM_32;
2915 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2916 ix86_cmodel = CM_KERNEL;
2918 error ("bad value (%s) for %scmodel=%s %s",
2919 ix86_cmodel_string, prefix, suffix, sw);
2923 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2924 use of rip-relative addressing. This eliminates fixups that
2925 would otherwise be needed if this object is to be placed in a
2926 DLL, and is essentially just as efficient as direct addressing. */
2927 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2928 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2929 else if (TARGET_64BIT)
2930 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2932 ix86_cmodel = CM_32;
2934 if (ix86_asm_string != 0)
2937 && !strcmp (ix86_asm_string, "intel"))
2938 ix86_asm_dialect = ASM_INTEL;
2939 else if (!strcmp (ix86_asm_string, "att"))
2940 ix86_asm_dialect = ASM_ATT;
2942 error ("bad value (%s) for %sasm=%s %s",
2943 ix86_asm_string, prefix, suffix, sw);
2945 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2946 error ("code model %qs not supported in the %s bit mode",
2947 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2948 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2949 sorry ("%i-bit mode not compiled in",
2950 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2952 for (i = 0; i < pta_size; i++)
2953 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2955 ix86_schedule = processor_alias_table[i].schedule;
2956 ix86_arch = processor_alias_table[i].processor;
2957 /* Default cpu tuning to the architecture. */
2958 ix86_tune = ix86_arch;
2960 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2961 error ("CPU you selected does not support x86-64 "
2964 if (processor_alias_table[i].flags & PTA_MMX
2965 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2966 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2967 if (processor_alias_table[i].flags & PTA_3DNOW
2968 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2969 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2970 if (processor_alias_table[i].flags & PTA_3DNOW_A
2971 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2972 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2973 if (processor_alias_table[i].flags & PTA_SSE
2974 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2975 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2976 if (processor_alias_table[i].flags & PTA_SSE2
2977 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2978 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2979 if (processor_alias_table[i].flags & PTA_SSE3
2980 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2981 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2982 if (processor_alias_table[i].flags & PTA_SSSE3
2983 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2984 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2985 if (processor_alias_table[i].flags & PTA_SSE4_1
2986 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2987 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2988 if (processor_alias_table[i].flags & PTA_SSE4_2
2989 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2990 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2991 if (processor_alias_table[i].flags & PTA_AVX
2992 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2993 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2994 if (processor_alias_table[i].flags & PTA_FMA
2995 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2996 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2997 if (processor_alias_table[i].flags & PTA_SSE4A
2998 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2999 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3000 if (processor_alias_table[i].flags & PTA_FMA4
3001 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3002 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3003 if (processor_alias_table[i].flags & PTA_XOP
3004 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3005 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3006 if (processor_alias_table[i].flags & PTA_LWP
3007 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3008 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3009 if (processor_alias_table[i].flags & PTA_ABM
3010 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3011 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3012 if (processor_alias_table[i].flags & PTA_CX16
3013 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3014 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3015 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3016 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3017 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3018 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3019 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3020 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3021 if (processor_alias_table[i].flags & PTA_MOVBE
3022 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3023 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3024 if (processor_alias_table[i].flags & PTA_AES
3025 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3026 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3027 if (processor_alias_table[i].flags & PTA_PCLMUL
3028 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3029 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3030 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3031 x86_prefetch_sse = true;
3036 if (!strcmp (ix86_arch_string, "generic"))
3037 error ("generic CPU can be used only for %stune=%s %s",
3038 prefix, suffix, sw);
3039 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3040 error ("bad value (%s) for %sarch=%s %s",
3041 ix86_arch_string, prefix, suffix, sw);
3043 ix86_arch_mask = 1u << ix86_arch;
3044 for (i = 0; i < X86_ARCH_LAST; ++i)
3045 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
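/* Worked example: the m_* masks are defined earlier in this file as
   1 << PROCESSOR_*, so with -march=pentium (ix86_arch ==
   PROCESSOR_PENTIUM) the X86_ARCH_CMOVE initializer
   ~(m_386 | m_486 | m_PENT | m_K6) has the m_PENT bit clear, and
   ix86_arch_features[X86_ARCH_CMOVE] ends up 0: no conditional moves
   on a plain Pentium.  */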
3047 for (i = 0; i < pta_size; i++)
3048 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3050 ix86_schedule = processor_alias_table[i].schedule;
3051 ix86_tune = processor_alias_table[i].processor;
3052 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3054 if (ix86_tune_defaulted)
3056 ix86_tune_string = "x86-64";
3057 for (i = 0; i < pta_size; i++)
3058 if (! strcmp (ix86_tune_string,
3059 processor_alias_table[i].name))
3061 ix86_schedule = processor_alias_table[i].schedule;
3062 ix86_tune = processor_alias_table[i].processor;
3065 error ("CPU you selected does not support x86-64 "
3068 /* Intel CPUs have always interpreted SSE prefetch instructions as
3069 NOPs, so we can enable SSE prefetch instructions even when
3070 -mtune (rather than -march) points us to a processor that has them.
3071 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3072 higher processors. */
3074 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3075 x86_prefetch_sse = true;
3079 if (ix86_tune_specified && i == pta_size)
3080 error ("bad value (%s) for %stune=%s %s",
3081 ix86_tune_string, prefix, suffix, sw);
3083 ix86_tune_mask = 1u << ix86_tune;
3084 for (i = 0; i < X86_TUNE_LAST; ++i)
3085 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3088 ix86_cost = &ix86_size_cost;
3090 ix86_cost = processor_target_table[ix86_tune].cost;
3092 /* Arrange to set up i386_stack_locals for all functions. */
3093 init_machine_status = ix86_init_machine_status;
3095 /* Validate -mregparm= value. */
3096 if (ix86_regparm_string)
3099 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3100 i = atoi (ix86_regparm_string);
3101 if (i < 0 || i > REGPARM_MAX)
3102 error ("%sregparm=%d%s is not between 0 and %d",
3103 prefix, i, suffix, REGPARM_MAX);
3108 ix86_regparm = REGPARM_MAX;
3110 /* If the user has provided any of the -malign-* options,
3111 warn and use that value only if -falign-* is not set.
3112 Remove this code in GCC 3.2 or later. */
3113 if (ix86_align_loops_string)
3115 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3116 prefix, suffix, suffix);
3117 if (align_loops == 0)
3119 i = atoi (ix86_align_loops_string);
3120 if (i < 0 || i > MAX_CODE_ALIGN)
3121 error ("%salign-loops=%d%s is not between 0 and %d",
3122 prefix, i, suffix, MAX_CODE_ALIGN);
3124 align_loops = 1 << i;
3128 if (ix86_align_jumps_string)
3130 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3131 prefix, suffix, suffix);
3132 if (align_jumps == 0)
3134 i = atoi (ix86_align_jumps_string);
3135 if (i < 0 || i > MAX_CODE_ALIGN)
3136 error ("%salign-loops=%d%s is not between 0 and %d",
3137 prefix, i, suffix, MAX_CODE_ALIGN);
3139 align_jumps = 1 << i;
3143 if (ix86_align_funcs_string)
3145 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3146 prefix, suffix, suffix);
3147 if (align_functions == 0)
3149 i = atoi (ix86_align_funcs_string);
3150 if (i < 0 || i > MAX_CODE_ALIGN)
3151 error ("%salign-loops=%d%s is not between 0 and %d",
3152 prefix, i, suffix, MAX_CODE_ALIGN);
3154 align_functions = 1 << i;
3158 /* Default align_* from the processor table. */
3159 if (align_loops == 0)
3161 align_loops = processor_target_table[ix86_tune].align_loop;
3162 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3164 if (align_jumps == 0)
3166 align_jumps = processor_target_table[ix86_tune].align_jump;
3167 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3169 if (align_functions == 0)
3171 align_functions = processor_target_table[ix86_tune].align_func;
3174 /* Validate -mbranch-cost= value, or provide default. */
3175 ix86_branch_cost = ix86_cost->branch_cost;
3176 if (ix86_branch_cost_string)
3178 i = atoi (ix86_branch_cost_string);
3180 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3182 ix86_branch_cost = i;
3184 if (ix86_section_threshold_string)
3186 i = atoi (ix86_section_threshold_string);
3188 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3190 ix86_section_threshold = i;
3193 if (ix86_tls_dialect_string)
3195 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3196 ix86_tls_dialect = TLS_DIALECT_GNU;
3197 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3198 ix86_tls_dialect = TLS_DIALECT_GNU2;
3199 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3200 ix86_tls_dialect = TLS_DIALECT_SUN;
3202 error ("bad value (%s) for %stls-dialect=%s %s",
3203 ix86_tls_dialect_string, prefix, suffix, sw);
3206 if (ix87_precision_string)
3208 i = atoi (ix87_precision_string);
3209 if (i != 32 && i != 64 && i != 80)
3210 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3215 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3217 /* Enable by default the SSE and MMX builtins. Do allow the user to
3218 explicitly disable any of these. In particular, disabling SSE and
3219 MMX for kernel code is extremely useful. */
3220 if (!ix86_arch_specified)
3222 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3223 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3226 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3230 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3232 if (!ix86_arch_specified)
3234 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3236 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3237 when the programmer takes care to keep the stack from being destroyed. */
3238 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3239 target_flags |= MASK_NO_RED_ZONE;
3242 /* Keep nonleaf frame pointers. */
3243 if (flag_omit_frame_pointer)
3244 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3245 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3246 flag_omit_frame_pointer = 1;
3248 /* If we're doing fast math, we don't care about comparison order
3249 wrt NaNs. This lets us use a shorter comparison sequence. */
3250 if (flag_finite_math_only)
3251 target_flags &= ~MASK_IEEE_FP;
3253 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3254 since the insns won't need emulation. */
3255 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3256 target_flags &= ~MASK_NO_FANCY_MATH_387;
3258 /* Likewise, if the target doesn't have a 387, or we've specified
3259 software floating point, don't use 387 inline intrinsics. */
3261 target_flags |= MASK_NO_FANCY_MATH_387;
3263 /* Turn on MMX builtins for -msse. */
3266 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3267 x86_prefetch_sse = true;
3270 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3271 if (TARGET_SSE4_2 || TARGET_ABM)
3272 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3274 /* Validate -mpreferred-stack-boundary= value or default it to
3275 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3276 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3277 if (ix86_preferred_stack_boundary_string)
3279 i = atoi (ix86_preferred_stack_boundary_string);
3280 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3281 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3282 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3284 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
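/* Worked example: the option value is a log2 byte count, so
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT
   = 16 * 8 = 128 bits, the usual 16-byte ABI stack alignment.  */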
3287 /* Set the default value for -mstackrealign. */
3288 if (ix86_force_align_arg_pointer == -1)
3289 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3291 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3293 /* Validate -mincoming-stack-boundary= value or default it to
3294 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3295 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3296 if (ix86_incoming_stack_boundary_string)
3298 i = atoi (ix86_incoming_stack_boundary_string);
3299 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3300 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3301 i, TARGET_64BIT ? 4 : 2);
3304 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3305 ix86_incoming_stack_boundary
3306 = ix86_user_incoming_stack_boundary;
3310 /* Accept -msseregparm only if at least SSE support is enabled. */
3311 if (TARGET_SSEREGPARM
3313 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3315 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3316 if (ix86_fpmath_string != 0)
3318 if (! strcmp (ix86_fpmath_string, "387"))
3319 ix86_fpmath = FPMATH_387;
3320 else if (! strcmp (ix86_fpmath_string, "sse"))
3324 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3325 ix86_fpmath = FPMATH_387;
3328 ix86_fpmath = FPMATH_SSE;
3330 else if (! strcmp (ix86_fpmath_string, "387,sse")
3331 || ! strcmp (ix86_fpmath_string, "387+sse")
3332 || ! strcmp (ix86_fpmath_string, "sse,387")
3333 || ! strcmp (ix86_fpmath_string, "sse+387")
3334 || ! strcmp (ix86_fpmath_string, "both"))
3338 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3339 ix86_fpmath = FPMATH_387;
3341 else if (!TARGET_80387)
3343 warning (0, "387 instruction set disabled, using SSE arithmetics");
3344 ix86_fpmath = FPMATH_SSE;
3347 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3350 error ("bad value (%s) for %sfpmath=%s %s",
3351 ix86_fpmath_string, prefix, suffix, sw);
3354 /* If the i387 is disabled, then do not return values in it. */
3356 target_flags &= ~MASK_FLOAT_RETURNS;
3358 /* Use an external vectorized library when vectorizing intrinsics. */
3359 if (ix86_veclibabi_string)
3361 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3362 ix86_veclib_handler = ix86_veclibabi_svml;
3363 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3364 ix86_veclib_handler = ix86_veclibabi_acml;
3366 error ("unknown vectorization library ABI type (%s) for "
3367 "%sveclibabi=%s %s", ix86_veclibabi_string,
3368 prefix, suffix, sw);
3371 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3372 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3374 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3376 /* ??? Unwind info is not correct around the CFG unless either a frame
3377 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3378 unwind info generation to be aware of the CFG and propagating states around edges. */
3380 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3381 || flag_exceptions || flag_non_call_exceptions)
3382 && flag_omit_frame_pointer
3383 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3385 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3386 warning (0, "unwind tables currently require either a frame pointer "
3387 "or %saccumulate-outgoing-args%s for correctness",
3389 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3392 /* If stack probes are required, the space used for large function
3393 arguments on the stack must also be probed, so enable
3394 -maccumulate-outgoing-args so this happens in the prologue. */
3395 if (TARGET_STACK_PROBE
3396 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3398 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3399 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3400 "for correctness", prefix, suffix);
3401 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3404 /* For sane SSE instruction set generation we need the fcomi instruction.
3405 It is safe to enable all CMOVE instructions. */
3409 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3412 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3413 p = strchr (internal_label_prefix, 'X');
3414 internal_label_prefix_len = p - internal_label_prefix;
3418 /* When the scheduling description is not available, disable the scheduler pass
3419 so it won't slow down compilation and make x87 code slower. */
3420 if (!TARGET_SCHEDULE)
3421 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3423 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3424 set_param_value ("simultaneous-prefetches",
3425 ix86_cost->simultaneous_prefetches);
3426 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3427 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3428 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3429 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3430 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3431 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3433 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3434 can be optimized to ap = __builtin_next_arg (0). */
3436 targetm.expand_builtin_va_start = NULL;
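/* As an illustration (example code, not part of GCC itself): when va_list
   is a plain pointer, a varargs function such as

     #include <stdarg.h>
     int first_vararg (int n, ...)
     {
       va_list ap;
       int v;
       va_start (ap, n);
       v = va_arg (ap, int);
       va_end (ap);
       return v;
     }

   lets the generic code lower va_start to the simple assignment
   ap = __builtin_next_arg (n), which is why the target hook can be
   cleared here.  */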
3440 ix86_gen_leave = gen_leave_rex64;
3441 ix86_gen_pop1 = gen_popdi1;
3442 ix86_gen_add3 = gen_adddi3;
3443 ix86_gen_sub3 = gen_subdi3;
3444 ix86_gen_sub3_carry = gen_subdi3_carry;
3445 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3446 ix86_gen_monitor = gen_sse3_monitor64;
3447 ix86_gen_andsp = gen_anddi3;
3451 ix86_gen_leave = gen_leave;
3452 ix86_gen_pop1 = gen_popsi1;
3453 ix86_gen_add3 = gen_addsi3;
3454 ix86_gen_sub3 = gen_subsi3;
3455 ix86_gen_sub3_carry = gen_subsi3_carry;
3456 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3457 ix86_gen_monitor = gen_sse3_monitor;
3458 ix86_gen_andsp = gen_andsi3;
3462 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3464 target_flags |= MASK_CLD & ~target_flags_explicit;
/* Save the initial options in case the user uses function-specific options.  */
3469 target_option_default_node = target_option_current_node
3470 = build_target_option_node ();
3473 /* Update register usage after having seen the compiler flags. */
3476 ix86_conditional_register_usage (void)
3481 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3483 if (fixed_regs[i] > 1)
3484 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3485 if (call_used_regs[i] > 1)
3486 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3489 /* The PIC register, if it exists, is fixed. */
3490 j = PIC_OFFSET_TABLE_REGNUM;
3491 if (j != INVALID_REGNUM)
3492 fixed_regs[j] = call_used_regs[j] = 1;
3494 /* The MS_ABI changes the set of call-used registers. */
3495 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3497 call_used_regs[SI_REG] = 0;
3498 call_used_regs[DI_REG] = 0;
3499 call_used_regs[XMM6_REG] = 0;
3500 call_used_regs[XMM7_REG] = 0;
3501 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3502 call_used_regs[i] = 0;
3505 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3506 other call-clobbered regs for 64-bit. */
3509 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3511 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3512 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3513 && call_used_regs[i])
3514 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3517 /* If MMX is disabled, squash the registers. */
3519 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3520 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3521 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3523 /* If SSE is disabled, squash the registers. */
3525 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3526 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3527 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3529 /* If the FPU is disabled, squash the registers. */
3530 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3531 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3532 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3533 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3535 /* If 32-bit, squash the 64-bit registers. */
3538 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3540 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3546 /* Save the current options */
3549 ix86_function_specific_save (struct cl_target_option *ptr)
3551 ptr->arch = ix86_arch;
3552 ptr->schedule = ix86_schedule;
3553 ptr->tune = ix86_tune;
3554 ptr->fpmath = ix86_fpmath;
3555 ptr->branch_cost = ix86_branch_cost;
3556 ptr->tune_defaulted = ix86_tune_defaulted;
3557 ptr->arch_specified = ix86_arch_specified;
3558 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3559 ptr->target_flags_explicit = target_flags_explicit;
3561 /* The fields are char but the variables are not; make sure the
3562 values fit in the fields. */
3563 gcc_assert (ptr->arch == ix86_arch);
3564 gcc_assert (ptr->schedule == ix86_schedule);
3565 gcc_assert (ptr->tune == ix86_tune);
3566 gcc_assert (ptr->fpmath == ix86_fpmath);
3567 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3570 /* Restore the current options */
3573 ix86_function_specific_restore (struct cl_target_option *ptr)
3575 enum processor_type old_tune = ix86_tune;
3576 enum processor_type old_arch = ix86_arch;
3577 unsigned int ix86_arch_mask, ix86_tune_mask;
3580 ix86_arch = (enum processor_type) ptr->arch;
3581 ix86_schedule = (enum attr_cpu) ptr->schedule;
3582 ix86_tune = (enum processor_type) ptr->tune;
3583 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3584 ix86_branch_cost = ptr->branch_cost;
3585 ix86_tune_defaulted = ptr->tune_defaulted;
3586 ix86_arch_specified = ptr->arch_specified;
3587 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3588 target_flags_explicit = ptr->target_flags_explicit;
3590 /* Recreate the arch feature tests if the arch changed */
3591 if (old_arch != ix86_arch)
3593 ix86_arch_mask = 1u << ix86_arch;
3594 for (i = 0; i < X86_ARCH_LAST; ++i)
3595 ix86_arch_features[i]
3596 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3599 /* Recreate the tune optimization tests */
3600 if (old_tune != ix86_tune)
3602 ix86_tune_mask = 1u << ix86_tune;
3603 for (i = 0; i < X86_TUNE_LAST; ++i)
3604 ix86_tune_features[i]
3605 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3609 /* Print the current options */
3612 ix86_function_specific_print (FILE *file, int indent,
3613 struct cl_target_option *ptr)
3616 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3617 NULL, NULL, NULL, false);
3619 fprintf (file, "%*sarch = %d (%s)\n",
3622 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3623 ? cpu_names[ptr->arch]
3626 fprintf (file, "%*stune = %d (%s)\n",
3629 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3630 ? cpu_names[ptr->tune]
3633 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3634 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3635 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3636 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3640 fprintf (file, "%*s%s\n", indent, "", target_string);
3641 free (target_string);
/* Inner function to process the attribute((target(...))): take one
   argument and set the current options from it.  If we have a list,
   recursively go over the list.  */
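/* For example (illustrative, not from this file), the attribute parsed
   here lets a single function be compiled with extra ISA support:

     unsigned int __attribute__((target("sse4.2,no-3dnow")))
     crc8 (unsigned int crc, unsigned char b)
     {
       return __builtin_ia32_crc32qi (crc, b);
     }

   Each comma-separated string maps onto the corresponding -m option,
   and a "no-" prefix negates an ISA flag.  */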
3651 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3656 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3657 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3658 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3659 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3674 enum ix86_opt_type type;
3679 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3680 IX86_ATTR_ISA ("abm", OPT_mabm),
3681 IX86_ATTR_ISA ("aes", OPT_maes),
3682 IX86_ATTR_ISA ("avx", OPT_mavx),
3683 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3684 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3685 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3686 IX86_ATTR_ISA ("sse", OPT_msse),
3687 IX86_ATTR_ISA ("sse2", OPT_msse2),
3688 IX86_ATTR_ISA ("sse3", OPT_msse3),
3689 IX86_ATTR_ISA ("sse4", OPT_msse4),
3690 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3691 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3692 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3693 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3694 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3695 IX86_ATTR_ISA ("xop", OPT_mxop),
3696 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3698 /* string options */
3699 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3700 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3701 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3704 IX86_ATTR_YES ("cld",
3708 IX86_ATTR_NO ("fancy-math-387",
3709 OPT_mfancy_math_387,
3710 MASK_NO_FANCY_MATH_387),
3712 IX86_ATTR_YES ("ieee-fp",
3716 IX86_ATTR_YES ("inline-all-stringops",
3717 OPT_minline_all_stringops,
3718 MASK_INLINE_ALL_STRINGOPS),
3720 IX86_ATTR_YES ("inline-stringops-dynamically",
3721 OPT_minline_stringops_dynamically,
3722 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3724 IX86_ATTR_NO ("align-stringops",
3725 OPT_mno_align_stringops,
3726 MASK_NO_ALIGN_STRINGOPS),
3728 IX86_ATTR_YES ("recip",
3734 /* If this is a list, recurse to get the options. */
3735 if (TREE_CODE (args) == TREE_LIST)
3739 for (; args; args = TREE_CHAIN (args))
3740 if (TREE_VALUE (args)
3741 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3747 else if (TREE_CODE (args) != STRING_CST)
3750 /* Handle multiple arguments separated by commas. */
3751 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3753 while (next_optstr && *next_optstr != '\0')
3755 char *p = next_optstr;
3757 char *comma = strchr (next_optstr, ',');
3758 const char *opt_string;
3759 size_t len, opt_len;
3764 enum ix86_opt_type type = ix86_opt_unknown;
3770 len = comma - next_optstr;
3771 next_optstr = comma + 1;
3779 /* Recognize no-xxx. */
3780 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3789 /* Find the option. */
3792 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3794 type = attrs[i].type;
3795 opt_len = attrs[i].len;
3796 if (ch == attrs[i].string[0]
3797 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3798 && memcmp (p, attrs[i].string, opt_len) == 0)
3801 mask = attrs[i].mask;
3802 opt_string = attrs[i].string;
3807 /* Process the option. */
3810 error ("attribute(target(\"%s\")) is unknown", orig_p);
3814 else if (type == ix86_opt_isa)
3815 ix86_handle_option (opt, p, opt_set_p);
3817 else if (type == ix86_opt_yes || type == ix86_opt_no)
3819 if (type == ix86_opt_no)
3820 opt_set_p = !opt_set_p;
3823 target_flags |= mask;
3825 target_flags &= ~mask;
3828 else if (type == ix86_opt_str)
3832 error ("option(\"%s\") was already specified", opt_string);
3836 p_strings[opt] = xstrdup (p + opt_len);
3846 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3849 ix86_valid_target_attribute_tree (tree args)
3851 const char *orig_arch_string = ix86_arch_string;
3852 const char *orig_tune_string = ix86_tune_string;
3853 const char *orig_fpmath_string = ix86_fpmath_string;
3854 int orig_tune_defaulted = ix86_tune_defaulted;
3855 int orig_arch_specified = ix86_arch_specified;
3856 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3859 struct cl_target_option *def
3860 = TREE_TARGET_OPTION (target_option_default_node);
3862 /* Process each of the options on the chain. */
3863 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
/* If the changed options are different from the default, rerun
   override_options, and then save the options away.  The string options
   are attribute options, and will be undone when we copy the save
   structure.  */
3869 if (ix86_isa_flags != def->ix86_isa_flags
3870 || target_flags != def->target_flags
3871 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3872 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3873 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3875 /* If we are using the default tune= or arch=, undo the string assigned,
3876 and use the default. */
3877 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3878 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3879 else if (!orig_arch_specified)
3880 ix86_arch_string = NULL;
3882 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3883 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3884 else if (orig_tune_defaulted)
3885 ix86_tune_string = NULL;
3887 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3888 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3889 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3890 else if (!TARGET_64BIT && TARGET_SSE)
3891 ix86_fpmath_string = "sse,387";
3893 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3894 override_options (false);
3896 /* Add any builtin functions with the new isa if any. */
3897 ix86_add_new_builtins (ix86_isa_flags);
/* Save the current options unless we are validating options for
   #pragma.  */
3901 t = build_target_option_node ();
3903 ix86_arch_string = orig_arch_string;
3904 ix86_tune_string = orig_tune_string;
3905 ix86_fpmath_string = orig_fpmath_string;
3907 /* Free up memory allocated to hold the strings */
3908 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3909 if (option_strings[i])
3910 free (option_strings[i]);
3916 /* Hook to validate attribute((target("string"))). */
3919 ix86_valid_target_attribute_p (tree fndecl,
3920 tree ARG_UNUSED (name),
3922 int ARG_UNUSED (flags))
3924 struct cl_target_option cur_target;
3926 tree old_optimize = build_optimization_node ();
3927 tree new_target, new_optimize;
3928 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3930 /* If the function changed the optimization levels as well as setting target
3931 options, start with the optimizations specified. */
3932 if (func_optimize && func_optimize != old_optimize)
3933 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3935 /* The target attributes may also change some optimization flags, so update
3936 the optimization options if necessary. */
3937 cl_target_option_save (&cur_target);
3938 new_target = ix86_valid_target_attribute_tree (args);
3939 new_optimize = build_optimization_node ();
3946 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3948 if (old_optimize != new_optimize)
3949 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3952 cl_target_option_restore (&cur_target);
3954 if (old_optimize != new_optimize)
3955 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3961 /* Hook to determine if one function can safely inline another. */
3964 ix86_can_inline_p (tree caller, tree callee)
3967 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3968 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3970 /* If callee has no option attributes, then it is ok to inline. */
/* If the caller has no option attributes, but the callee does, then it
   is not ok to inline.  */
3976 else if (!caller_tree)
3981 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3982 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
/* Callee's ISA options should be a subset of the caller's, i.e. an SSE4
   function can inline an SSE2 function but an SSE2 function can't
   inline an SSE4 function.  */
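/* Illustration of this asymmetry (example code, not from GCC):

     int __attribute__((target("sse2"))) callee (void) { return 0; }
     int __attribute__((target("sse4.2"))) caller (void)
     {
       return callee ();
     }

   Here the caller's ISA flags are a superset of the callee's, so the
   inline is allowed; with the attributes swapped, the subset test below
   fails and the callee stays out of line.  */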
3987 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3988 != callee_opts->ix86_isa_flags)
3991 /* See if we have the same non-isa options. */
3992 else if (caller_opts->target_flags != callee_opts->target_flags)
3995 /* See if arch, tune, etc. are the same. */
3996 else if (caller_opts->arch != callee_opts->arch)
3999 else if (caller_opts->tune != callee_opts->tune)
4002 else if (caller_opts->fpmath != callee_opts->fpmath)
4005 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4016 /* Remember the last target of ix86_set_current_function. */
4017 static GTY(()) tree ix86_previous_fndecl;
4019 /* Establish appropriate back-end context for processing the function
4020 FNDECL. The argument might be NULL to indicate processing at top
4021 level, outside of any function scope. */
4023 ix86_set_current_function (tree fndecl)
4025 /* Only change the context if the function changes. This hook is called
4026 several times in the course of compiling a function, and we don't want to
4027 slow things down too much or call target_reinit when it isn't safe. */
4028 if (fndecl && fndecl != ix86_previous_fndecl)
4030 tree old_tree = (ix86_previous_fndecl
4031 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4034 tree new_tree = (fndecl
4035 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4038 ix86_previous_fndecl = fndecl;
4039 if (old_tree == new_tree)
4044 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4050 struct cl_target_option *def
4051 = TREE_TARGET_OPTION (target_option_current_node);
4053 cl_target_option_restore (def);
4060 /* Return true if this goes in large data/bss. */
4063 ix86_in_large_data_p (tree exp)
4065 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4068 /* Functions are never large data. */
4069 if (TREE_CODE (exp) == FUNCTION_DECL)
4072 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4074 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4075 if (strcmp (section, ".ldata") == 0
4076 || strcmp (section, ".lbss") == 0)
4082 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4084 /* If this is an incomplete type with size 0, then we can't put it
4085 in data because it might be too big when completed. */
4086 if (!size || size > ix86_section_threshold)
4093 /* Switch to the appropriate section for output of DECL.
4094 DECL is either a `VAR_DECL' node or a constant of some sort.
4095 RELOC indicates whether forming the initial value of DECL requires
4096 link-time relocations. */
4098 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4102 x86_64_elf_select_section (tree decl, int reloc,
4103 unsigned HOST_WIDE_INT align)
4105 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4106 && ix86_in_large_data_p (decl))
4108 const char *sname = NULL;
4109 unsigned int flags = SECTION_WRITE;
4110 switch (categorize_decl_for_section (decl, reloc))
4115 case SECCAT_DATA_REL:
4116 sname = ".ldata.rel";
4118 case SECCAT_DATA_REL_LOCAL:
4119 sname = ".ldata.rel.local";
4121 case SECCAT_DATA_REL_RO:
4122 sname = ".ldata.rel.ro";
4124 case SECCAT_DATA_REL_RO_LOCAL:
4125 sname = ".ldata.rel.ro.local";
4129 flags |= SECTION_BSS;
4132 case SECCAT_RODATA_MERGE_STR:
4133 case SECCAT_RODATA_MERGE_STR_INIT:
4134 case SECCAT_RODATA_MERGE_CONST:
4138 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
4148 case SECCAT_EMUTLS_VAR:
4149 case SECCAT_EMUTLS_TMPL:
4154 /* We might get called with string constants, but get_named_section
4155 doesn't like them as they are not DECLs. Also, we need to set
4156 flags in that case. */
4158 return get_section (sname, flags, NULL);
4159 return get_named_section (decl, sname, reloc);
4162 return default_elf_select_section (decl, reloc, align);
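/* For instance (hypothetical example, not asserted by this file): when
   compiling with -mcmodel=medium, an object larger than the
   -mlarge-data-threshold such as

     static char big_buffer[1 << 20];

   is categorized as BSS and placed in .lbss by the code above, while
   small objects continue to use the standard ELF sections.  */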
4165 /* Build up a unique section name, expressed as a
4166 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4167 RELOC indicates whether the initial value of EXP requires
4168 link-time relocations. */
4170 static void ATTRIBUTE_UNUSED
4171 x86_64_elf_unique_section (tree decl, int reloc)
4173 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4174 && ix86_in_large_data_p (decl))
4176 const char *prefix = NULL;
4177 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4178 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4180 switch (categorize_decl_for_section (decl, reloc))
4183 case SECCAT_DATA_REL:
4184 case SECCAT_DATA_REL_LOCAL:
4185 case SECCAT_DATA_REL_RO:
4186 case SECCAT_DATA_REL_RO_LOCAL:
4187 prefix = one_only ? ".ld" : ".ldata";
4190 prefix = one_only ? ".lb" : ".lbss";
4193 case SECCAT_RODATA_MERGE_STR:
4194 case SECCAT_RODATA_MERGE_STR_INIT:
4195 case SECCAT_RODATA_MERGE_CONST:
4196 prefix = one_only ? ".lr" : ".lrodata";
4198 case SECCAT_SRODATA:
/* We don't split these for the medium model.  Place them into
   default sections and hope for the best.  */
4208 case SECCAT_EMUTLS_VAR:
4209 prefix = targetm.emutls.var_section;
4211 case SECCAT_EMUTLS_TMPL:
4212 prefix = targetm.emutls.tmpl_section;
4217 const char *name, *linkonce;
4220 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4221 name = targetm.strip_name_encoding (name);
4223 /* If we're using one_only, then there needs to be a .gnu.linkonce
4224 prefix to the section name. */
4225 linkonce = one_only ? ".gnu.linkonce" : "";
4227 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4229 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4233 default_unique_section (decl, reloc);
4236 #ifdef COMMON_ASM_OP
/* This says how to output assembler code to declare an
   uninitialized external linkage data object.

   For medium-model x86-64 we need to use the .largecomm opcode for
   large objects.  */
4243 x86_elf_aligned_common (FILE *file,
4244 const char *name, unsigned HOST_WIDE_INT size,
4247 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4248 && size > (unsigned int)ix86_section_threshold)
4249 fputs (".largecomm\t", file);
4251 fputs (COMMON_ASM_OP, file);
4252 assemble_name (file, name);
4253 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4254 size, align / BITS_PER_UNIT);
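/* E.g. (illustrative output): for a 1 MiB common symbol with 32-byte
   alignment under -mcmodel=medium this prints

       .largecomm  buf,1048576,32

   while smaller symbols get the usual COMMON_ASM_OP (".comm") form.  */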
4258 /* Utility function for targets to use in implementing
4259 ASM_OUTPUT_ALIGNED_BSS. */
4262 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4263 const char *name, unsigned HOST_WIDE_INT size,
4266 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4267 && size > (unsigned int)ix86_section_threshold)
4268 switch_to_section (get_named_section (decl, ".lbss", 0));
4270 switch_to_section (bss_section);
4271 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4272 #ifdef ASM_DECLARE_OBJECT_NAME
4273 last_assemble_variable_decl = decl;
4274 ASM_DECLARE_OBJECT_NAME (file, name, decl);
/* The standard thing is just to output a label for the object.  */
4277 ASM_OUTPUT_LABEL (file, name);
4278 #endif /* ASM_DECLARE_OBJECT_NAME */
4279 ASM_OUTPUT_SKIP (file, size ? size : 1);
4283 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4285 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4286 make the problem with not enough registers even worse. */
4287 #ifdef INSN_SCHEDULING
4289 flag_schedule_insns = 0;
4293 /* The Darwin libraries never set errno, so we might as well
4294 avoid calling them when that's the only reason we would. */
4295 flag_errno_math = 0;
/* The default values of these switches depend on TARGET_64BIT, which is
   not known at this moment.  Mark these values with 2 and let the user
   override them.  In case there is no command line option specifying
   them, we will set the defaults in override_options.  */
4302 flag_omit_frame_pointer = 2;
4303 flag_pcc_struct_return = 2;
4304 flag_asynchronous_unwind_tables = 2;
4305 flag_vect_cost_model = 1;
4306 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4307 SUBTARGET_OPTIMIZATION_OPTIONS;
4311 /* Decide whether we can make a sibling call to a function. DECL is the
4312 declaration of the function being targeted by the call and EXP is the
4313 CALL_EXPR representing the call. */
4316 ix86_function_ok_for_sibcall (tree decl, tree exp)
4318 tree type, decl_or_type;
4321 /* If we are generating position-independent code, we cannot sibcall
4322 optimize any indirect call, or a direct call to a global function,
4323 as the PLT requires %ebx be live. */
4324 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4327 /* If we need to align the outgoing stack, then sibcalling would
4328 unalign the stack, which may break the called function. */
4329 if (ix86_minimum_incoming_stack_boundary (true)
4330 < PREFERRED_STACK_BOUNDARY)
4335 decl_or_type = decl;
4336 type = TREE_TYPE (decl);
4340 /* We're looking at the CALL_EXPR, we need the type of the function. */
4341 type = CALL_EXPR_FN (exp); /* pointer expression */
4342 type = TREE_TYPE (type); /* pointer type */
4343 type = TREE_TYPE (type); /* function type */
4344 decl_or_type = type;
4347 /* Check that the return value locations are the same. Like
4348 if we are returning floats on the 80387 register stack, we cannot
4349 make a sibcall from a function that doesn't return a float to a
4350 function that does or, conversely, from a function that does return
4351 a float to a function that doesn't; the necessary stack adjustment
4352 would not be executed. This is also the place we notice
4353 differences in the return value ABI. Note that it is ok for one
4354 of the functions to have void return type as long as the return
4355 value of the other is passed in a register. */
4356 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4357 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4359 if (STACK_REG_P (a) || STACK_REG_P (b))
4361 if (!rtx_equal_p (a, b))
4364 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4366 else if (!rtx_equal_p (a, b))
4371 /* The SYSV ABI has more call-clobbered registers;
4372 disallow sibcalls from MS to SYSV. */
4373 if (cfun->machine->call_abi == MS_ABI
4374 && ix86_function_type_abi (type) == SYSV_ABI)
4379 /* If this call is indirect, we'll need to be able to use a
4380 call-clobbered register for the address of the target function.
4381 Make sure that all such registers are not used for passing
4382 parameters. Note that DLLIMPORT functions are indirect. */
4384 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4386 if (ix86_function_regparm (type, NULL) >= 3)
4388 /* ??? Need to count the actual number of registers to be used,
4389 not the possible number of registers. Fix later. */
4395 /* Otherwise okay. That also includes certain types of indirect calls. */
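/* Illustration of the PIC restriction above (example, not from GCC):

     extern int bar (int);
     int foo (int x) { return bar (x); }

   Compiled with -m32 -fpic, the call to the global function bar must go
   through the PLT, which requires %ebx to hold the GOT pointer, so the
   tail call cannot be turned into a sibcall.  */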
4399 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4400 calling convention attributes;
4401 arguments as in struct attribute_spec.handler. */
4404 ix86_handle_cconv_attribute (tree *node, tree name,
4406 int flags ATTRIBUTE_UNUSED,
4409 if (TREE_CODE (*node) != FUNCTION_TYPE
4410 && TREE_CODE (*node) != METHOD_TYPE
4411 && TREE_CODE (*node) != FIELD_DECL
4412 && TREE_CODE (*node) != TYPE_DECL)
4414 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4416 *no_add_attrs = true;
4420 /* Can combine regparm with all attributes but fastcall. */
4421 if (is_attribute_p ("regparm", name))
4425 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4427 error ("fastcall and regparm attributes are not compatible");
4430 cst = TREE_VALUE (args);
4431 if (TREE_CODE (cst) != INTEGER_CST)
4433 warning (OPT_Wattributes,
4434 "%qE attribute requires an integer constant argument",
4436 *no_add_attrs = true;
4438 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4440 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4442 *no_add_attrs = true;
4450 /* Do not warn when emulating the MS ABI. */
4451 if (TREE_CODE (*node) != FUNCTION_TYPE
4452 || ix86_function_type_abi (*node) != MS_ABI)
4453 warning (OPT_Wattributes, "%qE attribute ignored",
4455 *no_add_attrs = true;
4459 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4460 if (is_attribute_p ("fastcall", name))
4462 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4464 error ("fastcall and cdecl attributes are not compatible");
4466 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4468 error ("fastcall and stdcall attributes are not compatible");
4470 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4472 error ("fastcall and regparm attributes are not compatible");
/* Can combine stdcall with fastcall (redundant), regparm and
   sseregparm.  */
4478 else if (is_attribute_p ("stdcall", name))
4480 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4482 error ("stdcall and cdecl attributes are not compatible");
4484 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4486 error ("stdcall and fastcall attributes are not compatible");
4490 /* Can combine cdecl with regparm and sseregparm. */
4491 else if (is_attribute_p ("cdecl", name))
4493 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4495 error ("stdcall and cdecl attributes are not compatible");
4497 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4499 error ("fastcall and cdecl attributes are not compatible");
4503 /* Can combine sseregparm with all attributes. */
4508 /* Return 0 if the attributes for two types are incompatible, 1 if they
4509 are compatible, and 2 if they are nearly compatible (which causes a
4510 warning to be generated). */
4513 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4515 /* Check for mismatch of non-default calling convention. */
4516 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4518 if (TREE_CODE (type1) != FUNCTION_TYPE
4519 && TREE_CODE (type1) != METHOD_TYPE)
4522 /* Check for mismatched fastcall/regparm types. */
4523 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4524 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4525 || (ix86_function_regparm (type1, NULL)
4526 != ix86_function_regparm (type2, NULL)))
4529 /* Check for mismatched sseregparm types. */
4530 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4531 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4534 /* Check for mismatched return types (cdecl vs stdcall). */
4535 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4536 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4542 /* Return the regparm value for a function with the indicated TYPE and DECL.
4543 DECL may be NULL when calling function indirectly
4544 or considering a libcall. */
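/* For example (illustrative): given

     int __attribute__((regparm(3))) f (int a, int b, int c);

   this function returns 3, and the three arguments travel in %eax,
   %edx and %ecx instead of on the stack.  */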
4547 ix86_function_regparm (const_tree type, const_tree decl)
4553 return (ix86_function_type_abi (type) == SYSV_ABI
4554 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4556 regparm = ix86_regparm;
4557 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4560 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4564 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4567 /* Use register calling convention for local functions when possible. */
4569 && TREE_CODE (decl) == FUNCTION_DECL
4573 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4574 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4577 int local_regparm, globals = 0, regno;
4579 /* Make sure no regparm register is taken by a
4580 fixed register variable. */
4581 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4582 if (fixed_regs[local_regparm])
4585 /* We don't want to use regparm(3) for nested functions as
4586 these use a static chain pointer in the third argument. */
4587 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
/* Each fixed register usage increases register pressure, so fewer
   registers should be used for argument passing.  This functionality
   can be overridden by an explicit regparm value.  */
4594 for (regno = 0; regno <= DI_REG; regno++)
4595 if (fixed_regs[regno])
4599 = globals < local_regparm ? local_regparm - globals : 0;
4601 if (local_regparm > regparm)
4602 regparm = local_regparm;
4609 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4610 DFmode (2) arguments in SSE registers for a function with the
4611 indicated TYPE and DECL. DECL may be NULL when calling function
4612 indirectly or considering a libcall. Otherwise return 0. */
4615 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4617 gcc_assert (!TARGET_64BIT);
4619 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4620 by the sseregparm attribute. */
4621 if (TARGET_SSEREGPARM
4622 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
error ("calling %qD with attribute sseregparm without "
4630 "SSE/SSE2 enabled", decl);
error ("calling %qT with attribute sseregparm without "
4633 "SSE/SSE2 enabled", type);
4641 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4642 (and DFmode for SSE2) arguments in SSE registers. */
4643 if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4645 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4646 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4648 return TARGET_SSE2 ? 2 : 1;
4654 /* Return true if EAX is live at the start of the function. Used by
4655 ix86_expand_prologue to determine if we need special help before
4656 calling allocate_stack_worker. */
4659 ix86_eax_live_at_start_p (void)
4661 /* Cheat. Don't bother working forward from ix86_function_regparm
4662 to the function type to whether an actual argument is located in
4663 eax. Instead just look at cfg info, which is still close enough
4664 to correct at this point. This gives false positives for broken
4665 functions that might use uninitialized data that happens to be
4666 allocated in eax, but who cares? */
4667 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4670 /* Value is the number of bytes of arguments automatically
4671 popped when returning from a subroutine call.
4672 FUNDECL is the declaration node of the function (as a tree),
4673 FUNTYPE is the data type of the function (as a tree),
4674 or for a library call it is an identifier node for the subroutine name.
4675 SIZE is the number of bytes of arguments passed on the stack.
4677 On the 80386, the RTD insn may be used to pop them if the number
4678 of args is fixed, but if the number is variable then the caller
4679 must pop them all. RTD can't be used for library calls now
4680 because the library is compiled with the Unix compiler.
4681 Use of RTD is a selectable option, since it is incompatible with
4682 standard Unix calling sequences. If the option is not selected,
4683 the caller must always pop the args.
4685 The attribute stdcall is equivalent to RTD on a per module basis. */
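/* For example (illustrative): for

     int __attribute__((stdcall)) g (int a, int b);

   SIZE is 8 and the callee returns with "ret $8", so 8 is reported
   here; a plain cdecl function reports 0 and the caller adjusts the
   stack pointer itself.  */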
4688 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4692 /* None of the 64-bit ABIs pop arguments. */
4696 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4698 /* Cdecl functions override -mrtd, and never pop the stack. */
4699 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
/* Stdcall and fastcall functions will pop the stack if not
   variable args.  */
4703 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4704 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4707 if (rtd && ! stdarg_p (funtype))
4711 /* Lose any fake structure return argument if it is passed on the stack. */
4712 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4713 && !KEEP_AGGREGATE_RETURN_POINTER)
4715 int nregs = ix86_function_regparm (funtype, fundecl);
4717 return GET_MODE_SIZE (Pmode);
4723 /* Argument support functions. */
4725 /* Return true when register may be used to pass function parameters. */
4727 ix86_function_arg_regno_p (int regno)
4730 const int *parm_regs;
4735 return (regno < REGPARM_MAX
4736 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4738 return (regno < REGPARM_MAX
4739 || (TARGET_MMX && MMX_REGNO_P (regno)
4740 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4741 || (TARGET_SSE && SSE_REGNO_P (regno)
4742 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4747 if (SSE_REGNO_P (regno) && TARGET_SSE)
4752 if (TARGET_SSE && SSE_REGNO_P (regno)
4753 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
/* TODO: The function should depend on the current function's ABI, but
   builtins.c would need updating then.  Therefore we use the
   default ABI.  */
4762 if (ix86_abi == SYSV_ABI && regno == AX_REG)
4765 if (ix86_abi == MS_ABI)
4766 parm_regs = x86_64_ms_abi_int_parameter_registers;
4768 parm_regs = x86_64_int_parameter_registers;
4769 for (i = 0; i < (ix86_abi == MS_ABI
4770 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
4771 if (regno == parm_regs[i])
/* Return true if we do not know how to pass TYPE solely in registers.  */
4779 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4781 if (must_pass_in_stack_var_size_or_pad (mode, type))
4784 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4785 The layout_type routine is crafty and tries to trick us into passing
4786 currently unsupported vector types on the stack by using TImode. */
4787 return (!TARGET_64BIT && mode == TImode
4788 && type && TREE_CODE (type) != VECTOR_TYPE);
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI used.  */
4795 ix86_reg_parm_stack_space (const_tree fndecl)
4797 enum calling_abi call_abi = SYSV_ABI;
4798 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4799 call_abi = ix86_function_abi (fndecl);
4801 call_abi = ix86_function_type_abi (fndecl);
4802 if (call_abi == MS_ABI)
/* Return SYSV_ABI or MS_ABI depending on FNTYPE, specifying the
   call ABI used.  */
4810 ix86_function_type_abi (const_tree fntype)
4812 if (TARGET_64BIT && fntype != NULL)
4814 enum calling_abi abi = ix86_abi;
4815 if (abi == SYSV_ABI)
4817 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
4820 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
4828 ix86_function_ms_hook_prologue (const_tree fntype)
4832 if (lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
4834 if (decl_function_context (fntype) != NULL_TREE)
4836 error_at (DECL_SOURCE_LOCATION (fntype),
4837 "ms_hook_prologue is not compatible with nested function");
4846 static enum calling_abi
4847 ix86_function_abi (const_tree fndecl)
4851 return ix86_function_type_abi (TREE_TYPE (fndecl));
/* Return SYSV_ABI or MS_ABI depending on cfun, specifying the
   call ABI used.  */
4857 ix86_cfun_abi (void)
4859 if (! cfun || ! TARGET_64BIT)
4861 return cfun->machine->call_abi;
4865 extern void init_regs (void);
/* Implementation of the call ABI switching target hook.  The call
   register sets specific to FNDECL are set up here.  See also
   CONDITIONAL_REGISTER_USAGE for more details.  */
4871 ix86_call_abi_override (const_tree fndecl)
4873 if (fndecl == NULL_TREE)
4874 cfun->machine->call_abi = ix86_abi;
4876 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
/* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
   expensive re-initialization of init_regs each time we switch function
   context, since this is needed only during RTL expansion.  */
4883 ix86_maybe_switch_abi (void)
4886 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4890 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4891 for a call to a function whose data type is FNTYPE.
4892 For a library call, FNTYPE is 0. */
4895 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4896 tree fntype, /* tree ptr for function decl */
4897 rtx libname, /* SYMBOL_REF of library name or 0 */
4900 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4901 memset (cum, 0, sizeof (*cum));
4904 cum->call_abi = ix86_function_abi (fndecl);
4906 cum->call_abi = ix86_function_type_abi (fntype);
4907 /* Set up the number of registers to use for passing arguments. */
4909 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4910 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4911 "or subtarget optimization implying it");
4912 cum->nregs = ix86_regparm;
4915 if (cum->call_abi != ix86_abi)
4916 cum->nregs = (ix86_abi != SYSV_ABI
4917 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4921 cum->sse_nregs = SSE_REGPARM_MAX;
4924 if (cum->call_abi != ix86_abi)
4925 cum->sse_nregs = (ix86_abi != SYSV_ABI
4926 ? X86_64_SSE_REGPARM_MAX
4927 : X86_64_MS_SSE_REGPARM_MAX);
4931 cum->mmx_nregs = MMX_REGPARM_MAX;
4932 cum->warn_avx = true;
4933 cum->warn_sse = true;
4934 cum->warn_mmx = true;
/* Because the type might mismatch between caller and callee, we need to
   use the actual type of the function for local calls.
   FIXME: cgraph_analyze can be told to actually record if a function uses
   va_start, so for local functions maybe_vaarg can be made aggressive,
   assuming functions won't use va_start.
   FIXME: once the type system is fixed, we won't need this code anymore.  */
4943 fntype = TREE_TYPE (fndecl);
4944 cum->maybe_vaarg = (fntype
4945 ? (!prototype_p (fntype) || stdarg_p (fntype))
4950 /* If there are variable arguments, then we won't pass anything
4951 in registers in 32-bit mode. */
4952 if (stdarg_p (fntype))
4963 /* Use ecx and edx registers if function has fastcall attribute,
4964 else look for regparm information. */
4967 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4973 cum->nregs = ix86_function_regparm (fntype, fndecl);
4976 /* Set up the number of SSE registers used for passing SFmode
4977 and DFmode arguments. Warn for mismatching ABI. */
4978 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4982 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4983 But in the case of vector types, it is some vector mode.
4985 When we have only some of our vector isa extensions enabled, then there
4986 are some modes for which vector_mode_supported_p is false. For these
4987 modes, the generic vector support in gcc will choose some non-vector mode
4988 in order to implement the type. By computing the natural mode, we'll
4989 select the proper ABI location for the operand and not depend on whatever
4990 the middle-end decides to do with these vector types.
   The middle-end can't deal with vector types > 16 bytes.  In this
   case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */
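/* For example (illustrative): for

     typedef int v4si __attribute__ ((vector_size (16)));

   the natural mode is V4SImode.  If SSE were disabled, the type system
   would fall back to a non-vector mode for this type, and computing the
   natural mode here keeps the ABI location of such arguments stable.  */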
4996 static enum machine_mode
4997 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4999 enum machine_mode mode = TYPE_MODE (type);
5001 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5003 HOST_WIDE_INT size = int_size_in_bytes (type);
5004 if ((size == 8 || size == 16 || size == 32)
5005 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5006 && TYPE_VECTOR_SUBPARTS (type) > 1)
5008 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5010 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5011 mode = MIN_MODE_VECTOR_FLOAT;
5013 mode = MIN_MODE_VECTOR_INT;
5015 /* Get the mode which has this inner mode and number of units. */
5016 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5017 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5018 && GET_MODE_INNER (mode) == innermode)
5020 if (size == 32 && !TARGET_AVX)
5022 static bool warnedavx;
5029 warning (0, "AVX vector argument without AVX "
5030 "enabled changes the ABI");
5032 return TYPE_MODE (type);
5045 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5046 this may not agree with the mode that the type system has chosen for the
5047 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5048 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5051 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5056 if (orig_mode != BLKmode)
5057 tmp = gen_rtx_REG (orig_mode, regno);
5060 tmp = gen_rtx_REG (mode, regno);
5061 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5062 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
/* x86-64 register passing implementation.  See the x86-64 ABI for details.
   The goal of this code is to classify each 8 bytes of the incoming argument
   by register class and assign registers accordingly.  */
5072 /* Return the union class of CLASS1 and CLASS2.
5073 See the x86-64 PS ABI for details. */
5075 static enum x86_64_reg_class
5076 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5078 /* Rule #1: If both classes are equal, this is the resulting class. */
5079 if (class1 == class2)
5082 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5084 if (class1 == X86_64_NO_CLASS)
5086 if (class2 == X86_64_NO_CLASS)
5089 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5090 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5091 return X86_64_MEMORY_CLASS;
5093 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5094 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5095 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5096 return X86_64_INTEGERSI_CLASS;
5097 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5098 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5099 return X86_64_INTEGER_CLASS;
5101 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5103 if (class1 == X86_64_X87_CLASS
5104 || class1 == X86_64_X87UP_CLASS
5105 || class1 == X86_64_COMPLEX_X87_CLASS
5106 || class2 == X86_64_X87_CLASS
5107 || class2 == X86_64_X87UP_CLASS
5108 || class2 == X86_64_COMPLEX_X87_CLASS)
5109 return X86_64_MEMORY_CLASS;
5111 /* Rule #6: Otherwise class SSE is used. */
5112 return X86_64_SSE_CLASS;
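/* Worked example (illustrative): for a union of float and int, the two
   4-byte members classify as X86_64_SSESF_CLASS and
   X86_64_INTEGERSI_CLASS; rule #4 merges them to X86_64_INTEGERSI_CLASS,
   so such a union travels in a general-purpose register.  */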
5115 /* Classify the argument of type TYPE and mode MODE.
5116 CLASSES will be filled by the register class used to pass each word
5117 of the operand. The number of words is returned. In case the parameter
5118 should be passed in memory, 0 is returned. As a special case for zero
5119 sized containers, classes[0] will be NO_CLASS and 1 is returned.
   BIT_OFFSET is used internally for handling records and specifies the
   offset in bits modulo 256 to avoid overflow cases.
   See the x86-64 PS ABI for details.  */
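/* Worked example (illustrative, assuming the usual SysV layout):

     struct s { double d; int i; };

   occupies two eightbytes; the first classifies as X86_64_SSEDF_CLASS
   and the second as X86_64_INTEGERSI_CLASS, so this function returns 2
   and the struct is passed in one SSE and one integer register.  */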
5128 classify_argument (enum machine_mode mode, const_tree type,
5129 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5131 HOST_WIDE_INT bytes =
5132 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5133 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5135 /* Variable sized entities are always passed/returned in memory. */
5139 if (mode != VOIDmode
5140 && targetm.calls.must_pass_in_stack (mode, type))
5143 if (type && AGGREGATE_TYPE_P (type))
5147 enum x86_64_reg_class subclasses[MAX_CLASSES];
5149 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5153 for (i = 0; i < words; i++)
5154 classes[i] = X86_64_NO_CLASS;
/* Zero sized arrays or structures are NO_CLASS.  We return 0 to
   signal the memory class, so handle it as a special case.  */
5160 classes[0] = X86_64_NO_CLASS;
5164 /* Classify each field of record and merge classes. */
5165 switch (TREE_CODE (type))
5168 /* And now merge the fields of structure. */
5169 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5171 if (TREE_CODE (field) == FIELD_DECL)
5175 if (TREE_TYPE (field) == error_mark_node)
5178 /* Bitfields are always classified as integer. Handle them
5179 early, since later code would consider them to be
5180 misaligned integers. */
5181 if (DECL_BIT_FIELD (field))
5183 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5184 i < ((int_bit_position (field) + (bit_offset % 64))
5185 + tree_low_cst (DECL_SIZE (field), 0)
5188 merge_classes (X86_64_INTEGER_CLASS,
5195 type = TREE_TYPE (field);
5197 /* Flexible array member is ignored. */
5198 if (TYPE_MODE (type) == BLKmode
5199 && TREE_CODE (type) == ARRAY_TYPE
5200 && TYPE_SIZE (type) == NULL_TREE
5201 && TYPE_DOMAIN (type) != NULL_TREE
5202 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5207 if (!warned && warn_psabi)
5210 inform (input_location,
5211 "The ABI of passing struct with"
5212 " a flexible array member has"
5213 " changed in GCC 4.4");
5217 num = classify_argument (TYPE_MODE (type), type,
5219 (int_bit_position (field)
5220 + bit_offset) % 256);
5223 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5224 for (i = 0; i < num && (i + pos) < words; i++)
5226 merge_classes (subclasses[i], classes[i + pos]);
5233 /* Arrays are handled as small records. */
5236 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5237 TREE_TYPE (type), subclasses, bit_offset);
5241 /* The partial classes are now full classes. */
5242 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5243 subclasses[0] = X86_64_SSE_CLASS;
5244 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5245 && !((bit_offset % 64) == 0 && bytes == 4))
5246 subclasses[0] = X86_64_INTEGER_CLASS;
5248 for (i = 0; i < words; i++)
5249 classes[i] = subclasses[i % num];
5254 case QUAL_UNION_TYPE:
/* Unions are similar to RECORD_TYPE but the offset is always 0.  */
5257 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5259 if (TREE_CODE (field) == FIELD_DECL)
5263 if (TREE_TYPE (field) == error_mark_node)
5266 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5267 TREE_TYPE (field), subclasses,
5271 for (i = 0; i < num; i++)
5272 classes[i] = merge_classes (subclasses[i], classes[i]);
/* When size > 16 bytes, if the first one isn't
   X86_64_SSE_CLASS or any other ones aren't
   X86_64_SSEUP_CLASS, everything should be passed in
   memory.  */
5287 if (classes[0] != X86_64_SSE_CLASS)
5290 for (i = 1; i < words; i++)
5291 if (classes[i] != X86_64_SSEUP_CLASS)
5295 /* Final merger cleanup. */
5296 for (i = 0; i < words; i++)
/* If one class is MEMORY, everything should be passed in
   memory.  */
5300 if (classes[i] == X86_64_MEMORY_CLASS)
5303 /* The X86_64_SSEUP_CLASS should be always preceded by
5304 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5305 if (classes[i] == X86_64_SSEUP_CLASS
5306 && classes[i - 1] != X86_64_SSE_CLASS
5307 && classes[i - 1] != X86_64_SSEUP_CLASS)
5309 /* The first one should never be X86_64_SSEUP_CLASS. */
5310 gcc_assert (i != 0);
5311 classes[i] = X86_64_SSE_CLASS;
5314 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5315 everything should be passed in memory. */
5316 if (classes[i] == X86_64_X87UP_CLASS
5317 && (classes[i - 1] != X86_64_X87_CLASS))
5321 /* The first one should never be X86_64_X87UP_CLASS. */
5322 gcc_assert (i != 0);
5323 if (!warned && warn_psabi)
5326 inform (input_location,
5327 "The ABI of passing union with long double"
5328 " has changed in GCC 4.4");
/* Compute the alignment needed.  We align all types to their natural
   boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
5338 if (mode != VOIDmode && mode != BLKmode)
5340 int mode_alignment = GET_MODE_BITSIZE (mode);
5343 mode_alignment = 128;
5344 else if (mode == XCmode)
5345 mode_alignment = 256;
5346 if (COMPLEX_MODE_P (mode))
5347 mode_alignment /= 2;
5348 /* Misaligned fields are always returned in memory. */
5349 if (bit_offset % mode_alignment)
5353 /* for V1xx modes, just use the base mode */
5354 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5355 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5356 mode = GET_MODE_INNER (mode);
5358 /* Classification of atomic types. */
5363 classes[0] = X86_64_SSE_CLASS;
5366 classes[0] = X86_64_SSE_CLASS;
5367 classes[1] = X86_64_SSEUP_CLASS;
5377 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5381 classes[0] = X86_64_INTEGERSI_CLASS;
5384 else if (size <= 64)
5386 classes[0] = X86_64_INTEGER_CLASS;
5389 else if (size <= 64+32)
5391 classes[0] = X86_64_INTEGER_CLASS;
5392 classes[1] = X86_64_INTEGERSI_CLASS;
5395 else if (size <= 64+64)
5397 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5405 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5409 /* OImode shouldn't be used directly. */
5414 if (!(bit_offset % 64))
5415 classes[0] = X86_64_SSESF_CLASS;
5417 classes[0] = X86_64_SSE_CLASS;
5420 classes[0] = X86_64_SSEDF_CLASS;
5423 classes[0] = X86_64_X87_CLASS;
5424 classes[1] = X86_64_X87UP_CLASS;
5427 classes[0] = X86_64_SSE_CLASS;
5428 classes[1] = X86_64_SSEUP_CLASS;
5431 classes[0] = X86_64_SSE_CLASS;
5432 if (!(bit_offset % 64))
5438 if (!warned && warn_psabi)
5441 inform (input_location,
5442 "The ABI of passing structure with complex float"
5443 " member has changed in GCC 4.4");
5445 classes[1] = X86_64_SSESF_CLASS;
5449 classes[0] = X86_64_SSEDF_CLASS;
5450 classes[1] = X86_64_SSEDF_CLASS;
5453 classes[0] = X86_64_COMPLEX_X87_CLASS;
/* This mode is larger than 16 bytes.  */
5464 classes[0] = X86_64_SSE_CLASS;
5465 classes[1] = X86_64_SSEUP_CLASS;
5466 classes[2] = X86_64_SSEUP_CLASS;
5467 classes[3] = X86_64_SSEUP_CLASS;
5475 classes[0] = X86_64_SSE_CLASS;
5476 classes[1] = X86_64_SSEUP_CLASS;
5484 classes[0] = X86_64_SSE_CLASS;
5490 gcc_assert (VECTOR_MODE_P (mode));
5495 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5497 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5498 classes[0] = X86_64_INTEGERSI_CLASS;
5500 classes[0] = X86_64_INTEGER_CLASS;
5501 classes[1] = X86_64_INTEGER_CLASS;
5502 return 1 + (bytes > 8);
/* Examine the argument and set the number of registers required in each
   class.  Return 0 iff the parameter should be passed in memory.  */
5509 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5510 int *int_nregs, int *sse_nregs)
5512 enum x86_64_reg_class regclass[MAX_CLASSES];
5513 int n = classify_argument (mode, type, regclass, 0);
5519 for (n--; n >= 0; n--)
5520 switch (regclass[n])
5522 case X86_64_INTEGER_CLASS:
5523 case X86_64_INTEGERSI_CLASS:
5526 case X86_64_SSE_CLASS:
5527 case X86_64_SSESF_CLASS:
5528 case X86_64_SSEDF_CLASS:
5531 case X86_64_NO_CLASS:
5532 case X86_64_SSEUP_CLASS:
5534 case X86_64_X87_CLASS:
5535 case X86_64_X87UP_CLASS:
5539 case X86_64_COMPLEX_X87_CLASS:
5540 return in_return ? 2 : 0;
5541 case X86_64_MEMORY_CLASS:
/* Construct a container for the argument used by the GCC interface.  See
   FUNCTION_ARG for the detailed description.  */
5551 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5552 const_tree type, int in_return, int nintregs, int nsseregs,
5553 const int *intreg, int sse_regno)
5555 /* The following variables hold the static issued_error state. */
5556 static bool issued_sse_arg_error;
5557 static bool issued_sse_ret_error;
5558 static bool issued_x87_ret_error;
5560 enum machine_mode tmpmode;
5562 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5563 enum x86_64_reg_class regclass[MAX_CLASSES];
5567 int needed_sseregs, needed_intregs;
5568 rtx exp[MAX_CLASSES];
5571 n = classify_argument (mode, type, regclass, 0);
5574 if (!examine_argument (mode, type, in_return, &needed_intregs,
5577 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5580 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5581 some less clueful developer tries to use floating-point anyway. */
5582 if (needed_sseregs && !TARGET_SSE)
5586 if (!issued_sse_ret_error)
5588 error ("SSE register return with SSE disabled");
5589 issued_sse_ret_error = true;
5592 else if (!issued_sse_arg_error)
5594 error ("SSE register argument with SSE disabled");
5595 issued_sse_arg_error = true;
5600 /* Likewise, error if the ABI requires us to return values in the
5601 x87 registers and the user specified -mno-80387. */
5602 if (!TARGET_80387 && in_return)
5603 for (i = 0; i < n; i++)
5604 if (regclass[i] == X86_64_X87_CLASS
5605 || regclass[i] == X86_64_X87UP_CLASS
5606 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5608 if (!issued_x87_ret_error)
5610 error ("x87 register return with x87 disabled");
5611 issued_x87_ret_error = true;
/* First construct simple cases.  Avoid SCmode, since we want to use a
   single register to pass this type.  */
5618 if (n == 1 && mode != SCmode)
5619 switch (regclass[0])
5621 case X86_64_INTEGER_CLASS:
5622 case X86_64_INTEGERSI_CLASS:
5623 return gen_rtx_REG (mode, intreg[0]);
5624 case X86_64_SSE_CLASS:
5625 case X86_64_SSESF_CLASS:
5626 case X86_64_SSEDF_CLASS:
5627 if (mode != BLKmode)
5628 return gen_reg_or_parallel (mode, orig_mode,
5629 SSE_REGNO (sse_regno));
5631 case X86_64_X87_CLASS:
5632 case X86_64_COMPLEX_X87_CLASS:
5633 return gen_rtx_REG (mode, FIRST_STACK_REG);
5634 case X86_64_NO_CLASS:
5635 /* Zero sized array, struct or class. */
5640 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5641 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5642 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5644 && regclass[0] == X86_64_SSE_CLASS
5645 && regclass[1] == X86_64_SSEUP_CLASS
5646 && regclass[2] == X86_64_SSEUP_CLASS
5647 && regclass[3] == X86_64_SSEUP_CLASS
5649 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5652 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5653 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5654 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5655 && regclass[1] == X86_64_INTEGER_CLASS
5656 && (mode == CDImode || mode == TImode || mode == TFmode)
5657 && intreg[0] + 1 == intreg[1])
5658 return gen_rtx_REG (mode, intreg[0]);
5660 /* Otherwise figure out the entries of the PARALLEL. */
5661 for (i = 0; i < n; i++)
5665 switch (regclass[i])
5667 case X86_64_NO_CLASS:
5669 case X86_64_INTEGER_CLASS:
5670 case X86_64_INTEGERSI_CLASS:
5671 /* Merge TImodes on aligned occasions here too. */
5672 if (i * 8 + 8 > bytes)
5673 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5674 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
/* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
5679 if (tmpmode == BLKmode)
5681 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5682 gen_rtx_REG (tmpmode, *intreg),
5686 case X86_64_SSESF_CLASS:
5687 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5688 gen_rtx_REG (SFmode,
5689 SSE_REGNO (sse_regno)),
5693 case X86_64_SSEDF_CLASS:
5694 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5695 gen_rtx_REG (DFmode,
5696 SSE_REGNO (sse_regno)),
5700 case X86_64_SSE_CLASS:
5708 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5718 && regclass[1] == X86_64_SSEUP_CLASS
5719 && regclass[2] == X86_64_SSEUP_CLASS
5720 && regclass[3] == X86_64_SSEUP_CLASS);
5727 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5728 gen_rtx_REG (tmpmode,
5729 SSE_REGNO (sse_regno)),
5738 /* Empty aligned struct, union or class. */
5742 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5743 for (i = 0; i < nexps; i++)
5744 XVECEXP (ret, 0, i) = exp [i];
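/* For illustration (a sketch of the classification above, SysV x86-64):
   a 16-byte struct { double d; int i; } classifies as
   { X86_64_SSE_CLASS, X86_64_INTEGERSI_CLASS }, so the loop builds

	(parallel [(expr_list (reg:DF xmm0) (const_int 0))
		   (expr_list (reg:SI di) (const_int 8))])

   i.e. the first eightbyte travels in an SSE register and the second
   in the next free integer register.  */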
5748 /* Update the data in CUM to advance over an argument of mode MODE
5749 and data type TYPE. (TYPE is null for libcalls where that information
5750 may not be available.) */
5753 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5754 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5770 cum->words += words;
5771 cum->nregs -= words;
5772 cum->regno += words;
5774 if (cum->nregs <= 0)
5782 /* OImode shouldn't be used directly. */
5786 if (cum->float_in_sse < 2)
5789 if (cum->float_in_sse < 1)
5806 if (!type || !AGGREGATE_TYPE_P (type))
5808 cum->sse_words += words;
5809 cum->sse_nregs -= 1;
5810 cum->sse_regno += 1;
5811 if (cum->sse_nregs <= 0)
5825 if (!type || !AGGREGATE_TYPE_P (type))
5827 cum->mmx_words += words;
5828 cum->mmx_nregs -= 1;
5829 cum->mmx_regno += 1;
5830 if (cum->mmx_nregs <= 0)
5841 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5842 tree type, HOST_WIDE_INT words, int named)
5844 int int_nregs, sse_nregs;
5846 /* Unnamed 256-bit vector mode parameters are passed on the stack.  */
5847 if (!named && VALID_AVX256_REG_MODE (mode))
5850 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5851 cum->words += words;
5852 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5854 cum->nregs -= int_nregs;
5855 cum->sse_nregs -= sse_nregs;
5856 cum->regno += int_nregs;
5857 cum->sse_regno += sse_nregs;
5860 cum->words += words;
5864 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5865 HOST_WIDE_INT words)
5867 /* Otherwise, this should be passed indirectly.  */
5868 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5870 cum->words += words;
5879 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5880 tree type, int named)
5882 HOST_WIDE_INT bytes, words;
5884 if (mode == BLKmode)
5885 bytes = int_size_in_bytes (type);
5887 bytes = GET_MODE_SIZE (mode);
5888 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5891 mode = type_natural_mode (type, NULL);
5893 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
5894 function_arg_advance_ms_64 (cum, bytes, words);
5895 else if (TARGET_64BIT)
5896 function_arg_advance_64 (cum, mode, type, words, named);
5898 function_arg_advance_32 (cum, mode, type, bytes, words);
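/* Example of the advance above (a sketch, SysV x86-64): for a `double'
   argument, examine_argument reports int_nregs == 0 and sse_nregs == 1,
   so only sse_nregs and sse_regno move; for a struct { long a, b; } it
   reports int_nregs == 2, so nregs drops by two and regno advances by
   two; anything that no longer fits the remaining registers just bumps
   cum->words.  */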
5901 /* Define where to put the arguments to a function.
5902 Value is zero to push the argument on the stack,
5903 or a hard register in which to store the argument.
5905 MODE is the argument's machine mode.
5906 TYPE is the data type of the argument (as a tree).
5907 This is null for libcalls where that information may not be available.
5909 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5910 the preceding args and about the function being called.
5911 NAMED is nonzero if this argument is a named parameter
5912 (otherwise it is an extra parameter matching an ellipsis). */
5915 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5916 enum machine_mode orig_mode, tree type,
5917 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5919 static bool warnedsse, warnedmmx;
5921 /* Avoid the AL settings for the Unix64 ABI. */
5922 if (mode == VOIDmode)
5938 if (words <= cum->nregs)
5940 int regno = cum->regno;
5942 /* Fastcall allocates the first two DWORD (SImode) or
5943 smaller arguments to ECX and EDX if it isn't an aggregate type.  */
5949 || (type && AGGREGATE_TYPE_P (type)))
5952 /* ECX, not EAX, is the first allocated register.  */
5953 if (regno == AX_REG)
5956 return gen_rtx_REG (mode, regno);
5961 if (cum->float_in_sse < 2)
5964 if (cum->float_in_sse < 1)
5968 /* In 32-bit mode, we pass TImode in xmm registers.  */
5975 if (!type || !AGGREGATE_TYPE_P (type))
5977 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5980 warning (0, "SSE vector argument without SSE enabled "
5984 return gen_reg_or_parallel (mode, orig_mode,
5985 cum->sse_regno + FIRST_SSE_REG);
5990 /* OImode shouldn't be used directly. */
5999 if (!type || !AGGREGATE_TYPE_P (type))
6002 return gen_reg_or_parallel (mode, orig_mode,
6003 cum->sse_regno + FIRST_SSE_REG);
6013 if (!type || !AGGREGATE_TYPE_P (type))
6015 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6018 warning (0, "MMX vector argument without MMX enabled "
6022 return gen_reg_or_parallel (mode, orig_mode,
6023 cum->mmx_regno + FIRST_MMX_REG);
6032 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6033 enum machine_mode orig_mode, tree type, int named)
6035 /* Handle a hidden AL argument containing the number of SSE registers
6036 used, for varargs x86-64 functions.  */
6037 if (mode == VOIDmode)
6038 return GEN_INT (cum->maybe_vaarg
6039 ? (cum->sse_nregs < 0
6040 ? (cum->call_abi == ix86_abi
6042 : (ix86_abi != SYSV_ABI
6043 ? X86_64_SSE_REGPARM_MAX
6044 : X86_64_MS_SSE_REGPARM_MAX))
6059 /* Unnamed 256-bit vector mode parameters are passed on the stack.  */
6065 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6067 &x86_64_int_parameter_registers [cum->regno],
6072 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6073 enum machine_mode orig_mode, int named,
6074 HOST_WIDE_INT bytes)
6078 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6079 We use a value of -2 to specify that the current function call is MS ABI.  */
6080 if (mode == VOIDmode)
6081 return GEN_INT (-2);
6083 /* If we've run out of registers, it goes on the stack. */
6084 if (cum->nregs == 0)
6087 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6089 /* Only floating point modes are passed in anything but integer regs. */
6090 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6093 regno = cum->regno + FIRST_SSE_REG;
6098 /* Unnamed floating parameters are passed in both the
6099 SSE and integer registers. */
6100 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6101 t2 = gen_rtx_REG (mode, regno);
6102 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6103 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6104 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6107 /* Handle aggregate types passed in registers.  */
6108 if (orig_mode == BLKmode)
6110 if (bytes > 0 && bytes <= 8)
6111 mode = (bytes > 4 ? DImode : SImode);
6112 if (mode == BLKmode)
6116 return gen_reg_or_parallel (mode, orig_mode, regno);
6120 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6121 tree type, int named)
6123 enum machine_mode mode = omode;
6124 HOST_WIDE_INT bytes, words;
6126 if (mode == BLKmode)
6127 bytes = int_size_in_bytes (type);
6129 bytes = GET_MODE_SIZE (mode);
6130 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6132 /* To simplify the code below, represent vector types with a vector mode
6133 even if MMX/SSE are not active. */
6134 if (type && TREE_CODE (type) == VECTOR_TYPE)
6135 mode = type_natural_mode (type, cum);
6137 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6138 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6139 else if (TARGET_64BIT)
6140 return function_arg_64 (cum, mode, omode, type, named);
6142 return function_arg_32 (cum, mode, omode, type, bytes, words);
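/* Putting the three cases together (for illustration, assuming the
   usual psABI register order):

       void f (int a, double b, __m128 c);

   64-bit SysV passes a in %edi, b in %xmm0 and c in %xmm1; 64-bit MS
   passes a in %ecx, b in %xmm1 (the argument slots are shared between
   the integer and SSE files) and c by reference, its address in the
   third slot; the 32-bit ABI pushes a and b on the stack and, with SSE
   enabled, passes c in %xmm0.  */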
6145 /* A C expression that indicates when an argument must be passed by
6146 reference. If nonzero for an argument, a copy of that argument is
6147 made in memory and a pointer to the argument is passed instead of
6148 the argument itself. The pointer is passed in whatever way is
6149 appropriate for passing a pointer to that type. */
6152 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6153 enum machine_mode mode ATTRIBUTE_UNUSED,
6154 const_tree type, bool named ATTRIBUTE_UNUSED)
6156 /* See Windows x64 Software Convention. */
6157 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6159 int msize = (int) GET_MODE_SIZE (mode);
6162 /* Arrays are passed by reference. */
6163 if (TREE_CODE (type) == ARRAY_TYPE)
6166 if (AGGREGATE_TYPE_P (type))
6168 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6169 are passed by reference. */
6170 msize = int_size_in_bytes (type);
6174 /* __m128 is passed by reference. */
6176 case 1: case 2: case 4: case 8:
6182 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6188 /* Return true when TYPE should be 128-bit aligned for 32-bit argument passing ABI.  */
6191 contains_aligned_value_p (tree type)
6193 enum machine_mode mode = TYPE_MODE (type);
6194 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6198 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6200 if (TYPE_ALIGN (type) < 128)
6203 if (AGGREGATE_TYPE_P (type))
6205 /* Walk the aggregates recursively. */
6206 switch (TREE_CODE (type))
6210 case QUAL_UNION_TYPE:
6214 /* Walk all the structure fields. */
6215 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6217 if (TREE_CODE (field) == FIELD_DECL
6218 && contains_aligned_value_p (TREE_TYPE (field)))
6225 /* Just in case some language passes arrays by value.  */
6226 if (contains_aligned_value_p (TREE_TYPE (type)))
6237 /* Gives the alignment boundary, in bits, of an argument with the
6238 specified mode and type. */
6241 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6246 /* Since the canonical type is used for the call, convert TYPE to its
6247 canonical type if needed.  */
6248 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6249 type = TYPE_CANONICAL (type);
6250 align = TYPE_ALIGN (type);
6253 align = GET_MODE_ALIGNMENT (mode);
6254 if (align < PARM_BOUNDARY)
6255 align = PARM_BOUNDARY;
6256 /* In 32-bit mode, only _Decimal128 and __float128 are aligned to their
6257 natural boundaries.  */
6258 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6260 /* The i386 ABI defines all arguments to be 4-byte aligned.  We have to
6261 make an exception for SSE modes since these require 128-bit alignment.
6264 The handling here differs from field_alignment.  ICC aligns MMX
6265 arguments to 4-byte boundaries, while structure fields are aligned
6266 to 8-byte boundaries.  */
6269 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6270 align = PARM_BOUNDARY;
6274 if (!contains_aligned_value_p (type))
6275 align = PARM_BOUNDARY;
6278 if (align > BIGGEST_ALIGNMENT)
6279 align = BIGGEST_ALIGNMENT;
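/* Worked examples of the above (illustrative): in 32-bit mode a
   `double' argument comes out at PARM_BOUNDARY (32 bits) despite its
   64-bit natural alignment, while an __m128 argument keeps its 128-bit
   alignment because TARGET_SSE && SSE_REG_MODE_P holds; and any request
   beyond BIGGEST_ALIGNMENT is capped by the final clamp.  */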
6283 /* Return true if N is a possible register number for a function value.  */
6286 ix86_function_value_regno_p (int regno)
6293 case FIRST_FLOAT_REG:
6294 /* TODO: The function should depend on the current function ABI, but
6295 builtins.c would need updating then.  Therefore we use the default ABI.  */
6297 if (TARGET_64BIT && ix86_abi == MS_ABI)
6299 return TARGET_FLOAT_RETURNS_IN_80387;
6305 if (TARGET_MACHO || TARGET_64BIT)
6313 /* Define how to find the value returned by a function.
6314 VALTYPE is the data type of the value (as a tree).
6315 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6316 otherwise, FUNC is 0. */
6319 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6320 const_tree fntype, const_tree fn)
6324 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6325 we normally prevent this case when mmx is not available. However
6326 some ABIs may require the result to be returned like DImode. */
6327 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6328 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6330 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6331 we prevent this case when sse is not available. However some ABIs
6332 may require the result to be returned like integer TImode. */
6333 else if (mode == TImode
6334 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6335 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6337 /* 32-byte vector modes in %ymm0. */
6338 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6339 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6341 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6342 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6343 regno = FIRST_FLOAT_REG;
6345 /* Most things go in %eax. */
6348 /* Override FP return register with %xmm0 for local functions when
6349 SSE math is enabled or for functions with sseregparm attribute. */
6350 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6352 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6353 if ((sse_level >= 1 && mode == SFmode)
6354 || (sse_level == 2 && mode == DFmode))
6355 regno = FIRST_SSE_REG;
6358 /* OImode shouldn't be used directly. */
6359 gcc_assert (mode != OImode);
6361 return gen_rtx_REG (orig_mode, regno);
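/* Summary of the 32-bit choice above (for reference):

	8-byte vectors		%mm0  (falls back to integer regs without MMX)
	TImode / 16-byte vectors %xmm0 (falls back without SSE)
	32-byte vectors		%ymm0 (falls back without AVX)
	x87 float modes		%st(0)
	everything else		%eax

   with SFmode/DFmode redirected to %xmm0 when sseregparm or local SSE
   math applies.  */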
6365 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6370 /* Handle libcalls, which don't provide a type node. */
6371 if (valtype == NULL)
6383 return gen_rtx_REG (mode, FIRST_SSE_REG);
6386 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6390 return gen_rtx_REG (mode, AX_REG);
6394 ret = construct_container (mode, orig_mode, valtype, 1,
6395 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6396 x86_64_int_return_registers, 0);
6398 /* For zero-sized structures, construct_container returns NULL, but we
6399 need to keep the rest of the compiler happy by returning a meaningful value.  */
6401 ret = gen_rtx_REG (orig_mode, AX_REG);
6407 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6409 unsigned int regno = AX_REG;
6413 switch (GET_MODE_SIZE (mode))
6416 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6417 && !COMPLEX_MODE_P (mode))
6418 regno = FIRST_SSE_REG;
6422 if (mode == SFmode || mode == DFmode)
6423 regno = FIRST_SSE_REG;
6429 return gen_rtx_REG (orig_mode, regno);
6433 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6434 enum machine_mode orig_mode, enum machine_mode mode)
6436 const_tree fn, fntype;
6439 if (fntype_or_decl && DECL_P (fntype_or_decl))
6440 fn = fntype_or_decl;
6441 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6443 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6444 return function_value_ms_64 (orig_mode, mode);
6445 else if (TARGET_64BIT)
6446 return function_value_64 (orig_mode, mode, valtype);
6448 return function_value_32 (orig_mode, mode, fntype, fn);
6452 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6453 bool outgoing ATTRIBUTE_UNUSED)
6455 enum machine_mode mode, orig_mode;
6457 orig_mode = TYPE_MODE (valtype);
6458 mode = type_natural_mode (valtype, NULL);
6459 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6463 ix86_libcall_value (enum machine_mode mode)
6465 return ix86_function_value_1 (NULL, NULL, mode, mode);
6468 /* Return true iff type is returned in memory. */
6470 static int ATTRIBUTE_UNUSED
6471 return_in_memory_32 (const_tree type, enum machine_mode mode)
6475 if (mode == BLKmode)
6478 size = int_size_in_bytes (type);
6480 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6483 if (VECTOR_MODE_P (mode) || mode == TImode)
6485 /* User-created vectors small enough to fit in EAX. */
6489 /* MMX/3dNow values are returned in MM0,
6490 except when it doesn't exist.  */
6492 return (TARGET_MMX ? 0 : 1);
6494 /* SSE values are returned in XMM0, except when it doesn't exist. */
6496 return (TARGET_SSE ? 0 : 1);
6498 /* AVX values are returned in YMM0, except when it doesn't exist. */
6500 return TARGET_AVX ? 0 : 1;
6509 /* OImode shouldn't be used directly. */
6510 gcc_assert (mode != OImode);
6515 static int ATTRIBUTE_UNUSED
6516 return_in_memory_64 (const_tree type, enum machine_mode mode)
6518 int needed_intregs, needed_sseregs;
6519 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6522 static int ATTRIBUTE_UNUSED
6523 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6525 HOST_WIDE_INT size = int_size_in_bytes (type);
6527 /* __m128 is returned in xmm0. */
6528 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6529 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6532 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
6533 return (size != 1 && size != 2 && size != 4 && size != 8);
6537 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6539 #ifdef SUBTARGET_RETURN_IN_MEMORY
6540 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6542 const enum machine_mode mode = type_natural_mode (type, NULL);
6546 if (ix86_function_type_abi (fntype) == MS_ABI)
6547 return return_in_memory_ms_64 (type, mode);
6549 return return_in_memory_64 (type, mode);
6552 return return_in_memory_32 (type, mode);
6556 /* Return false iff TYPE is returned in memory. This version is used
6557 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6558 but differs notably in that when MMX is available, 8-byte vectors
6559 are returned in memory, rather than in MMX registers. */
6562 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6565 enum machine_mode mode = type_natural_mode (type, NULL);
6568 return return_in_memory_64 (type, mode);
6570 if (mode == BLKmode)
6573 size = int_size_in_bytes (type);
6575 if (VECTOR_MODE_P (mode))
6577 /* Return in memory only if MMX registers *are* available. This
6578 seems backwards, but it is consistent with the existing Solaris x86 ABI.  */
6585 else if (mode == TImode)
6587 else if (mode == XFmode)
6593 /* When returning SSE vector types, we have a choice of either
6594 (1) being ABI-incompatible with a -march switch, or
6595 (2) generating an error.
6596 Given no good solution, I think the safest thing is one warning.
6597 The user won't be able to use -Werror, but....
6599 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6600 called in response to actually generating a caller or callee that
6601 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6602 via aggregate_value_p for general type probing from tree-ssa. */
6605 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6607 static bool warnedsse, warnedmmx;
6609 if (!TARGET_64BIT && type)
6611 /* Look at the return type of the function, not the function type. */
6612 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6614 if (!TARGET_SSE && !warnedsse)
6617 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6620 warning (0, "SSE vector return without SSE enabled "
6625 if (!TARGET_MMX && !warnedmmx)
6627 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6630 warning (0, "MMX vector return without MMX enabled "
6640 /* Create the va_list data type. */
6642 /* Returns the calling-convention-specific va_list data type.
6643 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6646 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6648 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6650 /* For i386 we use a plain pointer to the argument area.  */
6651 if (!TARGET_64BIT || abi == MS_ABI)
6652 return build_pointer_type (char_type_node);
6654 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6655 type_decl = build_decl (BUILTINS_LOCATION,
6656 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6658 f_gpr = build_decl (BUILTINS_LOCATION,
6659 FIELD_DECL, get_identifier ("gp_offset"),
6660 unsigned_type_node);
6661 f_fpr = build_decl (BUILTINS_LOCATION,
6662 FIELD_DECL, get_identifier ("fp_offset"),
6663 unsigned_type_node);
6664 f_ovf = build_decl (BUILTINS_LOCATION,
6665 FIELD_DECL, get_identifier ("overflow_arg_area"),
6667 f_sav = build_decl (BUILTINS_LOCATION,
6668 FIELD_DECL, get_identifier ("reg_save_area"),
6671 va_list_gpr_counter_field = f_gpr;
6672 va_list_fpr_counter_field = f_fpr;
6674 DECL_FIELD_CONTEXT (f_gpr) = record;
6675 DECL_FIELD_CONTEXT (f_fpr) = record;
6676 DECL_FIELD_CONTEXT (f_ovf) = record;
6677 DECL_FIELD_CONTEXT (f_sav) = record;
6679 TREE_CHAIN (record) = type_decl;
6680 TYPE_NAME (record) = type_decl;
6681 TYPE_FIELDS (record) = f_gpr;
6682 TREE_CHAIN (f_gpr) = f_fpr;
6683 TREE_CHAIN (f_fpr) = f_ovf;
6684 TREE_CHAIN (f_ovf) = f_sav;
6686 layout_type (record);
6688 /* The correct type is an array type of one element. */
6689 return build_array_type (record, build_index_type (size_zero_node));
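/* For reference, the record built above is the type the x86-64 psABI
   spells

       typedef struct {
	 unsigned int gp_offset;
	 unsigned int fp_offset;
	 void *overflow_arg_area;
	 void *reg_save_area;
       } va_list[1];

   the one-element array is why callers see it decay to a pointer.  */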
6692 /* Set up the builtin va_list data type and, for 64-bit, the additional
6693 calling-convention-specific va_list data types.  */
6696 ix86_build_builtin_va_list (void)
6698 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
6700 /* Initialize ABI-specific va_list builtin types.  */
6704 if (ix86_abi == MS_ABI)
6706 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6707 if (TREE_CODE (t) != RECORD_TYPE)
6708 t = build_variant_type_copy (t);
6709 sysv_va_list_type_node = t;
6714 if (TREE_CODE (t) != RECORD_TYPE)
6715 t = build_variant_type_copy (t);
6716 sysv_va_list_type_node = t;
6718 if (ix86_abi != MS_ABI)
6720 t = ix86_build_builtin_va_list_abi (MS_ABI);
6721 if (TREE_CODE (t) != RECORD_TYPE)
6722 t = build_variant_type_copy (t);
6723 ms_va_list_type_node = t;
6728 if (TREE_CODE (t) != RECORD_TYPE)
6729 t = build_variant_type_copy (t);
6730 ms_va_list_type_node = t;
6737 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6740 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6749 int regparm = ix86_regparm;
6751 if (cum->call_abi != ix86_abi)
6752 regparm = (ix86_abi != SYSV_ABI
6753 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
6755 /* GPR size of varargs save area. */
6756 if (cfun->va_list_gpr_size)
6757 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6759 ix86_varargs_gpr_size = 0;
6761 /* FPR size of varargs save area. We don't need it if we don't pass
6762 anything in SSE registers. */
6763 if (cum->sse_nregs && cfun->va_list_fpr_size)
6764 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6766 ix86_varargs_fpr_size = 0;
6768 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6771 save_area = frame_pointer_rtx;
6772 set = get_varargs_alias_set ();
6774 for (i = cum->regno;
6776 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6779 mem = gen_rtx_MEM (Pmode,
6780 plus_constant (save_area, i * UNITS_PER_WORD));
6781 MEM_NOTRAP_P (mem) = 1;
6782 set_mem_alias_set (mem, set);
6783 emit_move_insn (mem, gen_rtx_REG (Pmode,
6784 x86_64_int_parameter_registers[i]));
6787 if (ix86_varargs_fpr_size)
6789 /* Now emit code to save SSE registers.  The AX parameter contains the
6790 number of SSE parameter registers used to call this function.  We use
6791 the sse_prologue_save insn template, which produces a computed jump
6792 across the SSE saves.  We need some preparation work to get this working.  */
6794 label = gen_label_rtx ();
6795 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6797 /* Compute the address to jump to:
6798 label - eax*4 + nnamed_sse_arguments*4, or
6799 label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6800 tmp_reg = gen_reg_rtx (Pmode);
6801 nsse_reg = gen_reg_rtx (Pmode);
6802 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6803 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6804 gen_rtx_MULT (Pmode, nsse_reg,
6807 /* vmovaps is one byte longer than movaps. */
6809 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6810 gen_rtx_PLUS (Pmode, tmp_reg,
6816 gen_rtx_CONST (DImode,
6817 gen_rtx_PLUS (DImode,
6819 GEN_INT (cum->sse_regno
6820 * (TARGET_AVX ? 5 : 4)))));
6822 emit_move_insn (nsse_reg, label_ref);
6823 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6825 /* Compute the address of the memory block we save into.  We always use a
6826 pointer pointing 127 bytes past the first byte to store - this keeps
6827 the instruction size limited to 4 bytes (5 bytes for AVX) with a
6828 one-byte displacement.  */
6829 tmp_reg = gen_reg_rtx (Pmode);
6830 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6831 plus_constant (save_area,
6832 ix86_varargs_gpr_size + 127)));
6833 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6834 MEM_NOTRAP_P (mem) = 1;
6835 set_mem_alias_set (mem, set);
6836 set_mem_align (mem, BITS_PER_WORD);
6838 /* And finally do the dirty job! */
6839 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6840 GEN_INT (cum->sse_regno), label));
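/* Shape of the code emitted above (a simplified sketch, non-AVX, so
   every save insn is 4 bytes; register choices are illustrative):

	movzbl	%al, %r11d		# AL = number of SSE regs used
	# target = L - %r11*4, adjusted past the named-argument saves
	jmp	*<target>
	movaps	%xmm7, -15(%r10)
	...
	movaps	%xmm0, -127(%r10)
   L:

   The computed jump lands so that only the registers that may hold
   variadic arguments are saved, and %r10 points 127 bytes past the
   start of the save area so each displacement fits in a signed
   byte.  */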
6845 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6847 alias_set_type set = get_varargs_alias_set ();
6850 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
6854 mem = gen_rtx_MEM (Pmode,
6855 plus_constant (virtual_incoming_args_rtx,
6856 i * UNITS_PER_WORD));
6857 MEM_NOTRAP_P (mem) = 1;
6858 set_mem_alias_set (mem, set);
6860 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6861 emit_move_insn (mem, reg);
6866 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6867 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6870 CUMULATIVE_ARGS next_cum;
6873 /* This argument doesn't appear to be used anymore. Which is good,
6874 because the old code here didn't suppress rtl generation. */
6875 gcc_assert (!no_rtl);
6880 fntype = TREE_TYPE (current_function_decl);
6882 /* For varargs, we do not want to skip the dummy va_dcl argument.
6883 For stdargs, we do want to skip the last named argument. */
6885 if (stdarg_p (fntype))
6886 function_arg_advance (&next_cum, mode, type, 1);
6888 if (cum->call_abi == MS_ABI)
6889 setup_incoming_varargs_ms_64 (&next_cum);
6891 setup_incoming_varargs_64 (&next_cum);
6894 /* Check whether TYPE is a va_list of the char * kind.  */
6897 is_va_list_char_pointer (tree type)
6901 /* For 32-bit it is always true. */
6904 canonic = ix86_canonical_va_list_type (type);
6905 return (canonic == ms_va_list_type_node
6906 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
6909 /* Implement va_start. */
6912 ix86_va_start (tree valist, rtx nextarg)
6914 HOST_WIDE_INT words, n_gpr, n_fpr;
6915 tree f_gpr, f_fpr, f_ovf, f_sav;
6916 tree gpr, fpr, ovf, sav, t;
6919 /* Only the 64-bit target needs something special.  */
6920 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6922 std_expand_builtin_va_start (valist, nextarg);
6926 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6927 f_fpr = TREE_CHAIN (f_gpr);
6928 f_ovf = TREE_CHAIN (f_fpr);
6929 f_sav = TREE_CHAIN (f_ovf);
6931 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6932 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6933 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6934 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6935 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6937 /* Count the number of gp and fp argument registers used.  */
6938 words = crtl->args.info.words;
6939 n_gpr = crtl->args.info.regno;
6940 n_fpr = crtl->args.info.sse_regno;
6942 if (cfun->va_list_gpr_size)
6944 type = TREE_TYPE (gpr);
6945 t = build2 (MODIFY_EXPR, type,
6946 gpr, build_int_cst (type, n_gpr * 8));
6947 TREE_SIDE_EFFECTS (t) = 1;
6948 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6951 if (TARGET_SSE && cfun->va_list_fpr_size)
6953 type = TREE_TYPE (fpr);
6954 t = build2 (MODIFY_EXPR, type, fpr,
6955 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6956 TREE_SIDE_EFFECTS (t) = 1;
6957 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6960 /* Find the overflow area. */
6961 type = TREE_TYPE (ovf);
6962 t = make_tree (type, crtl->args.internal_arg_pointer);
6964 t = build2 (POINTER_PLUS_EXPR, type, t,
6965 size_int (words * UNITS_PER_WORD));
6966 t = build2 (MODIFY_EXPR, type, ovf, t);
6967 TREE_SIDE_EFFECTS (t) = 1;
6968 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6970 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6972 /* Find the register save area.
6973 The function prologue saves it right above the stack frame.  */
6974 type = TREE_TYPE (sav);
6975 t = make_tree (type, frame_pointer_rtx);
6976 if (!ix86_varargs_gpr_size)
6977 t = build2 (POINTER_PLUS_EXPR, type, t,
6978 size_int (-8 * X86_64_REGPARM_MAX));
6979 t = build2 (MODIFY_EXPR, type, sav, t);
6980 TREE_SIDE_EFFECTS (t) = 1;
6981 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
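/* Example of the initialization above (a sketch): for

       void g (int n, ...)

   one integer register is named, so va_start sets gp_offset = 1*8 = 8,
   fp_offset = 0*16 + 8*X86_64_REGPARM_MAX = 48, points
   overflow_arg_area just past any named stack words, and points
   reg_save_area at the block the prologue saved (biased downward when
   only the FP half of the area was saved).  */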
6985 /* Implement va_arg. */
6988 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6991 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6992 tree f_gpr, f_fpr, f_ovf, f_sav;
6993 tree gpr, fpr, ovf, sav, t;
6995 tree lab_false, lab_over = NULL_TREE;
7000 enum machine_mode nat_mode;
7003 /* Only the 64-bit target needs something special.  */
7004 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7005 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7007 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7008 f_fpr = TREE_CHAIN (f_gpr);
7009 f_ovf = TREE_CHAIN (f_fpr);
7010 f_sav = TREE_CHAIN (f_ovf);
7012 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7013 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7014 valist = build_va_arg_indirect_ref (valist);
7015 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7016 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7017 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7019 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7021 type = build_pointer_type (type);
7022 size = int_size_in_bytes (type);
7023 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7025 nat_mode = type_natural_mode (type, NULL);
7034 /* Unnamed 256-bit vector mode parameters are passed on the stack.  */
7035 if (ix86_cfun_abi () == SYSV_ABI)
7042 container = construct_container (nat_mode, TYPE_MODE (type),
7043 type, 0, X86_64_REGPARM_MAX,
7044 X86_64_SSE_REGPARM_MAX, intreg,
7049 /* Pull the value out of the saved registers. */
7051 addr = create_tmp_var (ptr_type_node, "addr");
7055 int needed_intregs, needed_sseregs;
7057 tree int_addr, sse_addr;
7059 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7060 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7062 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7064 need_temp = (!REG_P (container)
7065 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7066 || TYPE_ALIGN (type) > 128));
7068 /* If we are passing a structure, verify that it is a consecutive block
7069 in the register save area.  If not, we need to do moves.  */
7070 if (!need_temp && !REG_P (container))
7072 /* Verify that all registers are strictly consecutive.  */
7073 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7077 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7079 rtx slot = XVECEXP (container, 0, i);
7080 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7081 || INTVAL (XEXP (slot, 1)) != i * 16)
7089 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7091 rtx slot = XVECEXP (container, 0, i);
7092 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7093 || INTVAL (XEXP (slot, 1)) != i * 8)
7105 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7106 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7109 /* First ensure that we fit completely in registers. */
7112 t = build_int_cst (TREE_TYPE (gpr),
7113 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7114 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7115 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7116 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7117 gimplify_and_add (t, pre_p);
7121 t = build_int_cst (TREE_TYPE (fpr),
7122 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7123 + X86_64_REGPARM_MAX * 8);
7124 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7125 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7126 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7127 gimplify_and_add (t, pre_p);
7130 /* Compute index to start of area used for integer regs. */
7133 /* int_addr = gpr + sav; */
7134 t = fold_convert (sizetype, gpr);
7135 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7136 gimplify_assign (int_addr, t, pre_p);
7140 /* sse_addr = fpr + sav; */
7141 t = fold_convert (sizetype, fpr);
7142 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7143 gimplify_assign (sse_addr, t, pre_p);
7148 tree temp = create_tmp_var (type, "va_arg_tmp");
7151 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7152 gimplify_assign (addr, t, pre_p);
7154 for (i = 0; i < XVECLEN (container, 0); i++)
7156 rtx slot = XVECEXP (container, 0, i);
7157 rtx reg = XEXP (slot, 0);
7158 enum machine_mode mode = GET_MODE (reg);
7159 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
7160 tree addr_type = build_pointer_type (piece_type);
7161 tree daddr_type = build_pointer_type_for_mode (piece_type,
7165 tree dest_addr, dest;
7167 if (SSE_REGNO_P (REGNO (reg)))
7169 src_addr = sse_addr;
7170 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7174 src_addr = int_addr;
7175 src_offset = REGNO (reg) * 8;
7177 src_addr = fold_convert (addr_type, src_addr);
7178 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7179 size_int (src_offset));
7180 src = build_va_arg_indirect_ref (src_addr);
7182 dest_addr = fold_convert (daddr_type, addr);
7183 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7184 size_int (INTVAL (XEXP (slot, 1))));
7185 dest = build_va_arg_indirect_ref (dest_addr);
7187 gimplify_assign (dest, src, pre_p);
7193 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7194 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7195 gimplify_assign (gpr, t, pre_p);
7200 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7201 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7202 gimplify_assign (fpr, t, pre_p);
7205 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7207 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7210 /* ... otherwise out of the overflow area. */
7212 /* When we align a parameter on the stack for the caller, if its
7213 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7214 aligned to MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
7215 here with the caller.  */
7216 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7217 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7218 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7220 /* Care for on-stack alignment if needed. */
7221 if (arg_boundary <= 64
7222 || integer_zerop (TYPE_SIZE (type)))
7226 HOST_WIDE_INT align = arg_boundary / 8;
7227 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7228 size_int (align - 1));
7229 t = fold_convert (sizetype, t);
7230 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7232 t = fold_convert (TREE_TYPE (ovf), t);
7234 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7235 gimplify_assign (addr, t, pre_p);
7237 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7238 size_int (rsize * UNITS_PER_WORD));
7239 gimplify_assign (unshare_expr (ovf), t, pre_p);
7242 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7244 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7245 addr = fold_convert (ptrtype, addr);
7248 addr = build_va_arg_indirect_ref (addr);
7249 return build_va_arg_indirect_ref (addr);
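/* The gimple emitted above amounts to this C sketch for
   va_arg (ap, int):

       if (ap->gp_offset >= 6 * 8)
	 goto overflow;				(lab_false)
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;				(lab_over)
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;

   with 6 * 8 coming from X86_64_REGPARM_MAX and the analogous fpr test
   using 16-byte slots past the integer save area.  */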
7252 /* Return nonzero if OPNUM's MEM should be matched
7253 in movabs* patterns. */
7256 ix86_check_movabs (rtx insn, int opnum)
7260 set = PATTERN (insn);
7261 if (GET_CODE (set) == PARALLEL)
7262 set = XVECEXP (set, 0, 0);
7263 gcc_assert (GET_CODE (set) == SET);
7264 mem = XEXP (set, opnum);
7265 while (GET_CODE (mem) == SUBREG)
7266 mem = SUBREG_REG (mem);
7267 gcc_assert (MEM_P (mem));
7268 return (volatile_ok || !MEM_VOLATILE_P (mem));
7271 /* Initialize the table of extra 80387 mathematical constants. */
7274 init_ext_80387_constants (void)
7276 static const char * cst[5] =
7278 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7279 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7280 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7281 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7282 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7286 for (i = 0; i < 5; i++)
7288 real_from_string (&ext_80387_constants_table[i], cst[i]);
7289 /* Ensure each constant is rounded to XFmode precision. */
7290 real_convert (&ext_80387_constants_table[i],
7291 XFmode, &ext_80387_constants_table[i]);
7294 ext_80387_constants_init = 1;
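/* For reference, these are the operands of the x87 load-constant
   instructions: fldlg2 pushes log10(2), fldln2 pushes ln(2), fldl2e
   pushes log2(e), fldl2t pushes log2(10) and fldpi pushes pi, each
   rounded here to XFmode just as the hardware delivers them in
   extended precision.  */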
7297 /* Return true if the constant is something that can be loaded with
7298 a special instruction. */
7301 standard_80387_constant_p (rtx x)
7303 enum machine_mode mode = GET_MODE (x);
7307 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7310 if (x == CONST0_RTX (mode))
7312 if (x == CONST1_RTX (mode))
7315 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7317 /* For XFmode constants, try to find a special 80387 instruction when
7318 optimizing for size or on those CPUs that benefit from them. */
7320 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7324 if (! ext_80387_constants_init)
7325 init_ext_80387_constants ();
7327 for (i = 0; i < 5; i++)
7328 if (real_identical (&r, &ext_80387_constants_table[i]))
7332 /* A load of the constant -0.0 or -1.0 will be split into an
7333 fldz;fchs or fld1;fchs sequence.  */
7334 if (real_isnegzero (&r))
7336 if (real_identical (&r, &dconstm1))
7342 /* Return the opcode of the special instruction to be used to load the constant X.  */
7346 standard_80387_constant_opcode (rtx x)
7348 switch (standard_80387_constant_p (x))
7372 /* Return the CONST_DOUBLE representing the 80387 constant that is
7373 loaded by the specified special instruction. The argument IDX
7374 matches the return value from standard_80387_constant_p. */
7377 standard_80387_constant_rtx (int idx)
7381 if (! ext_80387_constants_init)
7382 init_ext_80387_constants ();
7398 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7402 /* Return 1 if X is all 0s and 2 if X is all 1s
7403 in a supported SSE vector mode.  */
7406 standard_sse_constant_p (rtx x)
7408 enum machine_mode mode = GET_MODE (x);
7410 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7412 if (vector_all_ones_operand (x, mode))
7428 /* Return the opcode of the special instruction to be used to load the SSE constant X.  */
7432 standard_sse_constant_opcode (rtx insn, rtx x)
7434 switch (standard_sse_constant_p (x))
7437 switch (get_attr_mode (insn))
7440 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7442 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7444 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7446 return "vxorps\t%x0, %x0, %x0";
7448 return "vxorpd\t%x0, %x0, %x0";
7450 return "vpxor\t%x0, %x0, %x0";
7455 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
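/* Both cases rely on standard SSE idioms (illustrative): xorps, xorpd
   or pxor of a register with itself produces all-0 bits without
   touching memory, and

	pcmpeqd	%xmm0, %xmm0

   produces all-1 bits because every lane compares equal to itself -
   hence the 1 (all zeros) and 2 (all ones) return values of
   standard_sse_constant_p.  */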
7462 /* Returns 1 if OP contains a symbol reference.  */
7465 symbolic_reference_mentioned_p (rtx op)
7470 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7473 fmt = GET_RTX_FORMAT (GET_CODE (op));
7474 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7480 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7481 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7485 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7492 /* Return 1 if it is appropriate to emit `ret' instructions in the
7493 body of a function. Do this only if the epilogue is simple, needing a
7494 couple of insns. Prior to reloading, we can't tell how many registers
7495 must be saved, so return 0 then. Return 0 if there is no frame
7496 marker to de-allocate. */
7499 ix86_can_use_return_insn_p (void)
7501 struct ix86_frame frame;
7503 if (! reload_completed || frame_pointer_needed)
7506 /* Don't allow more than 32k of popped arguments, since that's all
7507 we can do with one instruction.  */
7508 if (crtl->args.pops_args
7509 && crtl->args.size >= 32768)
7512 ix86_compute_frame_layout (&frame);
7513 return frame.to_allocate == 0 && frame.padding0 == 0
7514 && (frame.nregs + frame.nsseregs) == 0;
7517 /* Value should be nonzero if functions must have frame pointers.
7518 Zero means the frame pointer need not be set up (and parms may
7519 be accessed via the stack pointer) in functions that seem suitable. */
7522 ix86_frame_pointer_required (void)
7524 /* If we accessed previous frames, then the generated code expects
7525 to be able to access the saved ebp value in our frame. */
7526 if (cfun->machine->accesses_prev_frame)
7529 /* Several x86 OSes need a frame pointer for other reasons,
7530 usually pertaining to setjmp. */
7531 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7534 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7535 the frame pointer by default. Turn it back on now if we've not
7536 got a leaf function. */
7537 if (TARGET_OMIT_LEAF_FRAME_POINTER
7538 && (!current_function_is_leaf
7539 || ix86_current_function_calls_tls_descriptor))
7548 /* Record that the current function accesses previous call frames. */
7551 ix86_setup_frame_addresses (void)
7553 cfun->machine->accesses_prev_frame = 1;
7556 #ifndef USE_HIDDEN_LINKONCE
7557 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7558 # define USE_HIDDEN_LINKONCE 1
7560 # define USE_HIDDEN_LINKONCE 0
7564 static int pic_labels_used;
7566 /* Fills in the label name that should be used for a pc thunk for
7567 the given register. */
7570 get_pc_thunk_name (char name[32], unsigned int regno)
7572 gcc_assert (!TARGET_64BIT);
7574 if (USE_HIDDEN_LINKONCE)
7575 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7577 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7581 /* This function generates code for -fpic that loads %ebx with
7582 the return address of the caller and then returns. */
7585 ix86_file_end (void)
7590 for (regno = 0; regno < 8; ++regno)
7593 #ifdef DWARF2_UNWIND_INFO
7597 if (! ((pic_labels_used >> regno) & 1))
7600 get_pc_thunk_name (name, regno);
7605 switch_to_section (darwin_sections[text_coal_section]);
7606 fputs ("\t.weak_definition\t", asm_out_file);
7607 assemble_name (asm_out_file, name);
7608 fputs ("\n\t.private_extern\t", asm_out_file);
7609 assemble_name (asm_out_file, name);
7610 fputs ("\n", asm_out_file);
7611 ASM_OUTPUT_LABEL (asm_out_file, name);
7615 if (USE_HIDDEN_LINKONCE)
7619 decl = build_decl (BUILTINS_LOCATION,
7620 FUNCTION_DECL, get_identifier (name),
7622 TREE_PUBLIC (decl) = 1;
7623 TREE_STATIC (decl) = 1;
7624 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7626 (*targetm.asm_out.unique_section) (decl, 0);
7627 switch_to_section (get_named_section (decl, NULL, 0));
7629 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7630 fputs ("\t.hidden\t", asm_out_file);
7631 assemble_name (asm_out_file, name);
7632 putc ('\n', asm_out_file);
7633 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7637 switch_to_section (text_section);
7638 ASM_OUTPUT_LABEL (asm_out_file, name);
7641 #ifdef DWARF2_UNWIND_INFO
7642 do_cfi = dwarf2out_do_cfi_asm ();
7644 fprintf (asm_out_file, "\t.cfi_startproc\n");
7646 xops[0] = gen_rtx_REG (Pmode, regno);
7647 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7648 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7649 output_asm_insn ("ret", xops);
7650 #ifdef DWARF2_UNWIND_INFO
7652 fprintf (asm_out_file, "\t.cfi_endproc\n");
7656 if (NEED_INDICATE_EXEC_STACK)
7657 file_end_indicate_exec_stack ();
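/* The thunk body emitted above is simply (for %ebx as the target):

       __i686.get_pc_thunk.bx:
	       movl	(%esp), %ebx
	       ret

   i.e. it copies its own return address - the address of the insn
   following the call - into the PIC register.  */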
7660 /* Emit code for the SET_GOT patterns. */
7663 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7669 if (TARGET_VXWORKS_RTP && flag_pic)
7671 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7672 xops[2] = gen_rtx_MEM (Pmode,
7673 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7674 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7676 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7677 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7678 an unadorned address. */
7679 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7680 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7681 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7685 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7687 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7689 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7692 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7695 output_asm_insn ("call\t%a2", xops);
7696 #ifdef DWARF2_UNWIND_INFO
7697 /* The call to the next label acts as a push.  */
7698 if (dwarf2out_do_frame ())
7702 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7703 gen_rtx_PLUS (Pmode,
7706 RTX_FRAME_RELATED_P (insn) = 1;
7707 dwarf2out_frame_debug (insn, true);
7714 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7715 is what will be referenced by the Mach-O PIC subsystem. */
7717 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7720 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7721 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7725 output_asm_insn ("pop%z0\t%0", xops);
7726 #ifdef DWARF2_UNWIND_INFO
7727 /* The pop is a pop and clobbers dest, but doesn't restore it
7728 for unwind info purposes. */
7729 if (dwarf2out_do_frame ())
7733 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
7734 dwarf2out_frame_debug (insn, true);
7735 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
7736 gen_rtx_PLUS (Pmode,
7739 RTX_FRAME_RELATED_P (insn) = 1;
7740 dwarf2out_frame_debug (insn, true);
7749 get_pc_thunk_name (name, REGNO (dest));
7750 pic_labels_used |= 1 << REGNO (dest);
7752 #ifdef DWARF2_UNWIND_INFO
7753 /* Ensure all queued register saves are flushed before the call.  */
7755 if (dwarf2out_do_frame ())
7759 insn = emit_barrier ();
7761 dwarf2out_frame_debug (insn, false);
7764 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7765 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7766 output_asm_insn ("call\t%X2", xops);
7767 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7768 is what will be referenced by the Mach-O PIC subsystem. */
7771 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7773 targetm.asm_out.internal_label (asm_out_file, "L",
7774 CODE_LABEL_NUMBER (label));
7781 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7782 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7784 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
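/* For reference, the two sequences printed above look roughly like
   (with %ebx as DEST):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   versus the thunk-based form

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   which keeps the call/return predictor stack balanced on CPUs with
   deep branch prediction.  */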
7789 /* Generate a "push" pattern for input ARG.  */
7794 if (ix86_cfa_state->reg == stack_pointer_rtx)
7795 ix86_cfa_state->offset += UNITS_PER_WORD;
7797 return gen_rtx_SET (VOIDmode,
7799 gen_rtx_PRE_DEC (Pmode,
7800 stack_pointer_rtx)),
7804 /* Return >= 0 if there is an unused call-clobbered register available
7805 for the entire function. */
7808 ix86_select_alt_pic_regnum (void)
7810 if (current_function_is_leaf && !crtl->profile
7811 && !ix86_current_function_calls_tls_descriptor)
7814 /* Can't use the same register for both PIC and DRAP. */
7816 drap = REGNO (crtl->drap_reg);
7819 for (i = 2; i >= 0; --i)
7820 if (i != drap && !df_regs_ever_live_p (i))
7824 return INVALID_REGNUM;
7827 /* Return 1 if we need to save REGNO. */
7829 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7831 if (pic_offset_table_rtx
7832 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7833 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7835 || crtl->calls_eh_return
7836 || crtl->uses_const_pool))
7838 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7843 if (crtl->calls_eh_return && maybe_eh_return)
7848 unsigned test = EH_RETURN_DATA_REGNO (i);
7849 if (test == INVALID_REGNUM)
7856 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
7859 return (df_regs_ever_live_p (regno)
7860 && !call_used_regs[regno]
7861 && !fixed_regs[regno]
7862 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7865 /* Return the number of saved general purpose registers.  */
7868 ix86_nsaved_regs (void)
7873 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7874 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7879 /* Return the number of saved SSE registers.  */
7882 ix86_nsaved_sseregs (void)
7887 if (ix86_cfun_abi () != MS_ABI)
7889 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7890 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7895 /* Given FROM and TO register numbers, say whether this elimination is
7896 allowed. If stack alignment is needed, we can only replace argument
7897 pointer with hard frame pointer, or replace frame pointer with stack
7898 pointer. Otherwise, frame pointer elimination is automatically
7899 handled and all other eliminations are valid. */
7902 ix86_can_eliminate (const int from, const int to)
7904 if (stack_realign_fp)
7905 return ((from == ARG_POINTER_REGNUM
7906 && to == HARD_FRAME_POINTER_REGNUM)
7907 || (from == FRAME_POINTER_REGNUM
7908 && to == STACK_POINTER_REGNUM));
7910 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
7913 /* Return the offset between two registers, one to be eliminated, and the other
7914 its replacement, at the start of a routine. */
7917 ix86_initial_elimination_offset (int from, int to)
7919 struct ix86_frame frame;
7920 ix86_compute_frame_layout (&frame);
7922 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7923 return frame.hard_frame_pointer_offset;
7924 else if (from == FRAME_POINTER_REGNUM
7925 && to == HARD_FRAME_POINTER_REGNUM)
7926 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7929 gcc_assert (to == STACK_POINTER_REGNUM);
7931 if (from == ARG_POINTER_REGNUM)
7932 return frame.stack_pointer_offset;
7934 gcc_assert (from == FRAME_POINTER_REGNUM);
7935 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7939 /* In a dynamically-aligned function, we can't know the offset from
7940 stack pointer to frame pointer, so we must ensure that setjmp
7941 eliminates fp against the hard fp (%ebp) rather than trying to
7942 index from %esp up to the top of the frame across a gap that is
7943 of unknown (at compile-time) size. */
7945 ix86_builtin_setjmp_frame_value (void)
7947 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7950 /* Fill the ix86_frame structure with information about the frame of the function currently being compiled.  */
7953 ix86_compute_frame_layout (struct ix86_frame *frame)
7955 unsigned int stack_alignment_needed;
7956 HOST_WIDE_INT offset;
7957 unsigned int preferred_alignment;
7958 HOST_WIDE_INT size = get_frame_size ();
7960 frame->nregs = ix86_nsaved_regs ();
7961 frame->nsseregs = ix86_nsaved_sseregs ();
7963 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7964 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7966 /* The MS ABI seems to require stack alignment to always be 16, except in function prologues and leaf functions.  */
7968 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7970 preferred_alignment = 16;
7971 stack_alignment_needed = 16;
7972 crtl->preferred_stack_boundary = 128;
7973 crtl->stack_alignment_needed = 128;
7976 gcc_assert (!size || stack_alignment_needed);
7977 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7978 gcc_assert (preferred_alignment <= stack_alignment_needed);
7980 /* During reload iterations the number of registers saved can change.
7981 Recompute the value as needed.  Do not recompute when the number of registers
7982 didn't change, as reload does multiple calls to the function and does not
7983 expect the decision to change within a single iteration.  */
7984 if (!optimize_function_for_size_p (cfun)
7985 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7987 int count = frame->nregs;
7989 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7990 /* The fast prologue uses moves instead of pushes to save registers.  This
7991 is significantly longer, but also executes faster, as modern hardware
7992 can execute the moves in parallel but can't do that for push/pop.

7994 Be careful about choosing which prologue to emit: when the function takes
7995 many instructions to execute, we may use the slow version, as well as when
7996 the function is known to be outside a hot spot (this is known with
7997 feedback only).  Weight the size of the function by the number of registers
7998 to save, as it is cheap to use one or two push instructions but very
7999 slow to use many of them.  */
8001 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8002 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
8003 || (flag_branch_probabilities
8004 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
8005 cfun->machine->use_fast_prologue_epilogue = false;
8007 cfun->machine->use_fast_prologue_epilogue
8008 = !expensive_function_p (count);
8010 if (TARGET_PROLOGUE_USING_MOVE
8011 && cfun->machine->use_fast_prologue_epilogue)
8012 frame->save_regs_using_mov = true;
8014 frame->save_regs_using_mov = false;
8016 /* Skip return address. */
8017 offset = UNITS_PER_WORD;
8019 /* Skip pushed static chain. */
8020 if (ix86_static_chain_on_stack)
8021 offset += UNITS_PER_WORD;
8023 /* Skip saved base pointer. */
8024 if (frame_pointer_needed)
8025 offset += UNITS_PER_WORD;
8027 frame->hard_frame_pointer_offset = offset;
8029 /* Set offset to the aligned value because the realigned frame starts here.  */
8031 if (stack_realign_fp)
8032 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8034 /* Register save area */
8035 offset += frame->nregs * UNITS_PER_WORD;
8037 /* Align SSE reg save area. */
8038 if (frame->nsseregs)
8039 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
8041 frame->padding0 = 0;
8043 /* SSE register save area. */
8044 offset += frame->padding0 + frame->nsseregs * 16;
8047 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8048 offset += frame->va_arg_size;
8050 /* Align start of frame for local function. */
8051 frame->padding1 = ((offset + stack_alignment_needed - 1)
8052 & -stack_alignment_needed) - offset;
8054 offset += frame->padding1;
8056 /* Frame pointer points here. */
8057 frame->frame_pointer_offset = offset;
8061 /* Add the outgoing arguments area.  It can be skipped if we eliminated
8062 all the function calls as dead code.
8063 Skipping is however impossible when the function calls alloca: the alloca
8064 expander assumes that the last crtl->outgoing_args_size bytes
8065 of the stack frame are unused.  */
8066 if (ACCUMULATE_OUTGOING_ARGS
8067 && (!current_function_is_leaf || cfun->calls_alloca
8068 || ix86_current_function_calls_tls_descriptor))
8070 offset += crtl->outgoing_args_size;
8071 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8074 frame->outgoing_arguments_size = 0;
8076 /* Align to the stack boundary.  Only needed if we're calling another function or using alloca.  */
8078 if (!current_function_is_leaf || cfun->calls_alloca
8079 || ix86_current_function_calls_tls_descriptor)
8080 frame->padding2 = ((offset + preferred_alignment - 1)
8081 & -preferred_alignment) - offset;
8083 frame->padding2 = 0;
8085 offset += frame->padding2;
8087 /* We've reached end of stack frame. */
8088 frame->stack_pointer_offset = offset;
8090 /* The size the prologue needs to allocate.  */
8091 frame->to_allocate =
8092 (size + frame->padding1 + frame->padding2
8093 + frame->outgoing_arguments_size + frame->va_arg_size);
8095 if ((!frame->to_allocate && frame->nregs <= 1)
8096 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8097 frame->save_regs_using_mov = false;
8099 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8100 && current_function_sp_is_unchanging
8101 && current_function_is_leaf
8102 && !ix86_current_function_calls_tls_descriptor)
8104 frame->red_zone_size = frame->to_allocate;
8105 if (frame->save_regs_using_mov)
8106 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8107 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8108 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8111 frame->red_zone_size = 0;
8112 frame->to_allocate -= frame->red_zone_size;
8113 frame->stack_pointer_offset -= frame->red_zone_size;
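/* Putting the computation above together, the frame looks like this
   (higher addresses first; a sketch, not a normative layout):

	return address
	[pushed static chain]
	[saved frame pointer]	<- hard_frame_pointer_offset
	saved GP registers
	padding0, saved SSE registers
	va_arg register save area
	padding1		<- frame_pointer_offset
	local variables
	outgoing arguments
	padding2		<- stack_pointer_offset

   to_allocate covers the locals, both paddings, the va_arg area and
   the outgoing arguments - everything the prologue must subtract from
   the stack pointer after the register pushes - minus whatever fits in
   the red zone.  */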
8116 /* Emit code to save registers in the prologue. */
8119 ix86_emit_save_regs (void)
8124 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8125 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8127 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8128 RTX_FRAME_RELATED_P (insn) = 1;
8132 /* Emit code to save registers using MOV insns.  The first register
8133 is stored at POINTER + OFFSET.  */
8135 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8140 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8141 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8143 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8145 gen_rtx_REG (Pmode, regno));
8146 RTX_FRAME_RELATED_P (insn) = 1;
8147 offset += UNITS_PER_WORD;
8151 /* Emit code to save SSE registers using MOV insns.  The first register
8152 is stored at POINTER + OFFSET.  */
8154 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8160 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8161 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8163 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8164 set_mem_align (mem, 128);
8165 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8166 RTX_FRAME_RELATED_P (insn) = 1;
8171 static GTY(()) rtx queued_cfa_restores;
8173 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until the next
8174 stack manipulation insn. Don't add it if the previously
8175 saved value will be left untouched within the stack red-zone till return,
8176 as unwinders can find the same value in the register and on stack. */
8180 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8183 && !TARGET_64BIT_MS_ABI
8184 && red_offset + RED_ZONE_SIZE >= 0
8185 && crtl->args.pops_args < 65536)
8190 add_reg_note (insn, REG_CFA_RESTORE, reg);
8191 RTX_FRAME_RELATED_P (insn) = 1;
8195 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8198 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8201 ix86_add_queued_cfa_restore_notes (rtx insn)
8204 if (!queued_cfa_restores)
8206 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8208 XEXP (last, 1) = REG_NOTES (insn);
8209 REG_NOTES (insn) = queued_cfa_restores;
8210 queued_cfa_restores = NULL_RTX;
8211 RTX_FRAME_RELATED_P (insn) = 1;
8214 /* Expand prologue or epilogue stack adjustment.
8215 The pattern exists to put a dependency on all ebp-based memory accesses.
8216 STYLE should be negative if instructions should be marked as frame related,
8217 zero if the %r11 register is live and cannot be freely used, and positive
8218 otherwise. */
8221 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8222 int style, bool set_cfa)
8227 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8228 else if (x86_64_immediate_operand (offset, DImode))
8229 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8233 /* r11 is used by indirect sibcall return as well, set before the
8234 epilogue and used after the epilogue. ATM indirect sibcall
8235 shouldn't be used together with huge frame sizes in one
8236 function because of the frame_size check in sibcall.c. */
8238 r11 = gen_rtx_REG (DImode, R11_REG);
8239 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8241 RTX_FRAME_RELATED_P (insn) = 1;
8242 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8247 ix86_add_queued_cfa_restore_notes (insn);
8253 gcc_assert (ix86_cfa_state->reg == src);
8254 ix86_cfa_state->offset += INTVAL (offset);
8255 ix86_cfa_state->reg = dest;
8257 r = gen_rtx_PLUS (Pmode, src, offset);
8258 r = gen_rtx_SET (VOIDmode, dest, r);
8259 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8260 RTX_FRAME_RELATED_P (insn) = 1;
8263 RTX_FRAME_RELATED_P (insn) = 1;
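/* Usage sketch (illustrative): a prologue allocation of ALLOCATE bytes is
   requested as
	pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
				   GEN_INT (-allocate), -1,
				   ix86_cfa_state->reg == stack_pointer_rtx);
   which normally becomes "sub $allocate, %sp"; only when the constant does
   not fit a signed 32-bit immediate on x86-64 is it staged through %r11
   first, as handled above.  */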
8266 /* Find an available register to be used as a dynamic realign argument
8267 pointer register. Such a register will be written in the prologue and
8268 used at the beginning of the body, so it must not be
8269 1. a parameter passing register, or
8270 2. the GOT pointer.
8271 We reuse the static-chain register if it is available. Otherwise, we
8272 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8273 longer instruction encoding.
8275 Return: the regno of the chosen register. */
8278 find_drap_reg (void)
8280 tree decl = cfun->decl;
8284 /* Use R13 for a nested function or a function that needs a static chain.
8285 Since a function with a tail call may use any caller-saved
8286 register in the epilogue, DRAP must not use a caller-saved
8287 register in that case. */
8288 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8295 /* Use DI for a nested function or a function that needs a static chain.
8296 Since a function with a tail call may use any caller-saved
8297 register in the epilogue, DRAP must not use a caller-saved
8298 register in that case. */
8299 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8302 /* Reuse the static chain register if it isn't used for parameter
8303 passing. */
8304 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8305 && !lookup_attribute ("fastcall",
8306 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8313 /* Return minimum incoming stack alignment. */
8316 ix86_minimum_incoming_stack_boundary (bool sibcall)
8318 unsigned int incoming_stack_boundary;
8320 /* Prefer the one specified at command line. */
8321 if (ix86_user_incoming_stack_boundary)
8322 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8323 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8324 when -mstackrealign is used, this isn't a sibcall check, and the
8325 estimated stack alignment is 128 bits. */
8328 && ix86_force_align_arg_pointer
8329 && crtl->stack_alignment_estimated == 128)
8330 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8332 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8334 /* The incoming stack alignment can be changed for individual functions
8335 via the force_align_arg_pointer attribute. We use the smallest
8336 incoming stack boundary. */
8337 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8338 && lookup_attribute (ix86_force_align_arg_pointer_string,
8339 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8340 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8342 /* The incoming stack frame has to be aligned at least at
8343 parm_stack_boundary. */
8344 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8345 incoming_stack_boundary = crtl->parm_stack_boundary;
8347 /* The stack at the entry of main is aligned by the runtime. We use
8348 the smallest incoming stack boundary. */
8349 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8350 && DECL_NAME (current_function_decl)
8351 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8352 && DECL_FILE_SCOPE_P (current_function_decl))
8353 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8355 return incoming_stack_boundary;
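/* Worked example (illustrative): a user-requested 32-bit boundary
   combined with a double argument that forces crtl->parm_stack_boundary
   to 64 yields 64; conversely, for a file-scope main any boundary above
   MAIN_STACK_BOUNDARY is lowered to it, since the runtime already
   guarantees that much alignment at the entry to main.  */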
8358 /* Update incoming stack boundary and estimated stack alignment. */
8361 ix86_update_stack_boundary (void)
8363 ix86_incoming_stack_boundary
8364 = ix86_minimum_incoming_stack_boundary (false);
8366 /* x86_64 varargs need 16-byte stack alignment for the register save
8367 area. */
8370 && crtl->stack_alignment_estimated < 128)
8371 crtl->stack_alignment_estimated = 128;
8374 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8375 needed or an rtx for DRAP otherwise. */
8378 ix86_get_drap_rtx (void)
8380 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8381 crtl->need_drap = true;
8383 if (stack_realign_drap)
8385 /* Assign DRAP to vDRAP and return vDRAP. */
8386 unsigned int regno = find_drap_reg ();
8391 arg_ptr = gen_rtx_REG (Pmode, regno);
8392 crtl->drap_reg = arg_ptr;
8395 drap_vreg = copy_to_reg (arg_ptr);
8399 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8402 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8403 RTX_FRAME_RELATED_P (insn) = 1;
8411 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8414 ix86_internal_arg_pointer (void)
8416 return virtual_incoming_args_rtx;
8419 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8420 to be generated in correct form. */
8422 ix86_finalize_stack_realign_flags (void)
8424 /* Check whether stack realignment is really needed after reload, and
8425 store the result in cfun. */
8426 unsigned int incoming_stack_boundary
8427 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8428 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8429 unsigned int stack_realign = (incoming_stack_boundary
8430 < (current_function_is_leaf
8431 ? crtl->max_used_stack_slot_alignment
8432 : crtl->stack_alignment_needed));
8434 if (crtl->stack_realign_finalized)
8436 /* After stack_realign_needed is finalized, we can no longer
8437 change it. */
8438 gcc_assert (crtl->stack_realign_needed == stack_realign);
8442 crtl->stack_realign_needed = stack_realign;
8443 crtl->stack_realign_finalized = true;
8447 /* Expand the prologue into a bunch of separate insns. */
8450 ix86_expand_prologue (void)
8454 struct ix86_frame frame;
8455 HOST_WIDE_INT allocate;
8456 int gen_frame_pointer = frame_pointer_needed;
8458 ix86_finalize_stack_realign_flags ();
8460 /* DRAP should not coexist with stack_realign_fp */
8461 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8463 /* Initialize CFA state for before the prologue. */
8464 ix86_cfa_state->reg = stack_pointer_rtx;
8465 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
8467 ix86_compute_frame_layout (&frame);
8469 if (ix86_function_ms_hook_prologue (current_function_decl))
8473 /* Make sure the function starts with
8474 8b ff movl.s %edi,%edi
8475 55 push %ebp
8476 8b ec movl.s %esp,%ebp
8478 This matches the hookable function prologue in Win32 API
8479 functions in Microsoft Windows XP Service Pack 2 and newer.
8480 Wine uses this to enable Windows apps to hook the Win32 API
8481 functions provided by Wine. */
8482 insn = emit_insn (gen_vswapmov (gen_rtx_REG (SImode, DI_REG),
8483 gen_rtx_REG (SImode, DI_REG)));
8484 push = emit_insn (gen_push (hard_frame_pointer_rtx));
8485 mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
8486 stack_pointer_rtx));
8488 if (frame_pointer_needed && !(crtl->drap_reg
8489 && crtl->stack_realign_needed))
8491 /* The push %ebp and movl.s %esp, %ebp already set up
8492 the frame pointer. No need to do this again. */
8493 gen_frame_pointer = 0;
8494 RTX_FRAME_RELATED_P (push) = 1;
8495 RTX_FRAME_RELATED_P (mov) = 1;
8496 if (ix86_cfa_state->reg == stack_pointer_rtx)
8497 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8500 /* If the frame pointer is not needed, pop %ebp again. This
8501 could be optimized for cases where ebp needs to be backed up
8502 for some other reason. If stack realignment is needed, pop
8503 the base pointer again, align the stack, and later regenerate
8504 the frame pointer setup. The frame pointer generated by the
8505 hook prologue is not aligned, so it can't be used. */
8506 insn = emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8509 /* The first insn of a function that accepts its static chain on the
8510 stack is to push the register that would be filled in by a direct
8511 call. This insn will be skipped by the trampoline. */
8512 if (ix86_static_chain_on_stack)
8516 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
8517 emit_insn (gen_blockage ());
8519 /* We don't want to interpret this push insn as a register save,
8520 only as a stack adjustment. The real copy of the register as
8521 a save will be done later, if needed. */
8522 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
8523 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8524 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8525 RTX_FRAME_RELATED_P (insn) = 1;
8528 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8529 DRAP is needed and stack realignment is really needed after reload. */
8530 if (crtl->drap_reg && crtl->stack_realign_needed)
8533 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8534 int param_ptr_offset = UNITS_PER_WORD;
8536 if (ix86_static_chain_on_stack)
8537 param_ptr_offset += UNITS_PER_WORD;
8538 if (!call_used_regs[REGNO (crtl->drap_reg)])
8539 param_ptr_offset += UNITS_PER_WORD;
8541 gcc_assert (stack_realign_drap);
8543 /* Grab the argument pointer. */
8544 x = plus_constant (stack_pointer_rtx, param_ptr_offset);
8547 /* Only need to push the parameter pointer reg if it is caller
8548 saved. */
8549 if (!call_used_regs[REGNO (crtl->drap_reg)])
8551 /* Push arg pointer reg */
8552 insn = emit_insn (gen_push (y));
8553 RTX_FRAME_RELATED_P (insn) = 1;
8556 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8557 RTX_FRAME_RELATED_P (insn) = 1;
8558 ix86_cfa_state->reg = crtl->drap_reg;
8560 /* Align the stack. */
8561 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8563 GEN_INT (-align_bytes)));
8564 RTX_FRAME_RELATED_P (insn) = 1;
8566 /* Replicate the return address on the stack so that return
8567 address can be reached via (argp - 1) slot. This is needed
8568 to implement macro RETURN_ADDR_RTX and intrinsic function
8569 expand_builtin_return_addr etc. */
8571 x = gen_frame_mem (Pmode,
8572 plus_constant (x, -UNITS_PER_WORD));
8573 insn = emit_insn (gen_push (x));
8574 RTX_FRAME_RELATED_P (insn) = 1;
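/* Resulting code sketch (illustrative; assumes -m32, 16-byte
   realignment, no static chain, and %ecx chosen by find_drap_reg):
	leal	4(%esp), %ecx	# grab the incoming argument pointer
	andl	$-16, %esp	# align the stack
	pushl	-4(%ecx)	# replicate the return address
   which keeps the return address reachable at the (argp - 1) slot.  */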
8577 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8578 slower on all targets. Also sdb doesn't like it. */
8580 if (gen_frame_pointer)
8582 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8583 RTX_FRAME_RELATED_P (insn) = 1;
8585 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8586 RTX_FRAME_RELATED_P (insn) = 1;
8588 if (ix86_cfa_state->reg == stack_pointer_rtx)
8589 ix86_cfa_state->reg = hard_frame_pointer_rtx;
8592 if (stack_realign_fp)
8594 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8595 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8597 /* Align the stack. */
8598 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8600 GEN_INT (-align_bytes)));
8601 RTX_FRAME_RELATED_P (insn) = 1;
8604 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
8606 if (!frame.save_regs_using_mov)
8607 ix86_emit_save_regs ();
8609 allocate += frame.nregs * UNITS_PER_WORD;
8611 /* When using the red zone we may start register saving before allocating
8612 the stack frame, saving one cycle of the prologue. However, we avoid
8613 doing this if we are going to have to probe the stack, since
8614 at least on x86_64 the stack probe can turn into a call that clobbers
8615 a red zone location. */
8616 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8617 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8618 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8619 && !crtl->stack_realign_needed)
8620 ? hard_frame_pointer_rtx
8621 : stack_pointer_rtx,
8622 -frame.nregs * UNITS_PER_WORD);
8626 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8627 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8628 GEN_INT (-allocate), -1,
8629 ix86_cfa_state->reg == stack_pointer_rtx);
8632 /* Only valid for Win32. */
8633 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8637 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI);
8639 if (cfun->machine->call_abi == MS_ABI)
8642 eax_live = ix86_eax_live_at_start_p ();
8646 emit_insn (gen_push (eax));
8647 allocate -= UNITS_PER_WORD;
8650 emit_move_insn (eax, GEN_INT (allocate));
8653 insn = gen_allocate_stack_worker_64 (eax, eax);
8655 insn = gen_allocate_stack_worker_32 (eax, eax);
8656 insn = emit_insn (insn);
8658 if (ix86_cfa_state->reg == stack_pointer_rtx)
8660 ix86_cfa_state->offset += allocate;
8661 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8662 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8663 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
8664 RTX_FRAME_RELATED_P (insn) = 1;
8669 if (frame_pointer_needed)
8670 t = plus_constant (hard_frame_pointer_rtx,
8673 - frame.nregs * UNITS_PER_WORD);
8675 t = plus_constant (stack_pointer_rtx, allocate);
8676 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8680 if (frame.save_regs_using_mov
8681 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8682 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8684 if (!frame_pointer_needed
8685 || !(frame.to_allocate + frame.padding0)
8686 || crtl->stack_realign_needed)
8687 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8689 + frame.nsseregs * 16 + frame.padding0);
8691 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8692 -frame.nregs * UNITS_PER_WORD);
8694 if (!frame_pointer_needed
8695 || !(frame.to_allocate + frame.padding0)
8696 || crtl->stack_realign_needed)
8697 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8700 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8701 - frame.nregs * UNITS_PER_WORD
8702 - frame.nsseregs * 16
8705 pic_reg_used = false;
8706 if (pic_offset_table_rtx
8707 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8710 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8712 if (alt_pic_reg_used != INVALID_REGNUM)
8713 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8715 pic_reg_used = true;
8722 if (ix86_cmodel == CM_LARGE_PIC)
8724 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8725 rtx label = gen_label_rtx ();
8727 LABEL_PRESERVE_P (label) = 1;
8728 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8729 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8730 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8731 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8732 pic_offset_table_rtx, tmp_reg));
8735 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8738 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8741 /* In the pic_reg_used case, make sure that the got load isn't deleted
8742 when mcount needs it. Blockage to avoid call movement across mcount
8743 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8744 note. */
8745 if (crtl->profile && pic_reg_used)
8746 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8748 if (crtl->drap_reg && !crtl->stack_realign_needed)
8750 /* vDRAP is set up, but after reload it turns out stack realignment
8751 isn't necessary; here we emit the prologue to set up DRAP
8752 without the stack realignment adjustment. */
8754 int drap_bp_offset = UNITS_PER_WORD * 2;
8756 if (ix86_static_chain_on_stack)
8757 drap_bp_offset += UNITS_PER_WORD;
8758 x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8759 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8762 /* Prevent instructions from being scheduled into register save push
8763 sequence when access to the redzone area is done through frame pointer.
8764 The offset between the frame pointer and the stack pointer is calculated
8765 relative to the value of the stack pointer at the end of the function
8766 prologue, and moving instructions that access redzone area via frame
8767 pointer inside push sequence violates this assumption. */
8768 if (frame_pointer_needed && frame.red_zone_size)
8769 emit_insn (gen_memory_blockage ());
8771 /* Emit cld instruction if stringops are used in the function. */
8772 if (TARGET_CLD && ix86_current_function_needs_cld)
8773 emit_insn (gen_cld ());
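/* Overall shape (illustrative): for a typical -m32 function with a frame
   pointer the insns emitted above amount to
	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx		# push-based saves, or MOVs when
	subl	$N, %esp	#   frame.save_regs_using_mov is set
   followed, when flag_pic, by the set_got sequence that loads the PIC
   register.  */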
8776 /* Emit code to restore REG using a POP insn. */
8779 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
8781 rtx insn = emit_insn (ix86_gen_pop1 (reg));
8783 if (ix86_cfa_state->reg == crtl->drap_reg
8784 && REGNO (reg) == REGNO (crtl->drap_reg))
8786 /* Previously we'd represented the CFA as an expression
8787 like *(%ebp - 8). We've just popped that value from
8788 the stack, which means we need to reset the CFA to
8789 the drap register. This will remain until we restore
8790 the stack pointer. */
8791 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8792 RTX_FRAME_RELATED_P (insn) = 1;
8796 if (ix86_cfa_state->reg == stack_pointer_rtx)
8798 ix86_cfa_state->offset -= UNITS_PER_WORD;
8799 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8800 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
8801 RTX_FRAME_RELATED_P (insn) = 1;
8804 /* When the frame pointer is the CFA, and we pop it, we are
8805 swapping back to the stack pointer as the CFA. This happens
8806 for stack frames that don't allocate other data, so we assume
8807 the stack pointer is now pointing at the return address, i.e.
8808 the function entry state, which makes the offset be 1 word. */
8809 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
8810 && reg == hard_frame_pointer_rtx)
8812 ix86_cfa_state->reg = stack_pointer_rtx;
8813 ix86_cfa_state->offset -= UNITS_PER_WORD;
8815 add_reg_note (insn, REG_CFA_DEF_CFA,
8816 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8817 GEN_INT (ix86_cfa_state->offset)));
8818 RTX_FRAME_RELATED_P (insn) = 1;
8821 ix86_add_cfa_restore_note (insn, reg, red_offset);
8824 /* Emit code to restore saved registers using POP insns. */
8827 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
8831 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8832 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8834 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
8836 red_offset += UNITS_PER_WORD;
8840 /* Emit code and notes for the LEAVE instruction. */
8843 ix86_emit_leave (HOST_WIDE_INT red_offset)
8845 rtx insn = emit_insn (ix86_gen_leave ());
8847 ix86_add_queued_cfa_restore_notes (insn);
8849 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
8851 ix86_cfa_state->reg = stack_pointer_rtx;
8852 ix86_cfa_state->offset -= UNITS_PER_WORD;
8854 add_reg_note (insn, REG_CFA_ADJUST_CFA,
8855 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
8856 RTX_FRAME_RELATED_P (insn) = 1;
8857 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
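/* For reference: "leave" is equivalent to
	movl	%ebp, %esp
	popl	%ebp
   which is why the CFA switches back to the stack pointer with its
   offset reduced by one word, exactly as for an explicit pop of the
   frame pointer.  */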
8861 /* Emit code to restore saved registers using MOV insns. The first
8862 register is restored from POINTER + OFFSET. */
8864 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8865 HOST_WIDE_INT red_offset,
8866 int maybe_eh_return)
8869 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8872 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8873 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8875 rtx reg = gen_rtx_REG (Pmode, regno);
8877 /* Ensure that adjust_address won't be forced to produce pointer
8878 out of range allowed by x86-64 instruction set. */
8879 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8883 r11 = gen_rtx_REG (DImode, R11_REG);
8884 emit_move_insn (r11, GEN_INT (offset));
8885 emit_insn (gen_adddi3 (r11, r11, pointer));
8886 base_address = gen_rtx_MEM (Pmode, r11);
8889 insn = emit_move_insn (reg,
8890 adjust_address (base_address, Pmode, offset));
8891 offset += UNITS_PER_WORD;
8893 if (ix86_cfa_state->reg == crtl->drap_reg
8894 && regno == REGNO (crtl->drap_reg))
8896 /* Previously we'd represented the CFA as an expression
8897 like *(%ebp - 8). We've just popped that value from
8898 the stack, which means we need to reset the CFA to
8899 the drap register. This will remain until we restore
8900 the stack pointer. */
8901 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
8902 RTX_FRAME_RELATED_P (insn) = 1;
8905 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8907 red_offset += UNITS_PER_WORD;
8911 /* Emit code to restore saved SSE registers using MOV insns. The first
8912 register is restored from POINTER + OFFSET. */
8914 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8915 HOST_WIDE_INT red_offset,
8916 int maybe_eh_return)
8919 rtx base_address = gen_rtx_MEM (TImode, pointer);
8922 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8923 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8925 rtx reg = gen_rtx_REG (TImode, regno);
8927 /* Ensure that adjust_address won't be forced to produce pointer
8928 out of range allowed by x86-64 instruction set. */
8929 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8933 r11 = gen_rtx_REG (DImode, R11_REG);
8934 emit_move_insn (r11, GEN_INT (offset));
8935 emit_insn (gen_adddi3 (r11, r11, pointer));
8936 base_address = gen_rtx_MEM (TImode, r11);
8939 mem = adjust_address (base_address, TImode, offset);
8940 set_mem_align (mem, 128);
8941 emit_move_insn (reg, mem);
8944 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
8950 /* Restore function stack, frame, and registers. */
8953 ix86_expand_epilogue (int style)
8956 struct ix86_frame frame;
8957 HOST_WIDE_INT offset, red_offset;
8958 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
8961 ix86_finalize_stack_realign_flags ();
8963 /* When stack is realigned, SP must be valid. */
8964 sp_valid = (!frame_pointer_needed
8965 || current_function_sp_is_unchanging
8966 || stack_realign_fp);
8968 ix86_compute_frame_layout (&frame);
8970 /* See the comment about red zone and frame
8971 pointer usage in ix86_expand_prologue. */
8972 if (frame_pointer_needed && frame.red_zone_size)
8973 emit_insn (gen_memory_blockage ());
8975 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
8976 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
8978 /* Calculate start of saved registers relative to ebp. Special care
8979 must be taken for the normal return case of a function using
8980 eh_return: the eax and edx registers are marked as saved, but not
8981 restored along this path. */
8982 offset = frame.nregs;
8983 if (crtl->calls_eh_return && style != 2)
8985 offset *= -UNITS_PER_WORD;
8986 offset -= frame.nsseregs * 16 + frame.padding0;
8988 /* Calculate start of saved registers relative to esp on entry of the
8989 function. When realigning stack, this needs to be the most negative
8990 value possible at runtime. */
8991 red_offset = offset;
8993 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8995 else if (stack_realign_fp)
8996 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
8998 if (ix86_static_chain_on_stack)
8999 red_offset -= UNITS_PER_WORD;
9000 if (frame_pointer_needed)
9001 red_offset -= UNITS_PER_WORD;
9003 /* If we're only restoring one register and sp is not valid, then
9004 use a move instruction to restore the register, since it's
9005 less work than reloading sp and popping the register.
9007 The default code results in a stack adjustment using an add/lea instruction,
9008 while this code results in a LEAVE instruction (or discrete equivalent),
9009 so it is profitable in some other cases as well. Especially when there
9010 are no registers to restore. We also use this code when TARGET_USE_LEAVE
9011 and there is exactly one register to pop. This heuristic may need some
9012 tuning in the future. */
9013 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
9014 || (TARGET_EPILOGUE_USING_MOVE
9015 && cfun->machine->use_fast_prologue_epilogue
9016 && ((frame.nregs + frame.nsseregs) > 1
9017 || (frame.to_allocate + frame.padding0) != 0))
9018 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
9019 && (frame.to_allocate + frame.padding0) != 0)
9020 || (frame_pointer_needed && TARGET_USE_LEAVE
9021 && cfun->machine->use_fast_prologue_epilogue
9022 && (frame.nregs + frame.nsseregs) == 1)
9023 || crtl->calls_eh_return)
9025 /* Restore registers. We can use ebp or esp to address the memory
9026 locations. If both are available, default to ebp, since offsets
9027 are known to be small. Only exception is esp pointing directly
9028 to the end of the block of saved registers, where we may simplify
9029 the addressing mode.
9031 If we are realigning the stack with bp and sp, register restores can't
9032 be addressed by bp; sp must be used instead. */
9034 if (!frame_pointer_needed
9035 || (sp_valid && !(frame.to_allocate + frame.padding0))
9036 || stack_realign_fp)
9038 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9039 frame.to_allocate, red_offset,
9041 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
9043 + frame.nsseregs * 16
9046 + frame.nsseregs * 16
9047 + frame.padding0, style == 2);
9051 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
9054 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
9056 + frame.nsseregs * 16
9059 + frame.nsseregs * 16
9060 + frame.padding0, style == 2);
9063 red_offset -= offset;
9065 /* eh_return epilogues need %ecx added to the stack pointer. */
9068 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
9070 /* Stack align doesn't work with eh_return. */
9071 gcc_assert (!crtl->stack_realign_needed);
9072 /* Neither do regparm nested functions. */
9073 gcc_assert (!ix86_static_chain_on_stack);
9075 if (frame_pointer_needed)
9077 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9078 tmp = plus_constant (tmp, UNITS_PER_WORD);
9079 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9081 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9082 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
9084 /* Note that we use SA as a temporary CFA, as the return
9085 address is at the proper place relative to it. We
9086 pretend this happens at the FP restore insn because
9087 prior to this insn the FP would be stored at the wrong
9088 offset relative to SA, and after this insn we have no
9089 other reasonable register to use for the CFA. We don't
9090 bother resetting the CFA to the SP for the duration of
9091 the return insn. */
9092 add_reg_note (tmp, REG_CFA_DEF_CFA,
9093 plus_constant (sa, UNITS_PER_WORD));
9094 ix86_add_queued_cfa_restore_notes (tmp);
9095 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9096 RTX_FRAME_RELATED_P (tmp) = 1;
9097 ix86_cfa_state->reg = sa;
9098 ix86_cfa_state->offset = UNITS_PER_WORD;
9100 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9101 const0_rtx, style, false);
9105 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9106 tmp = plus_constant (tmp, (frame.to_allocate
9107 + frame.nregs * UNITS_PER_WORD
9108 + frame.nsseregs * 16
9110 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9111 ix86_add_queued_cfa_restore_notes (tmp);
9113 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9114 if (ix86_cfa_state->offset != UNITS_PER_WORD)
9116 ix86_cfa_state->offset = UNITS_PER_WORD;
9117 add_reg_note (tmp, REG_CFA_DEF_CFA,
9118 plus_constant (stack_pointer_rtx,
9120 RTX_FRAME_RELATED_P (tmp) = 1;
9124 else if (!frame_pointer_needed)
9125 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9126 GEN_INT (frame.to_allocate
9127 + frame.nregs * UNITS_PER_WORD
9128 + frame.nsseregs * 16
9130 style, !using_drap);
9131 /* If not an i386, mov & pop is faster than "leave". */
9132 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9133 || !cfun->machine->use_fast_prologue_epilogue)
9134 ix86_emit_leave (red_offset);
9137 pro_epilogue_adjust_stack (stack_pointer_rtx,
9138 hard_frame_pointer_rtx,
9139 const0_rtx, style, !using_drap);
9141 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
9146 /* The first step is to deallocate the stack frame so that we can
9147 pop the registers.
9149 If we realign the stack with the frame pointer, then the stack pointer
9150 can't be recovered via lea $offset(%bp), %sp, because
9151 there is a padding area between bp and sp for the realignment.
9152 "add $to_allocate, %sp" must be used instead. */
9155 gcc_assert (frame_pointer_needed);
9156 gcc_assert (!stack_realign_fp);
9157 pro_epilogue_adjust_stack (stack_pointer_rtx,
9158 hard_frame_pointer_rtx,
9159 GEN_INT (offset), style, false);
9160 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9163 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9164 GEN_INT (frame.nsseregs * 16
9168 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9170 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9171 frame.to_allocate, red_offset,
9173 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9174 GEN_INT (frame.to_allocate
9175 + frame.nsseregs * 16
9176 + frame.padding0), style,
9177 !using_drap && !frame_pointer_needed);
9180 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9182 red_offset -= offset;
9184 if (frame_pointer_needed)
9186 /* Leave results in shorter dependency chains on CPUs that are
9187 able to grok it fast. */
9188 if (TARGET_USE_LEAVE)
9189 ix86_emit_leave (red_offset);
9192 /* If stack realignment really happened, recovering the stack
9193 pointer from the hard frame pointer is a must if we are not
9194 using leave. */
9195 if (stack_realign_fp)
9196 pro_epilogue_adjust_stack (stack_pointer_rtx,
9197 hard_frame_pointer_rtx,
9198 const0_rtx, style, !using_drap);
9199 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9207 int param_ptr_offset = UNITS_PER_WORD;
9210 gcc_assert (stack_realign_drap);
9212 if (ix86_static_chain_on_stack)
9213 param_ptr_offset += UNITS_PER_WORD;
9214 if (!call_used_regs[REGNO (crtl->drap_reg)])
9215 param_ptr_offset += UNITS_PER_WORD;
9217 insn = emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
9219 GEN_INT (-param_ptr_offset)));
9221 ix86_cfa_state->reg = stack_pointer_rtx;
9222 ix86_cfa_state->offset = param_ptr_offset;
9224 add_reg_note (insn, REG_CFA_DEF_CFA,
9225 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9226 GEN_INT (ix86_cfa_state->offset)));
9227 RTX_FRAME_RELATED_P (insn) = 1;
9229 if (!call_used_regs[REGNO (crtl->drap_reg)])
9230 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9233 /* Remove the saved static chain from the stack. The use of ECX is
9234 merely as a scratch register, not as the actual static chain. */
9235 if (ix86_static_chain_on_stack)
9239 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9240 ix86_cfa_state->offset += UNITS_PER_WORD;
9242 r = gen_rtx_REG (Pmode, CX_REG);
9243 insn = emit_insn (ix86_gen_pop1 (r));
9245 r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9246 r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
9247 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
9248 RTX_FRAME_RELATED_P (insn) = 1;
9251 /* Sibcall epilogues don't want a return instruction. */
9254 *ix86_cfa_state = cfa_state_save;
9258 if (crtl->args.pops_args && crtl->args.size)
9260 rtx popc = GEN_INT (crtl->args.pops_args);
9262 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
9263 address, do an explicit add, and jump indirectly to the caller. */
9265 if (crtl->args.pops_args >= 65536)
9267 rtx ecx = gen_rtx_REG (SImode, CX_REG);
9270 /* There is no "pascal" calling convention in any 64bit ABI. */
9271 gcc_assert (!TARGET_64BIT);
9273 insn = emit_insn (gen_popsi1 (ecx));
9274 ix86_cfa_state->offset -= UNITS_PER_WORD;
9276 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9277 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9278 add_reg_note (insn, REG_CFA_REGISTER,
9279 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
9280 RTX_FRAME_RELATED_P (insn) = 1;
9282 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9284 emit_jump_insn (gen_return_indirect_internal (ecx));
9287 emit_jump_insn (gen_return_pop_internal (popc));
9290 emit_jump_insn (gen_return_internal ());
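/* Sketch of the two pop-on-return forms (illustrative, ia32):
	ret	$N		# pops_args < 64K
   versus
	popl	%ecx		# return address into a scratch reg
	addl	$N, %esp	# pop the arguments
	jmp	*%ecx		# return to the caller
   since "ret imm16" only accepts a 16-bit immediate.  */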
9292 /* Restore the state back to the state from the prologue,
9293 so that it's correct for the next epilogue. */
9294 *ix86_cfa_state = cfa_state_save;
9297 /* Reset from the function's potential modifications. */
9300 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9301 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
9303 if (pic_offset_table_rtx)
9304 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
9306 /* Mach-O doesn't support labels at the end of objects, so if
9307 it looks like we might want one, insert a NOP. */
9309 rtx insn = get_last_insn ();
9312 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
9313 insn = PREV_INSN (insn);
9317 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
9318 fputs ("\tnop\n", file);
9324 /* Extract the parts of an RTL expression that is a valid memory address
9325 for an instruction. Return 0 if the structure of the address is
9326 grossly off. Return -1 if the address contains ASHIFT, so it is not
9327 strictly valid, but is still used for computing the length of the
9328 lea instruction. */
9330 ix86_decompose_address (rtx addr, struct ix86_address *out)
9332 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
9333 rtx base_reg, index_reg;
9334 HOST_WIDE_INT scale = 1;
9335 rtx scale_rtx = NULL_RTX;
9337 enum ix86_address_seg seg = SEG_DEFAULT;
9339 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
9341 else if (GET_CODE (addr) == PLUS)
9351 addends[n++] = XEXP (op, 1);
9354 while (GET_CODE (op) == PLUS);
9359 for (i = n; i >= 0; --i)
9362 switch (GET_CODE (op))
9367 index = XEXP (op, 0);
9368 scale_rtx = XEXP (op, 1);
9372 if (XINT (op, 1) == UNSPEC_TP
9373 && TARGET_TLS_DIRECT_SEG_REFS
9374 && seg == SEG_DEFAULT)
9375 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
9404 else if (GET_CODE (addr) == MULT)
9406 index = XEXP (addr, 0); /* index*scale */
9407 scale_rtx = XEXP (addr, 1);
9409 else if (GET_CODE (addr) == ASHIFT)
9413 /* We're called for lea too, which implements ashift on occasion. */
9414 index = XEXP (addr, 0);
9415 tmp = XEXP (addr, 1);
9416 if (!CONST_INT_P (tmp))
9418 scale = INTVAL (tmp);
9419 if ((unsigned HOST_WIDE_INT) scale > 3)
9425 disp = addr; /* displacement */
9427 /* Extract the integral value of scale. */
9430 if (!CONST_INT_P (scale_rtx))
9432 scale = INTVAL (scale_rtx);
9435 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
9436 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
9438 /* Avoid useless 0 displacement. */
9439 if (disp == const0_rtx && (base || index))
9442 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
9443 if (base_reg && index_reg && scale == 1
9444 && (index_reg == arg_pointer_rtx
9445 || index_reg == frame_pointer_rtx
9446 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
9449 tmp = base, base = index, index = tmp;
9450 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
9453 /* Special case: %ebp cannot be encoded as a base without a displacement.
9454 Similarly %r13. */
9457 && (base_reg == hard_frame_pointer_rtx
9458 || base_reg == frame_pointer_rtx
9459 || base_reg == arg_pointer_rtx
9460 || (REG_P (base_reg)
9461 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
9462 || REGNO (base_reg) == R13_REG))))
9465 /* Special case: on K6, [%esi] makes the instruction vector decoded.
9466 Avoid this by transforming to [%esi+0].
9467 Reload calls address legitimization without cfun defined, so we need
9468 to test cfun for being non-NULL. */
9469 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
9470 && base_reg && !index_reg && !disp
9472 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
9475 /* Special case: encode reg+reg instead of reg*2. */
9476 if (!base && index && scale == 2)
9477 base = index, base_reg = index_reg, scale = 1;
9479 /* Special case: scaling cannot be encoded without base or displacement. */
9480 if (!base && !disp && index && scale != 1)
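/* Decomposition example (illustrative): the canonical RTX
	(plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))
   yields base = A, index = B, scale = 4, disp = 12, i.e. the operand
   12(%eax,%ebx,4) in AT&T syntax; a lone index scaled by 2 is instead
   re-encoded as base + index by the special case above.  */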
9492 /* Return the cost of the memory address x.
9493 For i386, it is better to use a complex address than let gcc copy
9494 the address into a reg and make a new pseudo. But not if the address
9495 requires two regs - that would mean more pseudos with longer
9496 lifetimes. */
9498 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9500 struct ix86_address parts;
9502 int ok = ix86_decompose_address (x, &parts);
9506 if (parts.base && GET_CODE (parts.base) == SUBREG)
9507 parts.base = SUBREG_REG (parts.base);
9508 if (parts.index && GET_CODE (parts.index) == SUBREG)
9509 parts.index = SUBREG_REG (parts.index);
9511 /* Attempt to minimize number of registers in the address. */
9513 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9515 && (!REG_P (parts.index)
9516 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9520 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9522 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9523 && parts.base != parts.index)
9526 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
9527 since its predecode logic can't detect the length of instructions
9528 and it degenerates to vector decoded. Increase the cost of such
9529 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
9530 to split such addresses or even refuse such addresses at all.
9532 The following addressing modes are affected:
9533 [base+scale*index]
9534 [scale*index+disp]
9535 [base+index]
9537 The first and last case may be avoidable by explicitly coding the zero in
9538 a memory address, but I don't have an AMD-K6 machine handy to check this
9539 theory. */
9542 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9543 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9544 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9550 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9551 this is used to form addresses to local data when -fPIC is in
9552 effect. */
9555 darwin_local_data_pic (rtx disp)
9557 return (GET_CODE (disp) == UNSPEC
9558 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9561 /* Determine if a given RTX is a valid constant. We already know this
9562 satisfies CONSTANT_P. */
9565 legitimate_constant_p (rtx x)
9567 switch (GET_CODE (x))
9572 if (GET_CODE (x) == PLUS)
9574 if (!CONST_INT_P (XEXP (x, 1)))
9579 if (TARGET_MACHO && darwin_local_data_pic (x))
9582 /* Only some unspecs are valid as "constants". */
9583 if (GET_CODE (x) == UNSPEC)
9584 switch (XINT (x, 1))
9589 return TARGET_64BIT;
9592 x = XVECEXP (x, 0, 0);
9593 return (GET_CODE (x) == SYMBOL_REF
9594 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9596 x = XVECEXP (x, 0, 0);
9597 return (GET_CODE (x) == SYMBOL_REF
9598 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9603 /* We must have drilled down to a symbol. */
9604 if (GET_CODE (x) == LABEL_REF)
9606 if (GET_CODE (x) != SYMBOL_REF)
9611 /* TLS symbols are never valid. */
9612 if (SYMBOL_REF_TLS_MODEL (x))
9615 /* DLLIMPORT symbols are never valid. */
9616 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9617 && SYMBOL_REF_DLLIMPORT_P (x))
9622 if (GET_MODE (x) == TImode
9623 && x != CONST0_RTX (TImode)
9629 if (!standard_sse_constant_p (x))
9636 /* Otherwise we handle everything else in the move patterns. */
9640 /* Determine if it's legal to put X into the constant pool. This
9641 is not possible for the address of thread-local symbols, which
9642 is checked above. */
9645 ix86_cannot_force_const_mem (rtx x)
9647 /* We can always put integral constants and vectors in memory. */
9648 switch (GET_CODE (x))
9658 return !legitimate_constant_p (x);
9662 /* Nonzero if the constant value X is a legitimate general operand
9663 when generating PIC code. It is given that flag_pic is on and
9664 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9667 legitimate_pic_operand_p (rtx x)
9671 switch (GET_CODE (x))
9674 inner = XEXP (x, 0);
9675 if (GET_CODE (inner) == PLUS
9676 && CONST_INT_P (XEXP (inner, 1)))
9677 inner = XEXP (inner, 0);
9679 /* Only some unspecs are valid as "constants". */
9680 if (GET_CODE (inner) == UNSPEC)
9681 switch (XINT (inner, 1))
9686 return TARGET_64BIT;
9688 x = XVECEXP (inner, 0, 0);
9689 return (GET_CODE (x) == SYMBOL_REF
9690 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9691 case UNSPEC_MACHOPIC_OFFSET:
9692 return legitimate_pic_address_disp_p (x);
9700 return legitimate_pic_address_disp_p (x);
9707 /* Determine if a given CONST RTX is a valid memory displacement
9708 in PIC mode. */
9711 legitimate_pic_address_disp_p (rtx disp)
9715 /* In 64bit mode we can allow direct addresses of symbols and labels
9716 when they are not dynamic symbols. */
9719 rtx op0 = disp, op1;
9721 switch (GET_CODE (disp))
9727 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9729 op0 = XEXP (XEXP (disp, 0), 0);
9730 op1 = XEXP (XEXP (disp, 0), 1);
9731 if (!CONST_INT_P (op1)
9732 || INTVAL (op1) >= 16*1024*1024
9733 || INTVAL (op1) < -16*1024*1024)
9735 if (GET_CODE (op0) == LABEL_REF)
9737 if (GET_CODE (op0) != SYMBOL_REF)
9742 /* TLS references should always be enclosed in UNSPEC. */
9743 if (SYMBOL_REF_TLS_MODEL (op0))
9745 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9746 && ix86_cmodel != CM_LARGE_PIC)
9754 if (GET_CODE (disp) != CONST)
9756 disp = XEXP (disp, 0);
9760 /* It is unsafe to allow PLUS expressions. This limits the allowed
9761 distance of GOT table references. We should not need these anyway. */
9762 if (GET_CODE (disp) != UNSPEC
9763 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9764 && XINT (disp, 1) != UNSPEC_GOTOFF
9765 && XINT (disp, 1) != UNSPEC_PLTOFF))
9768 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9769 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9775 if (GET_CODE (disp) == PLUS)
9777 if (!CONST_INT_P (XEXP (disp, 1)))
9779 disp = XEXP (disp, 0);
9783 if (TARGET_MACHO && darwin_local_data_pic (disp))
9786 if (GET_CODE (disp) != UNSPEC)
9789 switch (XINT (disp, 1))
9794 /* We need to check for both symbols and labels because VxWorks loads
9795 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9796 details. */
9797 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9798 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9800 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9801 While the ABI also specifies a 32bit relocation, we don't produce
9802 it in the small PIC model at all. */
9803 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9804 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9806 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9808 case UNSPEC_GOTTPOFF:
9809 case UNSPEC_GOTNTPOFF:
9810 case UNSPEC_INDNTPOFF:
9813 disp = XVECEXP (disp, 0, 0);
9814 return (GET_CODE (disp) == SYMBOL_REF
9815 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9817 disp = XVECEXP (disp, 0, 0);
9818 return (GET_CODE (disp) == SYMBOL_REF
9819 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9821 disp = XVECEXP (disp, 0, 0);
9822 return (GET_CODE (disp) == SYMBOL_REF
9823 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9829 /* Recognizes RTL expressions that are valid memory addresses for an
9830 instruction. The MODE argument is the machine mode for the MEM
9831 expression that wants to use this address.
9833 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
9834 convert common non-canonical forms to canonical form so that they will
9835 be recognized. */
9838 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9839 rtx addr, bool strict)
9841 struct ix86_address parts;
9842 rtx base, index, disp;
9843 HOST_WIDE_INT scale;
9845 if (ix86_decompose_address (addr, &parts) <= 0)
9846 /* Decomposition failed. */
9850 index = parts.index;
9852 scale = parts.scale;
9854 /* Validate base register.
9856 Don't allow SUBREG's that span more than a word here. It can lead to spill
9857 failures when the base is one word out of a two word structure, which is
9858 represented internally as a DImode int. */
9866 else if (GET_CODE (base) == SUBREG
9867 && REG_P (SUBREG_REG (base))
9868 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9870 reg = SUBREG_REG (base);
9872 /* Base is not a register. */
9875 if (GET_MODE (base) != Pmode)
9876 /* Base is not in Pmode. */
9879 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9880 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9881 /* Base is not valid. */
9885 /* Validate index register.
9887 Don't allow SUBREG's that span more than a word here -- same as above. */
9895 else if (GET_CODE (index) == SUBREG
9896 && REG_P (SUBREG_REG (index))
9897 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9899 reg = SUBREG_REG (index);
9901 /* Index is not a register. */
9904 if (GET_MODE (index) != Pmode)
9905 /* Index is not in Pmode. */
9908 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9909 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9910 /* Index is not valid. */
9914 /* Validate scale factor. */
9918 /* Scale without index. */
9921 if (scale != 2 && scale != 4 && scale != 8)
9922 /* Scale is not a valid multiplier. */
9926 /* Validate displacement. */
9929 if (GET_CODE (disp) == CONST
9930 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9931 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9932 switch (XINT (XEXP (disp, 0), 1))
9934 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9935 used. While the ABI also specifies 32bit relocations, we don't produce
9936 them at all and use IP relative addressing instead. */
9939 gcc_assert (flag_pic);
9941 goto is_legitimate_pic;
9943 /* 64bit address unspec. */
9946 case UNSPEC_GOTPCREL:
9947 gcc_assert (flag_pic);
9948 goto is_legitimate_pic;
9950 case UNSPEC_GOTTPOFF:
9951 case UNSPEC_GOTNTPOFF:
9952 case UNSPEC_INDNTPOFF:
9958 /* Invalid address unspec. */
9962 else if (SYMBOLIC_CONST (disp)
9966 && MACHOPIC_INDIRECT
9967 && !machopic_operand_p (disp)
9973 if (TARGET_64BIT && (index || base))
9975 /* foo@dtpoff(%rX) is ok. */
9976 if (GET_CODE (disp) != CONST
9977 || GET_CODE (XEXP (disp, 0)) != PLUS
9978 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9979 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9980 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9981 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9982 /* Non-constant pic memory reference. */
9985 else if (! legitimate_pic_address_disp_p (disp))
9986 /* Displacement is an invalid pic construct. */
9989 /* This code used to verify that a symbolic pic displacement
9990 includes the pic_offset_table_rtx register.
9992 While this is a good idea, unfortunately these constructs may
9993 be created by the "adds using lea" optimization for incorrect
9994 code like:
9996 int a;
9997 int foo (int i)
9998 {
9999 return *(&a+i);
10000 }
10002 This code is nonsensical, but results in addressing the
10003 GOT table with a pic_offset_table_rtx base. We can't
10004 just refuse it easily, since it gets matched by the
10005 "addsi3" pattern, which later gets split to lea in the
10006 case the output register differs from the input. While this
10007 could be handled by a separate addsi pattern for this case
10008 that never results in lea, disabling this test seems to be the
10009 easier and correct fix for the crash. */
10011 else if (GET_CODE (disp) != LABEL_REF
10012 && !CONST_INT_P (disp)
10013 && (GET_CODE (disp) != CONST
10014 || !legitimate_constant_p (disp))
10015 && (GET_CODE (disp) != SYMBOL_REF
10016 || !legitimate_constant_p (disp)))
10017 /* Displacement is not constant. */
10019 else if (TARGET_64BIT
10020 && !x86_64_immediate_operand (disp, VOIDmode))
10021 /* Displacement is out of range. */
10025 /* Everything looks valid. */
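/* Examples (illustrative): (%eax), 4(%esp) and 12(%eax,%ebx,4) are all
   accepted; (%eax,%ebx,3) fails the scale-factor check above, and on
   x86-64 a displacement outside the signed 32-bit range fails the final
   x86_64_immediate_operand test.  */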
10029 /* Determine if a given RTX is a valid constant address. */
10032 constant_address_p (rtx x)
10034 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10037 /* Return a unique alias set for the GOT. */
10039 static alias_set_type
10040 ix86_GOT_alias_set (void)
10042 static alias_set_type set = -1;
10044 set = new_alias_set ();
10048 /* Return a legitimate reference for ORIG (an address) using the
10049 register REG. If REG is 0, a new pseudo is generated.
10051 There are two types of references that must be handled:
10053 1. Global data references must load the address from the GOT, via
10054 the PIC reg. An insn is emitted to do this load, and the reg is
10057 2. Static data references, constant pool addresses, and code labels
10058 compute the address as an offset from the GOT, whose base is in
10059 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10060 differentiate them from global data objects. The returned
10061 address is the PIC reg + an unspec constant.
10063 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10064 reg also appears in the address. */
10067 legitimize_pic_address (rtx orig, rtx reg)
10070 rtx new_rtx = orig;
10074 if (TARGET_MACHO && !TARGET_64BIT)
10077 reg = gen_reg_rtx (Pmode);
10078 /* Use the generic Mach-O PIC machinery. */
10079 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10083 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10085 else if (TARGET_64BIT
10086 && ix86_cmodel != CM_SMALL_PIC
10087 && gotoff_operand (addr, Pmode))
10090 /* This symbol may be referenced via a displacement from the PIC
10091 base address (@GOTOFF). */
10093 if (reload_in_progress)
10094 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10095 if (GET_CODE (addr) == CONST)
10096 addr = XEXP (addr, 0);
10097 if (GET_CODE (addr) == PLUS)
10099 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10101 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10104 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10105 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10107 tmpreg = gen_reg_rtx (Pmode);
10110 emit_move_insn (tmpreg, new_rtx);
10114 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10115 tmpreg, 1, OPTAB_DIRECT);
10118 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10120 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10122 /* This symbol may be referenced via a displacement from the PIC
10123 base address (@GOTOFF). */
10125 if (reload_in_progress)
10126 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10127 if (GET_CODE (addr) == CONST)
10128 addr = XEXP (addr, 0);
10129 if (GET_CODE (addr) == PLUS)
10131 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10133 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10136 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10137 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10138 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10142 emit_move_insn (reg, new_rtx);
10146 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10147 /* We can't use @GOTOFF for text labels on VxWorks;
10148 see gotoff_operand. */
10149 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10151 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10153 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10154 return legitimize_dllimport_symbol (addr, true);
10155 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10156 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10157 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10159 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10160 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10164 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10166 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10167 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10168 new_rtx = gen_const_mem (Pmode, new_rtx);
10169 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10172 reg = gen_reg_rtx (Pmode);
10173 /* Use gen_movsi directly; otherwise the address is loaded
10174 into a register for CSE. We don't want to CSE these addresses;
10175 instead we CSE addresses from the GOT table, so skip this. */
10176 emit_insn (gen_movsi (reg, new_rtx));
10181 /* This symbol must be referenced via a load from the
10182 Global Offset Table (@GOT). */
10184 if (reload_in_progress)
10185 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10186 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10187 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10189 new_rtx = force_reg (Pmode, new_rtx);
10190 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10191 new_rtx = gen_const_mem (Pmode, new_rtx);
10192 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10195 reg = gen_reg_rtx (Pmode);
10196 emit_move_insn (reg, new_rtx);
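/* Resulting code sketch (illustrative, ia32): a global symbol "x" is
   addressed through the GOT as
	movl	x@GOT(%ebx), %reg	# load &x
   i.e. the PIC register plus the UNSPEC_GOT constant, wrapped in a
   const mem carrying the GOT alias set, as built above.  */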
10202 if (CONST_INT_P (addr)
10203 && !x86_64_immediate_operand (addr, VOIDmode))
10207 emit_move_insn (reg, addr);
10211 new_rtx = force_reg (Pmode, addr);
10213 else if (GET_CODE (addr) == CONST)
10215 addr = XEXP (addr, 0);
10217 /* We must match stuff we generate before. Assume the only
10218 unspecs that can get here are ours. Not that we could do
10219 anything with them anyway.... */
10220 if (GET_CODE (addr) == UNSPEC
10221 || (GET_CODE (addr) == PLUS
10222 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10224 gcc_assert (GET_CODE (addr) == PLUS);
10226 if (GET_CODE (addr) == PLUS)
10228 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
10230 /* Check first to see if this is a constant offset from a @GOTOFF
10231 symbol reference. */
10232 if (gotoff_operand (op0, Pmode)
10233 && CONST_INT_P (op1))
10237 if (reload_in_progress)
10238 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10239 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
10241 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
10242 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10243 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10247 emit_move_insn (reg, new_rtx);
10253 if (INTVAL (op1) < -16*1024*1024
10254 || INTVAL (op1) >= 16*1024*1024)
10256 if (!x86_64_immediate_operand (op1, Pmode))
10257 op1 = force_reg (Pmode, op1);
10258 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
10264 base = legitimize_pic_address (XEXP (addr, 0), reg);
10265 new_rtx = legitimize_pic_address (XEXP (addr, 1),
10266 base == reg ? NULL_RTX : reg);
10268 if (CONST_INT_P (new_rtx))
10269 new_rtx = plus_constant (base, INTVAL (new_rtx));
10272 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
10274 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
10275 new_rtx = XEXP (new_rtx, 1);
10277 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
10285 /* Load the thread pointer. If TO_REG is true, force it into a register. */
10288 get_thread_pointer (int to_reg)
10292 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
10296 reg = gen_reg_rtx (Pmode);
10297 insn = gen_rtx_SET (VOIDmode, reg, tp);
10298 insn = emit_insn (insn);
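/* For reference (illustrative, GNU/Linux): the UNSPEC_TP expression
   denotes the thread-pointer segment base, so forcing it into a
   register amounts to
	movl	%gs:0, %reg	# ia32
	movq	%fs:0, %reg	# x86-64
   while leaving it as an address lets TARGET_TLS_DIRECT_SEG_REFS fold
   the segment override into the TLS access itself.  */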
10303 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
10304 false if we expect this to be used for a memory address and true if
10305 we expect to load the address into a register. */
10308 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
10310 rtx dest, base, off, pic, tp;
10315 case TLS_MODEL_GLOBAL_DYNAMIC:
10316 dest = gen_reg_rtx (Pmode);
10317 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10319 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10321 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
10324 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
10325 insns = get_insns ();
10328 RTL_CONST_CALL_P (insns) = 1;
10329 emit_libcall_block (insns, dest, rax, x);
10331 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10332 emit_insn (gen_tls_global_dynamic_64 (dest, x));
10334 emit_insn (gen_tls_global_dynamic_32 (dest, x));
10336 if (TARGET_GNU2_TLS)
10338 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
10340 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10344 case TLS_MODEL_LOCAL_DYNAMIC:
10345 base = gen_reg_rtx (Pmode);
10346 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
10348 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
10350 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
10353 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
10354 insns = get_insns ();
10357 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
10358 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
10359 RTL_CONST_CALL_P (insns) = 1;
10360 emit_libcall_block (insns, base, rax, note);
10362 else if (TARGET_64BIT && TARGET_GNU2_TLS)
10363 emit_insn (gen_tls_local_dynamic_base_64 (base));
10365 emit_insn (gen_tls_local_dynamic_base_32 (base));
10367 if (TARGET_GNU2_TLS)
10369 rtx x = ix86_tls_module_base ();
10371 set_unique_reg_note (get_last_insn (), REG_EQUIV,
10372 gen_rtx_MINUS (Pmode, x, tp));
10375 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
10376 off = gen_rtx_CONST (Pmode, off);
10378 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
10380 if (TARGET_GNU2_TLS)
10382 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
10384 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
10389 case TLS_MODEL_INITIAL_EXEC:
10393 type = UNSPEC_GOTNTPOFF;
10397 if (reload_in_progress)
10398 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10399 pic = pic_offset_table_rtx;
10400 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
10402 else if (!TARGET_ANY_GNU_TLS)
10404 pic = gen_reg_rtx (Pmode);
10405 emit_insn (gen_set_got (pic));
10406 type = UNSPEC_GOTTPOFF;
10411 type = UNSPEC_INDNTPOFF;
10414 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
10415 off = gen_rtx_CONST (Pmode, off);
10417 off = gen_rtx_PLUS (Pmode, pic, off);
10418 off = gen_const_mem (Pmode, off);
10419 set_mem_alias_set (off, ix86_GOT_alias_set ());
10421 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10423 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10424 off = force_reg (Pmode, off);
10425 return gen_rtx_PLUS (Pmode, base, off);
10429 base = get_thread_pointer (true);
10430 dest = gen_reg_rtx (Pmode);
10431 emit_insn (gen_subsi3 (dest, base, off));
10435 case TLS_MODEL_LOCAL_EXEC:
10436 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
10437 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10438 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
10439 off = gen_rtx_CONST (Pmode, off);
10441 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
10443 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
10444 return gen_rtx_PLUS (Pmode, base, off);
10448 base = get_thread_pointer (true);
10449 dest = gen_reg_rtx (Pmode);
10450 emit_insn (gen_subsi3 (dest, base, off));
10455 gcc_unreachable ();
10461 /* Create or return the unique __imp_DECL dllimport symbol corresponding to DECL.  */
10464 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10465 htab_t dllimport_map;
10468 get_dllimport_decl (tree decl)
10470 struct tree_map *h, in;
10473 const char *prefix;
10474 size_t namelen, prefixlen;
10479 if (!dllimport_map)
10480 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10482 in.hash = htab_hash_pointer (decl);
10483 in.base.from = decl;
10484 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10485 h = (struct tree_map *) *loc;
10489 *loc = h = GGC_NEW (struct tree_map);
10491 h->base.from = decl;
10492 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
10493 VAR_DECL, NULL, ptr_type_node);
10494 DECL_ARTIFICIAL (to) = 1;
10495 DECL_IGNORED_P (to) = 1;
10496 DECL_EXTERNAL (to) = 1;
10497 TREE_READONLY (to) = 1;
10499 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10500 name = targetm.strip_name_encoding (name);
10501 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10502 ? "*__imp_" : "*__imp__";
10503 namelen = strlen (name);
10504 prefixlen = strlen (prefix);
10505 imp_name = (char *) alloca (namelen + prefixlen + 1);
10506 memcpy (imp_name, prefix, prefixlen);
10507 memcpy (imp_name + prefixlen, name, namelen + 1);
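/* E.g. (an illustration): a decl whose stripped assembler name is "foo"
   typically yields "*__imp__foo" here, naming the import-table slot that
   the linker creates for the dllimported symbol.  */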
10509 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10510 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10511 SET_SYMBOL_REF_DECL (rtl, to);
10512 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10514 rtl = gen_const_mem (Pmode, rtl);
10515 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10517 SET_DECL_RTL (to, rtl);
10518 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10523 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10524 true if we require the result be a register. */
10527 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10532 gcc_assert (SYMBOL_REF_DECL (symbol));
10533 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10535 x = DECL_RTL (imp_decl);
10537 x = force_reg (Pmode, x);
10541 /* Try machine-dependent ways of modifying an illegitimate address
10542 to be legitimate. If we find one, return the new, valid address.
10543 This macro is used in only one place: `memory_address' in explow.c.
10545 OLDX is the address as it was before break_out_memory_refs was called.
10546 In some cases it is useful to look at this to decide what needs to be done.
10548 It is always safe for this macro to do nothing. It exists to recognize
10549 opportunities to optimize the output.
10551 For the 80386, we handle X+REG by loading X into a register R and
10552 using R+REG. R will go in a general reg and indexing will be used.
10553 However, if REG is a broken-out memory address or multiplication,
10554 nothing needs to be done because REG can certainly go in a general reg.
10556 When -fpic is used, special handling is needed for symbolic references.
10557 See comments by legitimize_pic_address in i386.c for details. */
10560 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
10561 enum machine_mode mode)
10566 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10568 return legitimize_tls_address (x, (enum tls_model) log, false);
10569 if (GET_CODE (x) == CONST
10570 && GET_CODE (XEXP (x, 0)) == PLUS
10571 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10572 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10574 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10575 (enum tls_model) log, false);
10576 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10579 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10581 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10582 return legitimize_dllimport_symbol (x, true);
10583 if (GET_CODE (x) == CONST
10584 && GET_CODE (XEXP (x, 0)) == PLUS
10585 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10586 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10588 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10589 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10593 if (flag_pic && SYMBOLIC_CONST (x))
10594 return legitimize_pic_address (x, 0);
10596 /* Canonicalize shifts by 0, 1, 2, 3 into multiply.  */
10597 if (GET_CODE (x) == ASHIFT
10598 && CONST_INT_P (XEXP (x, 1))
10599 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10602 log = INTVAL (XEXP (x, 1));
10603 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10604 GEN_INT (1 << log));
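/* E.g. (an illustration): (ashift %eax 2) becomes (mult %eax 4), which
   matches the base + index*scale shape of the 386 addressing modes and
   can later be emitted as "leal 0(,%eax,4), %ecx".  */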
10607 if (GET_CODE (x) == PLUS)
10609 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10611 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10612 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10613 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10616 log = INTVAL (XEXP (XEXP (x, 0), 1));
10617 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10618 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10619 GEN_INT (1 << log));
10622 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10623 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10624 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10627 log = INTVAL (XEXP (XEXP (x, 1), 1));
10628 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10629 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10630 GEN_INT (1 << log));
10633 /* Put multiply first if it isn't already. */
10634 if (GET_CODE (XEXP (x, 1)) == MULT)
10636 rtx tmp = XEXP (x, 0);
10637 XEXP (x, 0) = XEXP (x, 1);
10642 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10643 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10644 created by virtual register instantiation, register elimination, and
10645 similar optimizations. */
10646 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10649 x = gen_rtx_PLUS (Pmode,
10650 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10651 XEXP (XEXP (x, 1), 0)),
10652 XEXP (XEXP (x, 1), 1));
10656 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10657 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10658 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10659 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10660 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10661 && CONSTANT_P (XEXP (x, 1)))
10664 rtx other = NULL_RTX;
10666 if (CONST_INT_P (XEXP (x, 1)))
10668 constant = XEXP (x, 1);
10669 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10671 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10673 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10674 other = XEXP (x, 1);
10682 x = gen_rtx_PLUS (Pmode,
10683 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10684 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10685 plus_constant (other, INTVAL (constant)));
10689 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10692 if (GET_CODE (XEXP (x, 0)) == MULT)
10695 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10698 if (GET_CODE (XEXP (x, 1)) == MULT)
10701 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10705 && REG_P (XEXP (x, 1))
10706 && REG_P (XEXP (x, 0)))
10709 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10712 x = legitimize_pic_address (x, 0);
10715 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
10718 if (REG_P (XEXP (x, 0)))
10720 rtx temp = gen_reg_rtx (Pmode);
10721 rtx val = force_operand (XEXP (x, 1), temp);
10723 emit_move_insn (temp, val);
10725 XEXP (x, 1) = temp;
10729 else if (REG_P (XEXP (x, 1)))
10731 rtx temp = gen_reg_rtx (Pmode);
10732 rtx val = force_operand (XEXP (x, 0), temp);
10734 emit_move_insn (temp, val);
10736 XEXP (x, 0) = temp;
10744 /* Print an integer constant expression in assembler syntax. Addition
10745 and subtraction are the only arithmetic that may appear in these
10746 expressions. FILE is the stdio stream to write to, X is the rtx, and
10747 CODE is the operand print code from the output string. */
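/* E.g. (an illustration): under -fpic a symbol operand may be printed as
   "foo@GOTOFF" or "foo@PLT" rather than a bare "foo".  */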
10750 output_pic_addr_const (FILE *file, rtx x, int code)
10754 switch (GET_CODE (x))
10757 gcc_assert (flag_pic);
10762 if (! TARGET_MACHO || TARGET_64BIT)
10763 output_addr_const (file, x);
10766 const char *name = XSTR (x, 0);
10768 /* Mark the decl as referenced so that cgraph will
10769 output the function. */
10770 if (SYMBOL_REF_DECL (x))
10771 mark_decl_referenced (SYMBOL_REF_DECL (x));
10774 if (MACHOPIC_INDIRECT
10775 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10776 name = machopic_indirection_name (x, /*stub_p=*/true);
10778 assemble_name (file, name);
10780 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10781 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10782 fputs ("@PLT", file);
10789 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10790 assemble_name (asm_out_file, buf);
10794 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10798 /* This used to output parentheses around the expression,
10799 but that does not work on the 386 (either ATT or BSD assembler). */
10800 output_pic_addr_const (file, XEXP (x, 0), code);
10804 if (GET_MODE (x) == VOIDmode)
10806 /* We can use %d if the number is <32 bits and positive. */
10807 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10808 fprintf (file, "0x%lx%08lx",
10809 (unsigned long) CONST_DOUBLE_HIGH (x),
10810 (unsigned long) CONST_DOUBLE_LOW (x));
10812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10815 /* We can't handle floating point constants;
10816 PRINT_OPERAND must handle them. */
10817 output_operand_lossage ("floating constant misused");
10821 /* Some assemblers need integer constants to appear first. */
10822 if (CONST_INT_P (XEXP (x, 0)))
10824 output_pic_addr_const (file, XEXP (x, 0), code);
10826 output_pic_addr_const (file, XEXP (x, 1), code);
10830 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10831 output_pic_addr_const (file, XEXP (x, 1), code);
10833 output_pic_addr_const (file, XEXP (x, 0), code);
10839 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10840 output_pic_addr_const (file, XEXP (x, 0), code);
10842 output_pic_addr_const (file, XEXP (x, 1), code);
10844 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10848 gcc_assert (XVECLEN (x, 0) == 1);
10849 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10850 switch (XINT (x, 1))
10853 fputs ("@GOT", file);
10855 case UNSPEC_GOTOFF:
10856 fputs ("@GOTOFF", file);
10858 case UNSPEC_PLTOFF:
10859 fputs ("@PLTOFF", file);
10861 case UNSPEC_GOTPCREL:
10862 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10863 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10865 case UNSPEC_GOTTPOFF:
10866 /* FIXME: This might be @TPOFF in Sun ld too. */
10867 fputs ("@GOTTPOFF", file);
10870 fputs ("@TPOFF", file);
10872 case UNSPEC_NTPOFF:
10874 fputs ("@TPOFF", file);
10876 fputs ("@NTPOFF", file);
10878 case UNSPEC_DTPOFF:
10879 fputs ("@DTPOFF", file);
10881 case UNSPEC_GOTNTPOFF:
10883 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10884 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10886 fputs ("@GOTNTPOFF", file);
10888 case UNSPEC_INDNTPOFF:
10889 fputs ("@INDNTPOFF", file);
10892 case UNSPEC_MACHOPIC_OFFSET:
10894 machopic_output_function_base_name (file);
10898 output_operand_lossage ("invalid UNSPEC as operand");
10904 output_operand_lossage ("invalid expression as operand");
10908 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10909 We need to emit DTP-relative relocations. */
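/* E.g. (an illustration): for SIZE == 4 this prints ".long foo@DTPOFF";
   the SIZE == 8 case pads the upper half with ", 0" below.  */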
10911 static void ATTRIBUTE_UNUSED
10912 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10914 fputs (ASM_LONG, file);
10915 output_addr_const (file, x);
10916 fputs ("@DTPOFF", file);
10922 fputs (", 0", file);
10925 gcc_unreachable ();
10929 /* Return true if X is a representation of the PIC register. This copes
10930 with calls from ix86_find_base_term, where the register might have
10931 been replaced by a cselib value. */
10934 ix86_pic_register_p (rtx x)
10936 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
10937 return (pic_offset_table_rtx
10938 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10940 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10943 /* In the name of slightly smaller debug output, and to cater to
10944 general assembler lossage, recognize PIC+GOTOFF and turn it back
10945 into a direct symbol reference.
10947 On Darwin, this is necessary to avoid a crash, because Darwin
10948 has a different PIC label for each routine but the DWARF debugging
10949 information is not associated with any particular routine, so it's
10950 necessary to remove references to the PIC label from RTL stored by
10951 the DWARF output code. */
10954 ix86_delegitimize_address (rtx x)
10956 rtx orig_x = delegitimize_mem_from_attrs (x);
10957 /* addend is NULL or some rtx if x is something+GOTOFF where
10958 something doesn't include the PIC register. */
10959 rtx addend = NULL_RTX;
10960 /* reg_addend is NULL or a multiple of some register. */
10961 rtx reg_addend = NULL_RTX;
10962 /* const_addend is NULL or a const_int. */
10963 rtx const_addend = NULL_RTX;
10964 /* This is the result, or NULL. */
10965 rtx result = NULL_RTX;
10974 if (GET_CODE (x) != CONST
10975 || GET_CODE (XEXP (x, 0)) != UNSPEC
10976 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10977 || !MEM_P (orig_x))
10979 return XVECEXP (XEXP (x, 0), 0, 0);
10982 if (GET_CODE (x) != PLUS
10983 || GET_CODE (XEXP (x, 1)) != CONST)
10986 if (ix86_pic_register_p (XEXP (x, 0)))
10987 /* %ebx + GOT/GOTOFF */
10989 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10991 /* %ebx + %reg * scale + GOT/GOTOFF */
10992 reg_addend = XEXP (x, 0);
10993 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10994 reg_addend = XEXP (reg_addend, 1);
10995 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10996 reg_addend = XEXP (reg_addend, 0);
10999 reg_addend = NULL_RTX;
11000 addend = XEXP (x, 0);
11004 addend = XEXP (x, 0);
11006 x = XEXP (XEXP (x, 1), 0);
11007 if (GET_CODE (x) == PLUS
11008 && CONST_INT_P (XEXP (x, 1)))
11010 const_addend = XEXP (x, 1);
11014 if (GET_CODE (x) == UNSPEC
11015 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11016 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11017 result = XVECEXP (x, 0, 0);
11019 if (TARGET_MACHO && darwin_local_data_pic (x)
11020 && !MEM_P (orig_x))
11021 result = XVECEXP (x, 0, 0);
11027 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11029 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11032 /* If the rest of original X doesn't involve the PIC register, add
11033 addend and subtract pic_offset_table_rtx. This can happen e.g.
11035 leal (%ebx, %ecx, 4), %ecx
11037 movl foo@GOTOFF(%ecx), %edx
11038 in which case we return (%ecx - %ebx) + foo. */
11039 if (pic_offset_table_rtx)
11040 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11041 pic_offset_table_rtx),
11049 /* If X is a machine specific address (i.e. a symbol or label being
11050 referenced as a displacement from the GOT implemented using an
11051 UNSPEC), then return the base term. Otherwise return X. */
11054 ix86_find_base_term (rtx x)
11060 if (GET_CODE (x) != CONST)
11062 term = XEXP (x, 0);
11063 if (GET_CODE (term) == PLUS
11064 && (CONST_INT_P (XEXP (term, 1))
11065 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
11066 term = XEXP (term, 0);
11067 if (GET_CODE (term) != UNSPEC
11068 || XINT (term, 1) != UNSPEC_GOTPCREL)
11071 return XVECEXP (term, 0, 0);
11074 return ix86_delegitimize_address (x);
11078 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
11079 int fp, FILE *file)
11081 const char *suffix;
11083 if (mode == CCFPmode || mode == CCFPUmode)
11085 code = ix86_fp_compare_code_to_integer (code);
11089 code = reverse_condition (code);
11140 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11144 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11145 Those same assemblers have the same but opposite lossage on cmov. */
11146 if (mode == CCmode)
11147 suffix = fp ? "nbe" : "a";
11148 else if (mode == CCCmode)
11151 gcc_unreachable ();
11167 gcc_unreachable ();
11171 gcc_assert (mode == CCmode || mode == CCCmode);
11188 gcc_unreachable ();
11192 /* ??? As above. */
11193 gcc_assert (mode == CCmode || mode == CCCmode);
11194 suffix = fp ? "nb" : "ae";
11197 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11201 /* ??? As above. */
11202 if (mode == CCmode)
11204 else if (mode == CCCmode)
11205 suffix = fp ? "nb" : "ae";
11207 gcc_unreachable ();
11210 suffix = fp ? "u" : "p";
11213 suffix = fp ? "nu" : "np";
11216 gcc_unreachable ();
11218 fputs (suffix, file);
11221 /* Print the name of register X to FILE based on its machine mode and number.
11222 If CODE is 'w', pretend the mode is HImode.
11223 If CODE is 'b', pretend the mode is QImode.
11224 If CODE is 'k', pretend the mode is SImode.
11225 If CODE is 'q', pretend the mode is DImode.
11226 If CODE is 'x', pretend the mode is V4SFmode.
11227 If CODE is 't', pretend the mode is V8SFmode.
11228 If CODE is 'h', pretend the reg is the 'high' byte register.
11229 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
11230 If CODE is 'd', duplicate the operand for AVX instruction.
11234 print_reg (rtx x, int code, FILE *file)
11237 bool duplicated = code == 'd' && TARGET_AVX;
11239 gcc_assert (x == pc_rtx
11240 || (REGNO (x) != ARG_POINTER_REGNUM
11241 && REGNO (x) != FRAME_POINTER_REGNUM
11242 && REGNO (x) != FLAGS_REG
11243 && REGNO (x) != FPSR_REG
11244 && REGNO (x) != FPCR_REG));
11246 if (ASSEMBLER_DIALECT == ASM_ATT)
11251 gcc_assert (TARGET_64BIT);
11252 fputs ("rip", file);
11256 if (code == 'w' || MMX_REG_P (x))
11258 else if (code == 'b')
11260 else if (code == 'k')
11262 else if (code == 'q')
11264 else if (code == 'y')
11266 else if (code == 'h')
11268 else if (code == 'x')
11270 else if (code == 't')
11273 code = GET_MODE_SIZE (GET_MODE (x));
11275 /* Irritatingly, AMD extended registers use a different naming convention
11276 from the normal registers.  */
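/* E.g. (an illustration): register 8 is %r8b/%r8w/%r8d/%r8 instead of
   following the %al/%ax/%eax/%rax naming pattern.  */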
11277 if (REX_INT_REG_P (x))
11279 gcc_assert (TARGET_64BIT);
11283 error ("extended registers have no high halves");
11286 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
11289 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
11292 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
11295 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
11298 error ("unsupported operand size for extended register");
11308 if (STACK_TOP_P (x))
11317 if (! ANY_FP_REG_P (x))
11318 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
11323 reg = hi_reg_name[REGNO (x)];
11326 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
11328 reg = qi_reg_name[REGNO (x)];
11331 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
11333 reg = qi_high_reg_name[REGNO (x)];
11338 gcc_assert (!duplicated);
11340 fputs (hi_reg_name[REGNO (x)] + 1, file);
11345 gcc_unreachable ();
11351 if (ASSEMBLER_DIALECT == ASM_ATT)
11352 fprintf (file, ", %%%s", reg);
11354 fprintf (file, ", %s", reg);
11358 /* Locate some local-dynamic symbol still in use by this function
11359 so that we can print its name in some tls_local_dynamic_base pattern.  */
11363 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
11367 if (GET_CODE (x) == SYMBOL_REF
11368 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
11370 cfun->machine->some_ld_name = XSTR (x, 0);
11377 static const char *
11378 get_some_local_dynamic_name (void)
11382 if (cfun->machine->some_ld_name)
11383 return cfun->machine->some_ld_name;
11385 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
11387 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
11388 return cfun->machine->some_ld_name;
11393 /* Meaning of CODE:
11394 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
11395 C -- print opcode suffix for set/cmov insn.
11396 c -- like C, but print reversed condition
11397 E,e -- likewise, but for compare-and-branch fused insn.
11398 F,f -- likewise, but for floating-point.
11399 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing.
11401 R -- print the prefix for register names.
11402 z -- print the opcode suffix for the size of the current operand.
11403 Z -- likewise, with special suffixes for x87 instructions.
11404 * -- print a star (in certain assembler syntax)
11405 A -- print an absolute memory reference.
11406 w -- print the operand as if it's a "word" (HImode) even if it isn't.
11407 s -- print a shift double count, followed by the assembler's argument delimiter.
11409 b -- print the QImode name of the register for the indicated operand.
11410 %b0 would print %al if operands[0] is reg 0.
11411 w -- likewise, print the HImode name of the register.
11412 k -- likewise, print the SImode name of the register.
11413 q -- likewise, print the DImode name of the register.
11414 x -- likewise, print the V4SFmode name of the register.
11415 t -- likewise, print the V8SFmode name of the register.
11416 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
11417 y -- print "st(0)" instead of "st" as a register.
11418 d -- print duplicated register operand for AVX instruction.
11419 D -- print condition for SSE cmp instruction.
11420 P -- if PIC, print an @PLT suffix.
11421 X -- don't print any sort of PIC '@' suffix for a symbol.
11422 & -- print some in-use local-dynamic symbol name.
11423 H -- print a memory address offset by 8; used for sse high-parts
11424 Y -- print condition for XOP pcom* instruction.
11425 + -- print a branch hint as 'cs' or 'ds' prefix
11426 ; -- print a semicolon (after prefixes, due to a bug in older gas).
11430 print_operand (FILE *file, rtx x, int code)
11437 if (ASSEMBLER_DIALECT == ASM_ATT)
11443 const char *name = get_some_local_dynamic_name ();
11445 output_operand_lossage ("'%%&' used without any "
11446 "local dynamic TLS references");
11448 assemble_name (file, name);
11453 switch (ASSEMBLER_DIALECT)
11460 /* Intel syntax. For absolute addresses, registers should not
11461 be surrounded by braces. */
11465 PRINT_OPERAND (file, x, 0);
11472 gcc_unreachable ();
11475 PRINT_OPERAND (file, x, 0);
11480 if (ASSEMBLER_DIALECT == ASM_ATT)
11485 if (ASSEMBLER_DIALECT == ASM_ATT)
11490 if (ASSEMBLER_DIALECT == ASM_ATT)
11495 if (ASSEMBLER_DIALECT == ASM_ATT)
11500 if (ASSEMBLER_DIALECT == ASM_ATT)
11505 if (ASSEMBLER_DIALECT == ASM_ATT)
11510 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11512 /* Opcodes don't get size suffixes if using Intel opcodes. */
11513 if (ASSEMBLER_DIALECT == ASM_INTEL)
11516 switch (GET_MODE_SIZE (GET_MODE (x)))
11535 output_operand_lossage
11536 ("invalid operand size for operand code '%c'", code);
11541 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11543 (0, "non-integer operand used with operand code '%c'", code);
11547 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
11548 if (ASSEMBLER_DIALECT == ASM_INTEL)
11551 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11553 switch (GET_MODE_SIZE (GET_MODE (x)))
11556 #ifdef HAVE_AS_IX86_FILDS
11566 #ifdef HAVE_AS_IX86_FILDQ
11569 fputs ("ll", file);
11577 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
11579 /* 387 opcodes don't get size suffixes
11580 if the operands are registers. */
11581 if (STACK_REG_P (x))
11584 switch (GET_MODE_SIZE (GET_MODE (x)))
11605 output_operand_lossage
11606 ("invalid operand type used with operand code '%c'", code);
11610 output_operand_lossage
11611 ("invalid operand size for operand code '%c'", code);
11628 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11630 PRINT_OPERAND (file, x, 0);
11631 fputs (", ", file);
11636 /* A little bit of braindamage here.  The SSE compare instructions
11637 use completely different names for the comparisons than the
11638 fp conditional moves do.  */
11641 switch (GET_CODE (x))
11644 fputs ("eq", file);
11647 fputs ("eq_us", file);
11650 fputs ("lt", file);
11653 fputs ("nge", file);
11656 fputs ("le", file);
11659 fputs ("ngt", file);
11662 fputs ("unord", file);
11665 fputs ("neq", file);
11668 fputs ("neq_oq", file);
11671 fputs ("ge", file);
11674 fputs ("nlt", file);
11677 fputs ("gt", file);
11680 fputs ("nle", file);
11683 fputs ("ord", file);
11686 output_operand_lossage ("operand is not a condition code, "
11687 "invalid operand code 'D'");
11693 switch (GET_CODE (x))
11697 fputs ("eq", file);
11701 fputs ("lt", file);
11705 fputs ("le", file);
11708 fputs ("unord", file);
11712 fputs ("neq", file);
11716 fputs ("nlt", file);
11720 fputs ("nle", file);
11723 fputs ("ord", file);
11726 output_operand_lossage ("operand is not a condition code, "
11727 "invalid operand code 'D'");
11733 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11734 if (ASSEMBLER_DIALECT == ASM_ATT)
11736 switch (GET_MODE (x))
11738 case HImode: putc ('w', file); break;
11740 case SFmode: putc ('l', file); break;
11742 case DFmode: putc ('q', file); break;
11743 default: gcc_unreachable ();
11750 if (!COMPARISON_P (x))
11752 output_operand_lossage ("operand is neither a constant nor a "
11753 "condition code, invalid operand code "
11757 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11760 if (!COMPARISON_P (x))
11762 output_operand_lossage ("operand is neither a constant nor a "
11763 "condition code, invalid operand code "
11767 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11768 if (ASSEMBLER_DIALECT == ASM_ATT)
11771 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11774 /* Like above, but reverse condition */
11776 /* Check to see if argument to %c is really a constant
11777 and not a condition code which needs to be reversed. */
11778 if (!COMPARISON_P (x))
11780 output_operand_lossage ("operand is neither a constant nor a "
11781 "condition code, invalid operand "
11785 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11788 if (!COMPARISON_P (x))
11790 output_operand_lossage ("operand is neither a constant nor a "
11791 "condition code, invalid operand "
11795 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11796 if (ASSEMBLER_DIALECT == ASM_ATT)
11799 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11803 put_condition_code (GET_CODE (x), CCmode, 0, 0, file);
11807 put_condition_code (GET_CODE (x), CCmode, 1, 0, file);
11811 /* It doesn't actually matter what mode we use here, as we're
11812 only going to use this for printing. */
11813 x = adjust_address_nv (x, DImode, 8);
11821 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11824 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11827 int pred_val = INTVAL (XEXP (x, 0));
11829 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11830 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11832 int taken = pred_val > REG_BR_PROB_BASE / 2;
11833 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11835 /* Emit hints only in the case default branch prediction
11836 heuristics would fail. */
11837 if (taken != cputaken)
11839 /* We use 3e (DS) prefix for taken branches and
11840 2e (CS) prefix for not taken branches. */
11842 fputs ("ds ; ", file);
11844 fputs ("cs ; ", file);
11852 switch (GET_CODE (x))
11855 fputs ("neq", file);
11858 fputs ("eq", file);
11862 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11866 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11870 fputs ("le", file);
11874 fputs ("lt", file);
11877 fputs ("unord", file);
11880 fputs ("ord", file);
11883 fputs ("ueq", file);
11886 fputs ("nlt", file);
11889 fputs ("nle", file);
11892 fputs ("ule", file);
11895 fputs ("ult", file);
11898 fputs ("une", file);
11901 output_operand_lossage ("operand is not a condition code, "
11902 "invalid operand code 'Y'");
11909 fputs (" ; ", file);
11916 output_operand_lossage ("invalid operand code '%c'", code);
11921 print_reg (x, code, file);
11923 else if (MEM_P (x))
11925 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11926 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11927 && GET_MODE (x) != BLKmode)
11930 switch (GET_MODE_SIZE (GET_MODE (x)))
11932 case 1: size = "BYTE"; break;
11933 case 2: size = "WORD"; break;
11934 case 4: size = "DWORD"; break;
11935 case 8: size = "QWORD"; break;
11936 case 12: size = "TBYTE"; break;
11938 if (GET_MODE (x) == XFmode)
11943 case 32: size = "YMMWORD"; break;
11945 gcc_unreachable ();
11948 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11951 else if (code == 'w')
11953 else if (code == 'k')
11956 fputs (size, file);
11957 fputs (" PTR ", file);
11961 /* Avoid (%rip) for call operands. */
11962 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11963 && !CONST_INT_P (x))
11964 output_addr_const (file, x);
11965 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11966 output_operand_lossage ("invalid constraints for operand");
11968 output_address (x);
11971 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11976 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11977 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11979 if (ASSEMBLER_DIALECT == ASM_ATT)
11981 fprintf (file, "0x%08lx", (long unsigned int) l);
11984 /* These float cases don't actually occur as immediate operands. */
11985 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11989 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11990 fputs (dstr, file);
11993 else if (GET_CODE (x) == CONST_DOUBLE
11994 && GET_MODE (x) == XFmode)
11998 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11999 fputs (dstr, file);
12004 /* We have patterns that allow zero sets of memory, for instance.
12005 In 64-bit mode, we should probably support all 8-byte vectors,
12006 since we can in fact encode that into an immediate. */
12007 if (GET_CODE (x) == CONST_VECTOR)
12009 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12015 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12017 if (ASSEMBLER_DIALECT == ASM_ATT)
12020 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12021 || GET_CODE (x) == LABEL_REF)
12023 if (ASSEMBLER_DIALECT == ASM_ATT)
12026 fputs ("OFFSET FLAT:", file);
12029 if (CONST_INT_P (x))
12030 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12032 output_pic_addr_const (file, x, code);
12034 output_addr_const (file, x);
12038 /* Print a memory operand whose address is ADDR. */
12041 print_operand_address (FILE *file, rtx addr)
12043 struct ix86_address parts;
12044 rtx base, index, disp;
12046 int ok = ix86_decompose_address (addr, &parts);
12051 index = parts.index;
12053 scale = parts.scale;
12061 if (ASSEMBLER_DIALECT == ASM_ATT)
12063 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
12066 gcc_unreachable ();
12069 /* Use one byte shorter RIP relative addressing for 64bit mode. */
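/* E.g. (an illustration): "movl foo(%rip), %eax" encodes foo as a 32-bit
   pc-relative displacement; the absolute disp32 form needs an extra SIB
   byte in 64-bit mode, hence the one-byte saving.  */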
12070 if (TARGET_64BIT && !base && !index)
12074 if (GET_CODE (disp) == CONST
12075 && GET_CODE (XEXP (disp, 0)) == PLUS
12076 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12077 symbol = XEXP (XEXP (disp, 0), 0);
12079 if (GET_CODE (symbol) == LABEL_REF
12080 || (GET_CODE (symbol) == SYMBOL_REF
12081 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
12084 if (!base && !index)
12086 /* Displacement only requires special attention. */
12088 if (CONST_INT_P (disp))
12090 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12091 fputs ("ds:", file);
12092 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12095 output_pic_addr_const (file, disp, 0);
12097 output_addr_const (file, disp);
12101 if (ASSEMBLER_DIALECT == ASM_ATT)
12106 output_pic_addr_const (file, disp, 0);
12107 else if (GET_CODE (disp) == LABEL_REF)
12108 output_asm_label (disp);
12110 output_addr_const (file, disp);
12115 print_reg (base, 0, file);
12119 print_reg (index, 0, file);
12121 fprintf (file, ",%d", scale);
12127 rtx offset = NULL_RTX;
12131 /* Pull out the offset of a symbol; print any symbol itself. */
12132 if (GET_CODE (disp) == CONST
12133 && GET_CODE (XEXP (disp, 0)) == PLUS
12134 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12136 offset = XEXP (XEXP (disp, 0), 1);
12137 disp = gen_rtx_CONST (VOIDmode,
12138 XEXP (XEXP (disp, 0), 0));
12142 output_pic_addr_const (file, disp, 0);
12143 else if (GET_CODE (disp) == LABEL_REF)
12144 output_asm_label (disp);
12145 else if (CONST_INT_P (disp))
12148 output_addr_const (file, disp);
12154 print_reg (base, 0, file);
12157 if (INTVAL (offset) >= 0)
12159 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12163 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12170 print_reg (index, 0, file);
12172 fprintf (file, "*%d", scale);
12180 output_addr_const_extra (FILE *file, rtx x)
12184 if (GET_CODE (x) != UNSPEC)
12187 op = XVECEXP (x, 0, 0);
12188 switch (XINT (x, 1))
12190 case UNSPEC_GOTTPOFF:
12191 output_addr_const (file, op);
12192 /* FIXME: This might be @TPOFF in Sun ld. */
12193 fputs ("@GOTTPOFF", file);
12196 output_addr_const (file, op);
12197 fputs ("@TPOFF", file);
12199 case UNSPEC_NTPOFF:
12200 output_addr_const (file, op);
12202 fputs ("@TPOFF", file);
12204 fputs ("@NTPOFF", file);
12206 case UNSPEC_DTPOFF:
12207 output_addr_const (file, op);
12208 fputs ("@DTPOFF", file);
12210 case UNSPEC_GOTNTPOFF:
12211 output_addr_const (file, op);
12213 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12214 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
12216 fputs ("@GOTNTPOFF", file);
12218 case UNSPEC_INDNTPOFF:
12219 output_addr_const (file, op);
12220 fputs ("@INDNTPOFF", file);
12223 case UNSPEC_MACHOPIC_OFFSET:
12224 output_addr_const (file, op);
12226 machopic_output_function_base_name (file);
12237 /* Split one or more DImode RTL references into pairs of SImode
12238 references. The RTL can be REG, offsettable MEM, integer constant, or
12239 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
12240 split and "num" is its length. lo_half and hi_half are output arrays
12241 that parallel "operands". */
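/* E.g. (an illustration): splitting lets a DImode addition on ia32 be
   emitted as an addl on the low SImode halves followed by an adcl on the
   high halves.  */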
12244 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12248 rtx op = operands[num];
12250 /* simplify_subreg refuses to split volatile memory addresses,
12251 but we still have to handle them.  */
12254 lo_half[num] = adjust_address (op, SImode, 0);
12255 hi_half[num] = adjust_address (op, SImode, 4);
12259 lo_half[num] = simplify_gen_subreg (SImode, op,
12260 GET_MODE (op) == VOIDmode
12261 ? DImode : GET_MODE (op), 0);
12262 hi_half[num] = simplify_gen_subreg (SImode, op,
12263 GET_MODE (op) == VOIDmode
12264 ? DImode : GET_MODE (op), 4);
12268 /* Split one or more TImode RTL references into pairs of DImode
12269 references. The RTL can be REG, offsettable MEM, integer constant, or
12270 CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
12271 split and "num" is its length. lo_half and hi_half are output arrays
12272 that parallel "operands". */
12275 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
12279 rtx op = operands[num];
12281 /* simplify_subreg refuses to split volatile memory addresses, but we
12282 still have to handle them.  */
12285 lo_half[num] = adjust_address (op, DImode, 0);
12286 hi_half[num] = adjust_address (op, DImode, 8);
12290 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
12291 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
12296 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
12297 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
12298 is the expression of the binary operation. The output may either be
12299 emitted here, or returned to the caller, like all output_* functions.
12301 There is no guarantee that the operands are the same mode, as they
12302 might be within FLOAT or FLOAT_EXTEND expressions. */
12304 #ifndef SYSV386_COMPAT
12305 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
12306 wants to fix the assemblers because that causes incompatibility
12307 with gcc. No-one wants to fix gcc because that causes
12308 incompatibility with assemblers... You can use the option of
12309 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
12310 #define SYSV386_COMPAT 1
12314 output_387_binary_op (rtx insn, rtx *operands)
12316 static char buf[40];
12319 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
12321 #ifdef ENABLE_CHECKING
12322 /* Even if we do not want to check the inputs, this documents the input
12323 constraints, which helps in understanding the following code.  */
12324 if (STACK_REG_P (operands[0])
12325 && ((REG_P (operands[1])
12326 && REGNO (operands[0]) == REGNO (operands[1])
12327 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
12328 || (REG_P (operands[2])
12329 && REGNO (operands[0]) == REGNO (operands[2])
12330 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
12331 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
12334 gcc_assert (is_sse);
12337 switch (GET_CODE (operands[3]))
12340 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12341 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12349 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12350 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12358 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12359 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12367 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
12368 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
12376 gcc_unreachable ();
12383 strcpy (buf, ssep);
12384 if (GET_MODE (operands[0]) == SFmode)
12385 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
12387 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
12391 strcpy (buf, ssep + 1);
12392 if (GET_MODE (operands[0]) == SFmode)
12393 strcat (buf, "ss\t{%2, %0|%0, %2}");
12395 strcat (buf, "sd\t{%2, %0|%0, %2}");
12401 switch (GET_CODE (operands[3]))
12405 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
12407 rtx temp = operands[2];
12408 operands[2] = operands[1];
12409 operands[1] = temp;
12412 /* We know operands[0] == operands[1].  */
12414 if (MEM_P (operands[2]))
12420 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12422 if (STACK_TOP_P (operands[0]))
12423 /* How is it that we are storing to a dead operand[2]?
12424 Well, presumably operands[1] is dead too. We can't
12425 store the result to st(0) as st(0) gets popped on this
12426 instruction. Instead store to operands[2] (which I
12427 think has to be st(1)). st(1) will be popped later.
12428 gcc <= 2.8.1 didn't have this check and generated
12429 assembly code that the Unixware assembler rejected. */
12430 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12432 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12436 if (STACK_TOP_P (operands[0]))
12437 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12439 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12444 if (MEM_P (operands[1]))
12450 if (MEM_P (operands[2]))
12456 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
12459 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
12460 derived assemblers, confusingly reverse the direction of
12461 the operation for fsub{r} and fdiv{r} when the
12462 destination register is not st(0). The Intel assembler
12463 doesn't have this brain damage. Read !SYSV386_COMPAT to
12464 figure out what the hardware really does. */
12465 if (STACK_TOP_P (operands[0]))
12466 p = "{p\t%0, %2|rp\t%2, %0}";
12468 p = "{rp\t%2, %0|p\t%0, %2}";
12470 if (STACK_TOP_P (operands[0]))
12471 /* As above for fmul/fadd, we can't store to st(0). */
12472 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
12474 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
12479 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
12482 if (STACK_TOP_P (operands[0]))
12483 p = "{rp\t%0, %1|p\t%1, %0}";
12485 p = "{p\t%1, %0|rp\t%0, %1}";
12487 if (STACK_TOP_P (operands[0]))
12488 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
12490 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
12495 if (STACK_TOP_P (operands[0]))
12497 if (STACK_TOP_P (operands[1]))
12498 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
12500 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
12503 else if (STACK_TOP_P (operands[1]))
12506 p = "{\t%1, %0|r\t%0, %1}";
12508 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
12514 p = "{r\t%2, %0|\t%0, %2}";
12516 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
12522 gcc_unreachable ();
12529 /* Return needed mode for entity in optimize_mode_switching pass. */
12532 ix86_mode_needed (int entity, rtx insn)
12534 enum attr_i387_cw mode;
12536 /* The mode UNINITIALIZED is used to store the control word after a
12537 function call or ASM pattern.  The mode ANY specifies that the function
12538 has no requirements on the control word and makes no changes in the
12539 bits we are interested in.  */
12542 || (NONJUMP_INSN_P (insn)
12543 && (asm_noperands (PATTERN (insn)) >= 0
12544 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12545 return I387_CW_UNINITIALIZED;
12547 if (recog_memoized (insn) < 0)
12548 return I387_CW_ANY;
12550 mode = get_attr_i387_cw (insn);
12555 if (mode == I387_CW_TRUNC)
12560 if (mode == I387_CW_FLOOR)
12565 if (mode == I387_CW_CEIL)
12570 if (mode == I387_CW_MASK_PM)
12575 gcc_unreachable ();
12578 return I387_CW_ANY;
12581 /* Output code to initialize control word copies used by trunc?f?i and
12582 rounding patterns.  CURRENT_MODE is set to the current control word,
12583 while NEW_MODE is set to the new control word.  */
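/* As background (an illustration, not from the original comment): bits
   10-11 of the 387 control word select the rounding mode -- 00 nearest,
   01 down (floor), 10 up (ceil), 11 toward zero (trunc) -- and bit 5
   masks the precision exception; hence the 0x0c00, 0x0400, 0x0800 and
   0x0020 constants used below.  */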
12586 emit_i387_cw_initialization (int mode)
12588 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12591 enum ix86_stack_slot slot;
12593 rtx reg = gen_reg_rtx (HImode);
12595 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12596 emit_move_insn (reg, copy_rtx (stored_mode));
12598 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12599 || optimize_function_for_size_p (cfun))
12603 case I387_CW_TRUNC:
12604 /* round toward zero (truncate) */
12605 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12606 slot = SLOT_CW_TRUNC;
12609 case I387_CW_FLOOR:
12610 /* round down toward -oo */
12611 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12612 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12613 slot = SLOT_CW_FLOOR;
12617 /* round up toward +oo */
12618 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12619 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12620 slot = SLOT_CW_CEIL;
12623 case I387_CW_MASK_PM:
12624 /* mask precision exception for nearbyint() */
12625 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12626 slot = SLOT_CW_MASK_PM;
12630 gcc_unreachable ();
12637 case I387_CW_TRUNC:
12638 /* round toward zero (truncate) */
12639 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12640 slot = SLOT_CW_TRUNC;
12643 case I387_CW_FLOOR:
12644 /* round down toward -oo */
12645 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12646 slot = SLOT_CW_FLOOR;
12650 /* round up toward +oo */
12651 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12652 slot = SLOT_CW_CEIL;
12655 case I387_CW_MASK_PM:
12656 /* mask precision exception for nearbyint() */
12657 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12658 slot = SLOT_CW_MASK_PM;
12662 gcc_unreachable ();
12666 gcc_assert (slot < MAX_386_STACK_LOCALS);
12668 new_mode = assign_386_stack_local (HImode, slot);
12669 emit_move_insn (new_mode, reg);
12672 /* Output code for INSN to convert a float to a signed int. OPERANDS
12673 are the insn operands. The output may be [HSD]Imode and the input
12674 operand may be [SDX]Fmode. */
12677 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12679 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12680 int dimode_p = GET_MODE (operands[0]) == DImode;
12681 int round_mode = get_attr_i387_cw (insn);
12683 /* Jump through a hoop or two for DImode, since the hardware has no
12684 non-popping instruction. We used to do this a different way, but
12685 that was somewhat fragile and broke with post-reload splitters. */
12686 if ((dimode_p || fisttp) && !stack_top_dies)
12687 output_asm_insn ("fld\t%y1", operands);
12689 gcc_assert (STACK_TOP_P (operands[1]));
12690 gcc_assert (MEM_P (operands[0]));
12691 gcc_assert (GET_MODE (operands[1]) != TFmode);
12694 output_asm_insn ("fisttp%Z0\t%0", operands);
12697 if (round_mode != I387_CW_ANY)
12698 output_asm_insn ("fldcw\t%3", operands);
12699 if (stack_top_dies || dimode_p)
12700 output_asm_insn ("fistp%Z0\t%0", operands);
12702 output_asm_insn ("fist%Z0\t%0", operands);
12703 if (round_mode != I387_CW_ANY)
12704 output_asm_insn ("fldcw\t%2", operands);
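/* I.e. (an illustration) the emitted sequence is "fldcw new; fistp dst;
   fldcw saved": switch the rounding mode, convert, then restore the
   caller's control word.  */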
12710 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12711 have the values zero or one, indicates the ffreep insn's operand
12712 from the OPERANDS array. */
12714 static const char *
12715 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12717 if (TARGET_USE_FFREEP)
12718 #ifdef HAVE_AS_IX86_FFREEP
12719 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12722 static char retval[32];
12723 int regno = REGNO (operands[opno]);
12725 gcc_assert (FP_REGNO_P (regno));
12727 regno -= FIRST_STACK_REG;
12729 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
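/* An illustration of the encoding (little-endian, as ia32 is): for
   st(1) the .short above emits 0xc1df, stored as the bytes 0xdf 0xc1,
   which is exactly the two-byte opcode of "ffreep %st(1)".  */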
12734 return opno ? "fstp\t%y1" : "fstp\t%y0";
12738 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12739 should be used. UNORDERED_P is true when fucom should be used. */
12742 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12744 int stack_top_dies;
12745 rtx cmp_op0, cmp_op1;
12746 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12750 cmp_op0 = operands[0];
12751 cmp_op1 = operands[1];
12755 cmp_op0 = operands[1];
12756 cmp_op1 = operands[2];
12761 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12762 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12763 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12764 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12766 if (GET_MODE (operands[0]) == SFmode)
12768 return &ucomiss[TARGET_AVX ? 0 : 1];
12770 return &comiss[TARGET_AVX ? 0 : 1];
12773 return &ucomisd[TARGET_AVX ? 0 : 1];
12775 return &comisd[TARGET_AVX ? 0 : 1];
12778 gcc_assert (STACK_TOP_P (cmp_op0));
12780 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12782 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12784 if (stack_top_dies)
12786 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12787 return output_387_ffreep (operands, 1);
12790 return "ftst\n\tfnstsw\t%0";
12793 if (STACK_REG_P (cmp_op1)
12795 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12796 && REGNO (cmp_op1) != FIRST_STACK_REG)
12798 /* If the top of the 387 stack dies, and the other operand
12799 is also a stack register that dies, then this must be a
12800 `fcompp' float compare.  */
12804 /* There is no double popping fcomi variant. Fortunately,
12805 eflags is immune from the fstp's cc clobbering. */
12807 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12809 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12810 return output_387_ffreep (operands, 0);
12815 return "fucompp\n\tfnstsw\t%0";
12817 return "fcompp\n\tfnstsw\t%0";
12822 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12824 static const char * const alt[16] =
12826 "fcom%Z2\t%y2\n\tfnstsw\t%0",
12827 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
12828 "fucom%Z2\t%y2\n\tfnstsw\t%0",
12829 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
12831 "ficom%Z2\t%y2\n\tfnstsw\t%0",
12832 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
12836 "fcomi\t{%y1, %0|%0, %y1}",
12837 "fcomip\t{%y1, %0|%0, %y1}",
12838 "fucomi\t{%y1, %0|%0, %y1}",
12839 "fucomip\t{%y1, %0|%0, %y1}",
12850 mask = eflags_p << 3;
12851 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12852 mask |= unordered_p << 1;
12853 mask |= stack_top_dies;
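/* E.g. (an illustration): "fucomip" is picked when eflags_p, unordered_p
   and stack_top_dies are all set and cmp_op1 is not integer, i.e.
   mask == 8 | 2 | 1 == 11.  */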
12855 gcc_assert (mask < 16);
12864 ix86_output_addr_vec_elt (FILE *file, int value)
12866 const char *directive = ASM_LONG;
12870 directive = ASM_QUAD;
12872 gcc_assert (!TARGET_64BIT);
12875 fprintf (file, "%s" LPREFIX "%d\n", directive, value);
12879 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12881 const char *directive = ASM_LONG;
12884 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12885 directive = ASM_QUAD;
12887 gcc_assert (!TARGET_64BIT);
12889 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12890 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12891 fprintf (file, "%s" LPREFIX "%d-" LPREFIX "%d\n",
12892 directive, value, rel);
12893 else if (HAVE_AS_GOTOFF_IN_DATA)
12894 fprintf (file, ASM_LONG LPREFIX "%d@GOTOFF\n", value);
12896 else if (TARGET_MACHO)
12898 fprintf (file, ASM_LONG LPREFIX "%d-", value);
12899 machopic_output_function_base_name (file);
12904 asm_fprintf (file, ASM_LONG "%U%s+[.-" LPREFIX "%d]\n",
12905 GOT_SYMBOL_NAME, value);
10908 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate for the target.  */
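/* E.g. (an illustration): "xorl %eax, %eax" is two bytes while
   "movl $0, %eax" is five, but the xor form clobbers the flags, hence
   the explicit CLOBBER attached below when that form is chosen.  */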
12912 ix86_expand_clear (rtx dest)
12916 /* We play register width games, which are only valid after reload. */
12917 gcc_assert (reload_completed);
12919 /* Avoid HImode and its attendant prefix byte. */
12920 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12921 dest = gen_rtx_REG (SImode, REGNO (dest));
12922 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12924 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12925 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
12927 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12928 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12934 /* X is an unchanging MEM. If it is a constant pool reference, return
12935 the constant pool rtx, else NULL. */
12938 maybe_get_pool_constant (rtx x)
12940 x = ix86_delegitimize_address (XEXP (x, 0));
12942 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12943 return get_pool_constant (x);
12949 ix86_expand_move (enum machine_mode mode, rtx operands[])
12952 enum tls_model model;
12957 if (GET_CODE (op1) == SYMBOL_REF)
12959 model = SYMBOL_REF_TLS_MODEL (op1);
12962 op1 = legitimize_tls_address (op1, model, true);
12963 op1 = force_operand (op1, op0);
12967 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12968 && SYMBOL_REF_DLLIMPORT_P (op1))
12969 op1 = legitimize_dllimport_symbol (op1, false);
12971 else if (GET_CODE (op1) == CONST
12972 && GET_CODE (XEXP (op1, 0)) == PLUS
12973 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12975 rtx addend = XEXP (XEXP (op1, 0), 1);
12976 rtx symbol = XEXP (XEXP (op1, 0), 0);
12979 model = SYMBOL_REF_TLS_MODEL (symbol);
12981 tmp = legitimize_tls_address (symbol, model, true);
12982 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12983 && SYMBOL_REF_DLLIMPORT_P (symbol))
12984 tmp = legitimize_dllimport_symbol (symbol, true);
12988 tmp = force_operand (tmp, NULL);
12989 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12990 op0, 1, OPTAB_DIRECT);
12996 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12998 if (TARGET_MACHO && !TARGET_64BIT)
13003 rtx temp = ((reload_in_progress
13004 || ((op0 && REG_P (op0))
13006 ? op0 : gen_reg_rtx (Pmode));
13007 op1 = machopic_indirect_data_reference (op1, temp);
13008 op1 = machopic_legitimize_pic_address (op1, mode,
13009 temp == op1 ? 0 : temp);
13011 else if (MACHOPIC_INDIRECT)
13012 op1 = machopic_indirect_data_reference (op1, 0);
13020 op1 = force_reg (Pmode, op1);
13021 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13023 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13024 op1 = legitimize_pic_address (op1, reg);
13033 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13034 || !push_operand (op0, mode))
13036 op1 = force_reg (mode, op1);
13038 if (push_operand (op0, mode)
13039 && ! general_no_elim_operand (op1, mode))
13040 op1 = copy_to_mode_reg (mode, op1);
13042 /* Force large constants in 64-bit compilation into a register
13043 to get them CSEed.  */
13044 if (can_create_pseudo_p ()
13045 && (mode == DImode) && TARGET_64BIT
13046 && immediate_operand (op1, mode)
13047 && !x86_64_zext_immediate_operand (op1, VOIDmode)
13048 && !register_operand (op0, mode)
13050 op1 = copy_to_mode_reg (mode, op1);
13052 if (can_create_pseudo_p ()
13053 && FLOAT_MODE_P (mode)
13054 && GET_CODE (op1) == CONST_DOUBLE)
13056 /* If we are loading a floating point constant to a register,
13057 force the value to memory now, since we'll get better code
13058 out of the back end.  */
13060 op1 = validize_mem (force_const_mem (mode, op1));
13061 if (!register_operand (op0, mode))
13063 rtx temp = gen_reg_rtx (mode);
13064 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
13065 emit_move_insn (op0, temp);
13071 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13075 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
13077 rtx op0 = operands[0], op1 = operands[1];
13078 unsigned int align = GET_MODE_ALIGNMENT (mode);
13080 /* Force constants other than zero into memory.  We do not know how
13081 the instructions used to build constants modify the upper 64 bits
13082 of the register; once we have that information we may be able
13083 to handle some of them more efficiently.  */
13084 if (can_create_pseudo_p ()
13085 && register_operand (op0, mode)
13086 && (CONSTANT_P (op1)
13087 || (GET_CODE (op1) == SUBREG
13088 && CONSTANT_P (SUBREG_REG (op1))))
13089 && !standard_sse_constant_p (op1))
13090 op1 = validize_mem (force_const_mem (mode, op1));
13092 /* We need to check memory alignment for SSE mode since an attribute
13093 can make operands unaligned.  */
13094 if (can_create_pseudo_p ()
13095 && SSE_REG_MODE_P (mode)
13096 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13097 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13101 /* ix86_expand_vector_move_misalign() does not like constants ... */
13102 if (CONSTANT_P (op1)
13103 || (GET_CODE (op1) == SUBREG
13104 && CONSTANT_P (SUBREG_REG (op1))))
13105 op1 = validize_mem (force_const_mem (mode, op1));
13107 /* ... nor both arguments in memory. */
13108 if (!register_operand (op0, mode)
13109 && !register_operand (op1, mode))
13110 op1 = force_reg (mode, op1);
13112 tmp[0] = op0; tmp[1] = op1;
13113 ix86_expand_vector_move_misalign (mode, tmp);
13117 /* Make operand1 a register if it isn't already. */
13118 if (can_create_pseudo_p ()
13119 && !register_operand (op0, mode)
13120 && !register_operand (op1, mode))
13122 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13126 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13129 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
13130 straight to ix86_expand_vector_move. */
13131 /* Code generation for scalar reg-reg moves of single and double precision data:
13132 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
13136 if (x86_sse_partial_reg_dependency == true)
13141 Code generation for scalar loads of double precision data:
13142 if (x86_sse_split_regs == true)
13143 movlpd mem, reg (gas syntax)
13147 Code generation for unaligned packed loads of single precision data
13148 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13149 if (x86_sse_unaligned_move_optimal)
13152 if (x86_sse_partial_reg_dependency == true)
13164 Code generation for unaligned packed loads of double precision data
13165 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13166 if (x86_sse_unaligned_move_optimal)
13169 if (x86_sse_split_regs == true)
13182 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13191 switch (GET_MODE_CLASS (mode))
13193 case MODE_VECTOR_INT:
13195 switch (GET_MODE_SIZE (mode))
13198 op0 = gen_lowpart (V16QImode, op0);
13199 op1 = gen_lowpart (V16QImode, op1);
13200 emit_insn (gen_avx_movdqu (op0, op1));
13203 op0 = gen_lowpart (V32QImode, op0);
13204 op1 = gen_lowpart (V32QImode, op1);
13205 emit_insn (gen_avx_movdqu256 (op0, op1));
13208 gcc_unreachable ();
13211 case MODE_VECTOR_FLOAT:
13212 op0 = gen_lowpart (mode, op0);
13213 op1 = gen_lowpart (mode, op1);
13218 emit_insn (gen_avx_movups (op0, op1));
13221 emit_insn (gen_avx_movups256 (op0, op1));
13224 emit_insn (gen_avx_movupd (op0, op1));
13227 emit_insn (gen_avx_movupd256 (op0, op1));
13230 gcc_unreachable ();
13235 gcc_unreachable ();
13243 /* If we're optimizing for size, movups is the smallest. */
13244 if (optimize_insn_for_size_p ())
13246 op0 = gen_lowpart (V4SFmode, op0);
13247 op1 = gen_lowpart (V4SFmode, op1);
13248 emit_insn (gen_sse_movups (op0, op1));
13252 /* ??? If we have typed data, then it would appear that using
13253 movdqu is the only way to get unaligned data loaded with
13254 integer type. */
13255 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13257 op0 = gen_lowpart (V16QImode, op0);
13258 op1 = gen_lowpart (V16QImode, op1);
13259 emit_insn (gen_sse2_movdqu (op0, op1));
13263 if (TARGET_SSE2 && mode == V2DFmode)
13267 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13269 op0 = gen_lowpart (V2DFmode, op0);
13270 op1 = gen_lowpart (V2DFmode, op1);
13271 emit_insn (gen_sse2_movupd (op0, op1));
13275 /* When SSE registers are split into halves, we can avoid
13276 writing to the top half twice. */
13277 if (TARGET_SSE_SPLIT_REGS)
13279 emit_clobber (op0);
13284 /* ??? Not sure about the best option for the Intel chips.
13285 The following would seem to satisfy; the register is
13286 entirely cleared, breaking the dependency chain. We
13287 then store to the upper half, with a dependency depth
13288 of one. A rumor has it that Intel recommends two movsd
13289 followed by an unpacklpd, but this is unconfirmed. And
13290 given that the dependency depth of the unpacklpd would
13291 still be one, I'm not sure why this would be better. */
13292 zero = CONST0_RTX (V2DFmode);
13295 m = adjust_address (op1, DFmode, 0);
13296 emit_insn (gen_sse2_loadlpd (op0, zero, m));
13297 m = adjust_address (op1, DFmode, 8);
13298 emit_insn (gen_sse2_loadhpd (op0, op0, m));
13302 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
13304 op0 = gen_lowpart (V4SFmode, op0);
13305 op1 = gen_lowpart (V4SFmode, op1);
13306 emit_insn (gen_sse_movups (op0, op1));
13310 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
13311 emit_move_insn (op0, CONST0_RTX (mode));
13313 emit_clobber (op0);
13315 if (mode != V4SFmode)
13316 op0 = gen_lowpart (V4SFmode, op0);
13317 m = adjust_address (op1, V2SFmode, 0);
13318 emit_insn (gen_sse_loadlps (op0, op0, m));
13319 m = adjust_address (op1, V2SFmode, 8);
13320 emit_insn (gen_sse_loadhps (op0, op0, m));
13323 else if (MEM_P (op0))
13325 /* If we're optimizing for size, movups is the smallest. */
13326 if (optimize_insn_for_size_p ())
13328 op0 = gen_lowpart (V4SFmode, op0);
13329 op1 = gen_lowpart (V4SFmode, op1);
13330 emit_insn (gen_sse_movups (op0, op1));
13334 /* ??? Similar to above, only less clear because of "typeless
13335 stores". */
13336 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
13337 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13339 op0 = gen_lowpart (V16QImode, op0);
13340 op1 = gen_lowpart (V16QImode, op1);
13341 emit_insn (gen_sse2_movdqu (op0, op1));
13345 if (TARGET_SSE2 && mode == V2DFmode)
13347 m = adjust_address (op0, DFmode, 0);
13348 emit_insn (gen_sse2_storelpd (m, op1));
13349 m = adjust_address (op0, DFmode, 8);
13350 emit_insn (gen_sse2_storehpd (m, op1));
13354 if (mode != V4SFmode)
13355 op1 = gen_lowpart (V4SFmode, op1);
13356 m = adjust_address (op0, V2SFmode, 0);
13357 emit_insn (gen_sse_storelps (m, op1));
13358 m = adjust_address (op0, V2SFmode, 8);
13359 emit_insn (gen_sse_storehps (m, op1));
13363 gcc_unreachable ();
13366 /* Expand a push in MODE. This is some mode for which we do not support
13367 proper push instructions, at least from the registers that we expect
13368 the value to live in. */
13371 ix86_expand_push (enum machine_mode mode, rtx x)
13375 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
13376 GEN_INT (-GET_MODE_SIZE (mode)),
13377 stack_pointer_rtx, 1, OPTAB_DIRECT);
13378 if (tmp != stack_pointer_rtx)
13379 emit_move_insn (stack_pointer_rtx, tmp);
13381 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
13383 /* When we push an operand onto the stack, it has to be aligned at least
13384 at the function argument boundary. However, since we don't have
13385 the argument type, we can't determine the actual argument
13386 boundary. */
13387 emit_move_insn (tmp, x);
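/* Illustrative expansion of the above (sketch): pushing a 16-byte
   SSE mode on ia32 becomes an explicit stack adjustment plus a plain
   move, e.g.

       subl   $16, %esp
       movups %xmm0, (%esp)

   since no push instruction exists for that register class.  */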
13390 /* Helper function of ix86_fixup_binary_operands to canonicalize
13391 operand order. Returns true if the operands should be swapped. */
13394 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
13397 rtx dst = operands[0];
13398 rtx src1 = operands[1];
13399 rtx src2 = operands[2];
13401 /* If the operation is not commutative, we can't do anything. */
13402 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
13405 /* Highest priority is that src1 should match dst. */
13406 if (rtx_equal_p (dst, src1))
13408 if (rtx_equal_p (dst, src2))
13411 /* Next highest priority is that immediate constants come second. */
13412 if (immediate_operand (src2, mode))
13414 if (immediate_operand (src1, mode))
13417 /* Lowest priority is that memory references should come second. */
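/* Illustrative example of the priorities above (sketch): given the
   commutative operation

       operands[0] = r1;  operands[1] = const_int 4;  operands[2] = r1;

   src2 matches dst, so the sources are swapped to r1 = r1 + 4 and
   the immediate ends up second, matching the machine's add patterns.  */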
13427 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
13428 destination to use for the operation. If different from the true
13429 destination in operands[0], a copy operation will be required. */
13432 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
13435 rtx dst = operands[0];
13436 rtx src1 = operands[1];
13437 rtx src2 = operands[2];
13439 /* Canonicalize operand order. */
13440 if (ix86_swap_binary_operands_p (code, mode, operands))
13444 /* It is invalid to swap operands of different modes. */
13445 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
13452 /* Both source operands cannot be in memory. */
13453 if (MEM_P (src1) && MEM_P (src2))
13455 /* Optimization: Only read from memory once. */
13456 if (rtx_equal_p (src1, src2))
13458 src2 = force_reg (mode, src2);
13462 src2 = force_reg (mode, src2);
13465 /* If the destination is memory, and we do not have matching source
13466 operands, do things in registers. */
13467 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13468 dst = gen_reg_rtx (mode);
13470 /* Source 1 cannot be a constant. */
13471 if (CONSTANT_P (src1))
13472 src1 = force_reg (mode, src1);
13474 /* Source 1 cannot be a non-matching memory. */
13475 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13476 src1 = force_reg (mode, src1);
13478 operands[1] = src1;
13479 operands[2] = src2;
13483 /* Similarly, but assume that the destination has already been
13484 set up properly. */
13487 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
13488 enum machine_mode mode, rtx operands[])
13490 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
13491 gcc_assert (dst == operands[0]);
13494 /* Attempt to expand a binary operator. Make the expansion closer to the
13495 actual machine than just general_operand, which would allow 3 separate
13496 memory references (one output, two input) in a single insn. */
13499 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
13502 rtx src1, src2, dst, op, clob;
13504 dst = ix86_fixup_binary_operands (code, mode, operands);
13505 src1 = operands[1];
13506 src2 = operands[2];
13508 /* Emit the instruction. */
13510 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
13511 if (reload_in_progress)
13513 /* Reload doesn't know about the flags register, and doesn't know that
13514 it doesn't want to clobber it. We can only do this with PLUS. */
13515 gcc_assert (code == PLUS);
13520 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13521 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13524 /* Fix up the destination if needed. */
13525 if (dst != operands[0])
13526 emit_move_insn (operands[0], dst);
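/* Illustrative use from a machine-description expander (sketch, not
   verbatim i386.md source):

       (define_expand "addsi3"
         [(set (match_operand:SI 0 "nonimmediate_operand" "")
               (plus:SI (match_operand:SI 1 "nonimmediate_operand" "")
                        (match_operand:SI 2 "general_operand" "")))]
         ""
         "ix86_expand_binary_operator (PLUS, SImode, operands); DONE;")  */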
13529 /* Return TRUE or FALSE depending on whether the binary operator meets the
13530 appropriate constraints. */
13533 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13536 rtx dst = operands[0];
13537 rtx src1 = operands[1];
13538 rtx src2 = operands[2];
13540 /* Both source operands cannot be in memory. */
13541 if (MEM_P (src1) && MEM_P (src2))
13544 /* Canonicalize operand order for commutative operators. */
13545 if (ix86_swap_binary_operands_p (code, mode, operands))
13552 /* If the destination is memory, we must have a matching source operand. */
13553 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13556 /* Source 1 cannot be a constant. */
13557 if (CONSTANT_P (src1))
13560 /* Source 1 cannot be a non-matching memory. */
13561 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13567 /* Attempt to expand a unary operator. Make the expansion closer to the
13568 actual machine than just general_operand, which would allow 2 separate
13569 memory references (one output, one input) in a single insn. */
13572 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13575 int matching_memory;
13576 rtx src, dst, op, clob;
13581 /* If the destination is memory, and we do not have matching source
13582 operands, do things in registers. */
13583 matching_memory = 0;
13586 if (rtx_equal_p (dst, src))
13587 matching_memory = 1;
13589 dst = gen_reg_rtx (mode);
13592 /* When source operand is memory, destination must match. */
13593 if (MEM_P (src) && !matching_memory)
13594 src = force_reg (mode, src);
13596 /* Emit the instruction. */
13598 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13599 if (reload_in_progress || code == NOT)
13601 /* Reload doesn't know about the flags register, and doesn't know that
13602 it doesn't want to clobber it. */
13603 gcc_assert (code == NOT);
13608 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13609 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13612 /* Fix up the destination if needed. */
13613 if (dst != operands[0])
13614 emit_move_insn (operands[0], dst);
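/* Illustrative result (sketch): for NEG the pattern emitted above is

       (parallel [(set (reg:SI 60) (neg:SI (reg:SI 61)))
                  (clobber (reg:CC FLAGS_REG))])

   while NOT, which does not modify EFLAGS, is emitted as a bare SET.  */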
13617 #define LEA_SEARCH_THRESHOLD 12
13619 /* Search backward for a non-AGU definition of register number REGNO1
13620 or register number REGNO2 in INSN's basic block until
13621 1. it passes LEA_SEARCH_THRESHOLD instructions, or
13622 2. it reaches the BB boundary, or
13623 3. it reaches an AGU definition.
13624 Returns the distance between the non-AGU definition point and INSN.
13625 If there is no definition point, returns -1. */
13628 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
13631 basic_block bb = BLOCK_FOR_INSN (insn);
13634 enum attr_type insn_type;
13636 if (insn != BB_HEAD (bb))
13638 rtx prev = PREV_INSN (insn);
13639 while (prev && distance < LEA_SEARCH_THRESHOLD)
13644 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13645 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13646 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13647 && (regno1 == DF_REF_REGNO (*def_rec)
13648 || regno2 == DF_REF_REGNO (*def_rec)))
13650 insn_type = get_attr_type (prev);
13651 if (insn_type != TYPE_LEA)
13655 if (prev == BB_HEAD (bb))
13657 prev = PREV_INSN (prev);
13661 if (distance < LEA_SEARCH_THRESHOLD)
13665 bool simple_loop = false;
13667 FOR_EACH_EDGE (e, ei, bb->preds)
13670 simple_loop = true;
13676 rtx prev = BB_END (bb);
13679 && distance < LEA_SEARCH_THRESHOLD)
13684 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
13685 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13686 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13687 && (regno1 == DF_REF_REGNO (*def_rec)
13688 || regno2 == DF_REF_REGNO (*def_rec)))
13690 insn_type = get_attr_type (prev);
13691 if (insn_type != TYPE_LEA)
13695 prev = PREV_INSN (prev);
13703 /* get_attr_type may modify recog data. We want to make sure
13704 that recog data is valid for instruction INSN, on which
13705 distance_non_agu_define is called. INSN is unchanged here. */
13706 extract_insn_cached (insn);
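/* Illustrative motivation (sketch): on Atom a sequence such as

       add  %ebx, %ecx        # ALU (non-AGU) definition of %ecx
       lea  4(%ecx), %edx     # AGU use of %ecx

   pays a penalty for forwarding the value from the ALU to the AGU;
   the backward search above measures how far away such a definition
   is so the LEA/ADD choice can take it into account.  */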
13710 /* Return the distance between INSN and the next insn that uses
13711 register number REGNO0 in memory address. Return -1 if no such
13712 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
13715 distance_agu_use (unsigned int regno0, rtx insn)
13717 basic_block bb = BLOCK_FOR_INSN (insn);
13722 if (insn != BB_END (bb))
13724 rtx next = NEXT_INSN (insn);
13725 while (next && distance < LEA_SEARCH_THRESHOLD)
13731 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13732 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13733 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13734 && regno0 == DF_REF_REGNO (*use_rec))
13736 /* Return DISTANCE if OP0 is used in a memory
13737 address in NEXT. */
13741 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13742 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13743 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13744 && regno0 == DF_REF_REGNO (*def_rec))
13746 /* Return -1 if OP0 is set in NEXT. */
13750 if (next == BB_END (bb))
13752 next = NEXT_INSN (next);
13756 if (distance < LEA_SEARCH_THRESHOLD)
13760 bool simple_loop = false;
13762 FOR_EACH_EDGE (e, ei, bb->succs)
13765 simple_loop = true;
13771 rtx next = BB_HEAD (bb);
13774 && distance < LEA_SEARCH_THRESHOLD)
13780 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
13781 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
13782 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
13783 && regno0 == DF_REF_REGNO (*use_rec))
13785 /* Return DISTANCE if OP0 is used in a memory
13786 address in NEXT. */
13790 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
13791 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
13792 && !DF_REF_IS_ARTIFICIAL (*def_rec)
13793 && regno0 == DF_REF_REGNO (*def_rec))
13795 /* Return -1 if OP0 is set in NEXT. */
13800 next = NEXT_INSN (next);
13808 /* Define this macro to tune LEA priority vs ADD; it takes effect when
13809 there is a dilemma of choosing LEA or ADD.
13810 Negative value: ADD is preferred over LEA
13811 Zero: Neutral
13812 Positive value: LEA is preferred over ADD */
13813 #define IX86_LEA_PRIORITY 2
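/* Sketch of how the distances combine in ix86_lea_for_add_ok below:
   with IX86_LEA_PRIORITY == 2, an LEA is kept only when the next AGU
   use of the destination is close enough, roughly

       dist_use <= dist_define + IX86_LEA_PRIORITY

   so a nearby non-AGU definition biases the choice back to ADD.  */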
13815 /* Return true if it is ok to optimize an ADD operation to LEA
13816 operation to avoid flag register consumption. For processors
13817 like ATOM, if the destination register of the LEA holds an actual
13818 address which will be used soon, LEA is better; otherwise ADD
13819 is better. */
13822 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13823 rtx insn, rtx operands[])
13825 unsigned int regno0 = true_regnum (operands[0]);
13826 unsigned int regno1 = true_regnum (operands[1]);
13827 unsigned int regno2;
13829 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
13830 return regno0 != regno1;
13832 regno2 = true_regnum (operands[2]);
13834 /* If a = b + c, (a!=b && a!=c), must use lea form. */
13835 if (regno0 != regno1 && regno0 != regno2)
13839 int dist_define, dist_use;
13840 dist_define = distance_non_agu_define (regno1, regno2, insn);
13841 if (dist_define <= 0)
13844 /* If this insn has both a backward non-AGU dependence and a forward
13845 AGU dependence, the one with the shorter distance takes effect. */
13846 dist_use = distance_agu_use (regno0, insn);
13848 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
13855 /* Return true if the destination reg of SET_BODY is the shift count
13856 of USE_BODY. */
13859 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
13865 /* Retrieve destination of SET_BODY. */
13866 switch (GET_CODE (set_body))
13869 set_dest = SET_DEST (set_body);
13870 if (!set_dest || !REG_P (set_dest))
13874 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
13875 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
13883 /* Retrieve shift count of USE_BODY. */
13884 switch (GET_CODE (use_body))
13887 shift_rtx = XEXP (use_body, 1);
13890 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
13891 if (ix86_dep_by_shift_count_body (set_body,
13892 XVECEXP (use_body, 0, i)))
13900 && (GET_CODE (shift_rtx) == ASHIFT
13901 || GET_CODE (shift_rtx) == LSHIFTRT
13902 || GET_CODE (shift_rtx) == ASHIFTRT
13903 || GET_CODE (shift_rtx) == ROTATE
13904 || GET_CODE (shift_rtx) == ROTATERT))
13906 rtx shift_count = XEXP (shift_rtx, 1);
13908 /* Return true if shift count is dest of SET_BODY. */
13909 if (REG_P (shift_count)
13910 && true_regnum (set_dest) == true_regnum (shift_count))
13917 /* Return true if the destination reg of SET_INSN is the shift count
13918 of USE_INSN. */
13921 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
13923 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
13924 PATTERN (use_insn));
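/* Illustrative dependence detected above (sketch):

       set insn:  (set (reg:QI 2 cl) (reg:QI 63))
       use insn:  (set (reg:SI 64)
                       (ashift:SI (reg:SI 64) (reg:QI 2 cl)))

   The SET's destination is the USE's shift count, so the two insns
   are dependent through %cl.  */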
13927 /* Return TRUE or FALSE depending on whether the unary operator meets the
13928 appropriate constraints. */
13931 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13932 enum machine_mode mode ATTRIBUTE_UNUSED,
13933 rtx operands[2] ATTRIBUTE_UNUSED)
13935 /* If one of the operands is memory, source and destination must match. */
13936 if ((MEM_P (operands[0])
13937 || MEM_P (operands[1]))
13938 && ! rtx_equal_p (operands[0], operands[1]))
13943 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
13944 are ok, keeping in mind the possible movddup alternative. */
13947 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
13949 if (MEM_P (operands[0]))
13950 return rtx_equal_p (operands[0], operands[1 + high]);
13951 if (MEM_P (operands[1]) && MEM_P (operands[2]))
13952 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
13956 /* Post-reload splitter for converting an SF or DFmode value in an
13957 SSE register into an unsigned SImode. */
13960 ix86_split_convert_uns_si_sse (rtx operands[])
13962 enum machine_mode vecmode;
13963 rtx value, large, zero_or_two31, input, two31, x;
13965 large = operands[1];
13966 zero_or_two31 = operands[2];
13967 input = operands[3];
13968 two31 = operands[4];
13969 vecmode = GET_MODE (large);
13970 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13972 /* Load up the value into the low element. We must ensure that the other
13973 elements are valid floats -- zero is the easiest such value. */
13976 if (vecmode == V4SFmode)
13977 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13979 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13983 input = gen_rtx_REG (vecmode, REGNO (input));
13984 emit_move_insn (value, CONST0_RTX (vecmode));
13985 if (vecmode == V4SFmode)
13986 emit_insn (gen_sse_movss (value, value, input));
13988 emit_insn (gen_sse2_movsd (value, value, input));
13991 emit_move_insn (large, two31);
13992 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13994 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13995 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13997 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13998 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14000 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14001 emit_insn (gen_rtx_SET (VOIDmode, value, x));
14003 large = gen_rtx_REG (V4SImode, REGNO (large));
14004 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14006 x = gen_rtx_REG (V4SImode, REGNO (value));
14007 if (vecmode == V4SFmode)
14008 emit_insn (gen_sse2_cvttps2dq (x, value));
14010 emit_insn (gen_sse2_cvttpd2dq (x, value));
14013 emit_insn (gen_xorv4si3 (value, value, large));
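/* Scalar model of the sequence above (illustrative sketch, assuming
   the round-toward-zero conversion done by cvttps2dq/cvttpd2dq):

       unsigned int
       conv (double x)
       {
         if (x >= 2147483648.0)
           return (unsigned int) (int) (x - 2147483648.0) ^ 0x80000000u;
         return (unsigned int) (int) x;
       }

   The compare builds the mask, the AND selects 0 or 2^31 to subtract,
   and the final XOR puts the removed top bit back.  */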
14016 /* Convert an unsigned DImode value into a DFmode, using only SSE.
14017 Expects the 64-bit DImode to be supplied in a pair of integral
14018 registers. Requires SSE2; will use SSE3 if available. For x86_32,
14019 -mfpmath=sse, !optimize_size only. */
14022 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
14024 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
14025 rtx int_xmm, fp_xmm;
14026 rtx biases, exponents;
14029 int_xmm = gen_reg_rtx (V4SImode);
14030 if (TARGET_INTER_UNIT_MOVES)
14031 emit_insn (gen_movdi_to_sse (int_xmm, input));
14032 else if (TARGET_SSE_SPLIT_REGS)
14034 emit_clobber (int_xmm);
14035 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
14039 x = gen_reg_rtx (V2DImode);
14040 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
14041 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
14044 x = gen_rtx_CONST_VECTOR (V4SImode,
14045 gen_rtvec (4, GEN_INT (0x43300000UL),
14046 GEN_INT (0x45300000UL),
14047 const0_rtx, const0_rtx));
14048 exponents = validize_mem (force_const_mem (V4SImode, x));
14050 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
14051 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
14053 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
14054 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
14055 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
14056 (0x1.0p84 + double(fp_value_hi_xmm)).
14057 Note these exponents differ by 32. */
14059 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
14061 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
14062 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
14063 real_ldexp (&bias_lo_rvt, &dconst1, 52);
14064 real_ldexp (&bias_hi_rvt, &dconst1, 84);
14065 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
14066 x = const_double_from_real_value (bias_hi_rvt, DFmode);
14067 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
14068 biases = validize_mem (force_const_mem (V2DFmode, biases));
14069 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
14071 /* Add the upper and lower DFmode values together. */
14073 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
14076 x = copy_to_mode_reg (V2DFmode, fp_xmm);
14077 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
14078 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
14081 ix86_expand_vector_extract (false, target, fp_xmm, 0);
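/* Worked example of the bias trick (illustrative): for the input
   0x0000000100000002 (hi = 1, lo = 2), the interleave builds the
   doubles 0x1.0p52 + 2 and 0x1.0p84 + 1 * 2^32.  Subtracting the
   biases leaves 2.0 and 4294967296.0, and the final add yields
   4294967298.0, exactly (double) 0x0000000100000002.  */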
14084 /* Not used, but eases macroization of patterns. */
14086 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
14087 rtx input ATTRIBUTE_UNUSED)
14089 gcc_unreachable ();
14092 /* Convert an unsigned SImode value into a DFmode. Only currently used
14093 for SSE, but applicable anywhere. */
14096 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14098 REAL_VALUE_TYPE TWO31r;
14101 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14102 NULL, 1, OPTAB_DIRECT);
14104 fp = gen_reg_rtx (DFmode);
14105 emit_insn (gen_floatsidf2 (fp, x));
14107 real_ldexp (&TWO31r, &dconst1, 31);
14108 x = const_double_from_real_value (TWO31r, DFmode);
14110 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14112 emit_move_insn (target, x);
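/* Scalar model of the above (illustrative sketch):

       double
       conv (unsigned int u)
       {
         return (double) (int) (u - 0x80000000u) + 2147483648.0;
       }

   The bias subtraction wraps into the signed range, the signed
   int->double conversion is exact, and adding 2^31 back restores
   the original value.  */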
14115 /* Convert a signed DImode value into a DFmode. Only used for SSE in
14116 32-bit mode; otherwise we have a direct convert instruction. */
14119 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14121 REAL_VALUE_TYPE TWO32r;
14122 rtx fp_lo, fp_hi, x;
14124 fp_lo = gen_reg_rtx (DFmode);
14125 fp_hi = gen_reg_rtx (DFmode);
14127 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14129 real_ldexp (&TWO32r, &dconst1, 32);
14130 x = const_double_from_real_value (TWO32r, DFmode);
14131 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14133 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14135 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14138 emit_move_insn (target, x);
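/* Scalar model of the value computed above (illustrative sketch,
   ignoring the single rounding of the final add):

       double
       conv (long long v)
       {
         return (double) (int) (v >> 32) * 4294967296.0
                + (double) (unsigned int) (v & 0xffffffff);
       }
*/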
14141 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
14142 For x86_32, -mfpmath=sse, !optimize_size only. */
14144 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14146 REAL_VALUE_TYPE ONE16r;
14147 rtx fp_hi, fp_lo, int_hi, int_lo, x;
14149 real_ldexp (&ONE16r, &dconst1, 16);
14150 x = const_double_from_real_value (ONE16r, SFmode);
14151 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14152 NULL, 0, OPTAB_DIRECT);
14153 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14154 NULL, 0, OPTAB_DIRECT);
14155 fp_hi = gen_reg_rtx (SFmode);
14156 fp_lo = gen_reg_rtx (SFmode);
14157 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14158 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14159 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14161 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14163 if (!rtx_equal_p (target, fp_hi))
14164 emit_move_insn (target, fp_hi);
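/* Scalar model of the above (illustrative sketch); both halves fit
   in 16 bits, so their int->float conversions are exact:

       float
       conv (unsigned int u)
       {
         return (float) (int) (u >> 16) * 65536.0f
                + (float) (int) (u & 0xffff);
       }
*/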
14167 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
14168 then replicate the value for all elements of the vector
14169 register. */
14172 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14179 v = gen_rtvec (4, value, value, value, value);
14180 return gen_rtx_CONST_VECTOR (V4SImode, v);
14184 v = gen_rtvec (2, value, value);
14185 return gen_rtx_CONST_VECTOR (V2DImode, v);
14189 v = gen_rtvec (4, value, value, value, value);
14191 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
14192 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
14193 return gen_rtx_CONST_VECTOR (V4SFmode, v);
14197 v = gen_rtvec (2, value, value);
14199 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
14200 return gen_rtx_CONST_VECTOR (V2DFmode, v);
14203 gcc_unreachable ();
14207 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
14208 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
14209 for an SSE register. If VECT is true, then replicate the mask for
14210 all elements of the vector register. If INVERT is true, then create
14211 a mask excluding the sign bit. */
14214 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
14216 enum machine_mode vec_mode, imode;
14217 HOST_WIDE_INT hi, lo;
14222 /* Find the sign bit, sign extended to 2*HWI. */
14228 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
14229 lo = 0x80000000, hi = lo < 0;
14235 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
14236 if (HOST_BITS_PER_WIDE_INT >= 64)
14237 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
14239 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14244 vec_mode = VOIDmode;
14245 if (HOST_BITS_PER_WIDE_INT >= 64)
14248 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
14255 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
14259 lo = ~lo, hi = ~hi;
14265 mask = immed_double_const (lo, hi, imode);
14267 vec = gen_rtvec (2, v, mask);
14268 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
14269 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
14276 gcc_unreachable ();
14280 lo = ~lo, hi = ~hi;
14282 /* Force this value into the low part of an fp vector constant. */
14283 mask = immed_double_const (lo, hi, imode);
14284 mask = gen_lowpart (mode, mask);
14286 if (vec_mode == VOIDmode)
14287 return force_reg (mode, mask);
14289 v = ix86_build_const_vector (mode, vect, mask);
14290 return force_reg (vec_mode, v);
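/* Example masks produced (illustrative): for DFmode,
       INVERT == false:  0x8000000000000000  (just the sign bit)
       INVERT == true:   0x7fffffffffffffff  (all but the sign bit)
   replicated across every element when VECT is true.  */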
14293 /* Generate code for floating point ABS or NEG. */
14296 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
14299 rtx mask, set, use, clob, dst, src;
14300 bool use_sse = false;
14301 bool vector_mode = VECTOR_MODE_P (mode);
14302 enum machine_mode elt_mode = mode;
14306 elt_mode = GET_MODE_INNER (mode);
14309 else if (mode == TFmode)
14311 else if (TARGET_SSE_MATH)
14312 use_sse = SSE_FLOAT_MODE_P (mode);
14314 /* NEG and ABS performed with SSE use bitwise mask operations.
14315 Create the appropriate mask now. */
14317 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
14326 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
14327 set = gen_rtx_SET (VOIDmode, dst, set);
14332 set = gen_rtx_fmt_e (code, mode, src);
14333 set = gen_rtx_SET (VOIDmode, dst, set);
14336 use = gen_rtx_USE (VOIDmode, mask);
14337 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14338 emit_insn (gen_rtx_PARALLEL (VOIDmode,
14339 gen_rtvec (3, set, use, clob)));
14346 /* Expand a copysign operation. Special case operand 0 being a constant. */
14349 ix86_expand_copysign (rtx operands[])
14351 enum machine_mode mode;
14352 rtx dest, op0, op1, mask, nmask;
14354 dest = operands[0];
14358 mode = GET_MODE (dest);
14360 if (GET_CODE (op0) == CONST_DOUBLE)
14362 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
14364 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
14365 op0 = simplify_unary_operation (ABS, mode, op0, mode);
14367 if (mode == SFmode || mode == DFmode)
14369 enum machine_mode vmode;
14371 vmode = mode == SFmode ? V4SFmode : V2DFmode;
14373 if (op0 == CONST0_RTX (mode))
14374 op0 = CONST0_RTX (vmode);
14377 rtx v = ix86_build_const_vector (mode, false, op0);
14379 op0 = force_reg (vmode, v);
14382 else if (op0 != CONST0_RTX (mode))
14383 op0 = force_reg (mode, op0);
14385 mask = ix86_build_signbit_mask (mode, 0, 0);
14387 if (mode == SFmode)
14388 copysign_insn = gen_copysignsf3_const;
14389 else if (mode == DFmode)
14390 copysign_insn = gen_copysigndf3_const;
14392 copysign_insn = gen_copysigntf3_const;
14394 emit_insn (copysign_insn (dest, op0, op1, mask));
14398 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
14400 nmask = ix86_build_signbit_mask (mode, 0, 1);
14401 mask = ix86_build_signbit_mask (mode, 0, 0);
14403 if (mode == SFmode)
14404 copysign_insn = gen_copysignsf3_var;
14405 else if (mode == DFmode)
14406 copysign_insn = gen_copysigndf3_var;
14408 copysign_insn = gen_copysigntf3_var;
14410 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
14414 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
14415 be a constant, and so has already been expanded into a vector constant. */
14418 ix86_split_copysign_const (rtx operands[])
14420 enum machine_mode mode, vmode;
14421 rtx dest, op0, mask, x;
14423 dest = operands[0];
14425 mask = operands[3];
14427 mode = GET_MODE (dest);
14428 vmode = GET_MODE (mask);
14430 dest = simplify_gen_subreg (vmode, dest, mode, 0);
14431 x = gen_rtx_AND (vmode, dest, mask);
14432 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14434 if (op0 != CONST0_RTX (vmode))
14436 x = gen_rtx_IOR (vmode, dest, op0);
14437 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14441 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
14442 so we have to do two masks. */
14445 ix86_split_copysign_var (rtx operands[])
14447 enum machine_mode mode, vmode;
14448 rtx dest, scratch, op0, op1, mask, nmask, x;
14450 dest = operands[0];
14451 scratch = operands[1];
14454 nmask = operands[4];
14455 mask = operands[5];
14457 mode = GET_MODE (dest);
14458 vmode = GET_MODE (mask);
14460 if (rtx_equal_p (op0, op1))
14462 /* Shouldn't happen often (it's useless, obviously), but when it does
14463 we'd generate incorrect code if we continue below. */
14464 emit_move_insn (dest, op0);
14468 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
14470 gcc_assert (REGNO (op1) == REGNO (scratch));
14472 x = gen_rtx_AND (vmode, scratch, mask);
14473 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14476 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14477 x = gen_rtx_NOT (vmode, dest);
14478 x = gen_rtx_AND (vmode, x, op0);
14479 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14483 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
14485 x = gen_rtx_AND (vmode, scratch, mask);
14487 else /* alternative 2,4 */
14489 gcc_assert (REGNO (mask) == REGNO (scratch));
14490 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
14491 x = gen_rtx_AND (vmode, scratch, op1);
14493 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
14495 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
14497 dest = simplify_gen_subreg (vmode, op0, mode, 0);
14498 x = gen_rtx_AND (vmode, dest, nmask);
14500 else /* alternative 3,4 */
14502 gcc_assert (REGNO (nmask) == REGNO (dest));
14504 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
14505 x = gen_rtx_AND (vmode, dest, op0);
14507 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
14510 x = gen_rtx_IOR (vmode, dest, scratch);
14511 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
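/* Bitwise model of the two-mask split above (illustrative sketch):

       result = (op0 & ~SIGNMASK) | (op1 & SIGNMASK);

   i.e. magnitude from op0 and sign from op1, matching C's copysign.  */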
14514 /* Return TRUE or FALSE depending on whether the first SET in INSN
14515 has source and destination with matching CC modes, and whether the
14516 CC mode is at least as constrained as REQ_MODE. */
14519 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
14522 enum machine_mode set_mode;
14524 set = PATTERN (insn);
14525 if (GET_CODE (set) == PARALLEL)
14526 set = XVECEXP (set, 0, 0);
14527 gcc_assert (GET_CODE (set) == SET);
14528 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
14530 set_mode = GET_MODE (SET_DEST (set));
14534 if (req_mode != CCNOmode
14535 && (req_mode != CCmode
14536 || XEXP (SET_SRC (set), 1) != const0_rtx))
14540 if (req_mode == CCGCmode)
14544 if (req_mode == CCGOCmode || req_mode == CCNOmode)
14548 if (req_mode == CCZmode)
14559 gcc_unreachable ();
14562 return (GET_MODE (SET_SRC (set)) == set_mode);
14565 /* Generate insn patterns to do an integer compare of OPERANDS. */
14568 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
14570 enum machine_mode cmpmode;
14573 cmpmode = SELECT_CC_MODE (code, op0, op1);
14574 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
14576 /* This is very simple, but making the interface the same as in the
14577 FP case makes the rest of the code easier. */
14578 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
14579 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
14581 /* Return the test that should be put into the flags user, i.e.
14582 the bcc, scc, or cmov instruction. */
14583 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
14586 /* Figure out whether to use ordered or unordered fp comparisons.
14587 Return the appropriate mode to use. */
14590 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
14592 /* ??? In order to make all comparisons reversible, we do all comparisons
14593 non-trapping when compiling for IEEE. Once gcc is able to distinguish
14594 between the trapping and nontrapping forms of comparisons, we can make inequality
14595 comparisons trapping again, since it results in better code when using
14596 FCOM based compares. */
14597 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
14601 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
14603 enum machine_mode mode = GET_MODE (op0);
14605 if (SCALAR_FLOAT_MODE_P (mode))
14607 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14608 return ix86_fp_compare_mode (code);
14613 /* Only zero flag is needed. */
14614 case EQ: /* ZF=0 */
14615 case NE: /* ZF!=0 */
14617 /* Codes needing the carry flag. */
14618 case GEU: /* CF=0 */
14619 case LTU: /* CF=1 */
14620 /* Detect overflow checks. They need just the carry flag. */
14621 if (GET_CODE (op0) == PLUS
14622 && rtx_equal_p (op1, XEXP (op0, 0)))
14626 case GTU: /* CF=0 & ZF=0 */
14627 case LEU: /* CF=1 | ZF=1 */
14628 /* Detect overflow checks. They need just the carry flag. */
14629 if (GET_CODE (op0) == MINUS
14630 && rtx_equal_p (op1, XEXP (op0, 0)))
14634 /* Codes possibly doable only with the sign flag when
14635 comparing against zero. */
14636 case GE: /* SF=OF or SF=0 */
14637 case LT: /* SF<>OF or SF=1 */
14638 if (op1 == const0_rtx)
14641 /* For other cases the carry flag is not required. */
14643 /* Codes doable only with the sign flag when comparing
14644 against zero, but we miss the jump instruction for it,
14645 so we need to use relational tests against overflow,
14646 which thus needs to be zero. */
14647 case GT: /* ZF=0 & SF=OF */
14648 case LE: /* ZF=1 | SF<>OF */
14649 if (op1 == const0_rtx)
14653 /* The strcmp pattern does (use flags), and combine may ask us
14654 for a proper mode. */
14658 gcc_unreachable ();
14662 /* Return the fixed registers used for condition codes. */
14665 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
14672 /* If two condition code modes are compatible, return a condition code
14673 mode which is compatible with both. Otherwise, return
14674 VOIDmode. */
14676 static enum machine_mode
14677 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
14682 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
14685 if ((m1 == CCGCmode && m2 == CCGOCmode)
14686 || (m1 == CCGOCmode && m2 == CCGCmode))
14692 gcc_unreachable ();
14722 /* These are only compatible with themselves, which we already
14723 know. */
14729 /* Return a comparison we can do that is equivalent to
14730 swap_condition (code), except possibly for orderedness.
14731 But never change orderedness if TARGET_IEEE_FP, returning
14732 UNKNOWN in that case if necessary. */
14734 static enum rtx_code
14735 ix86_fp_swap_condition (enum rtx_code code)
14739 case GT: /* GTU - CF=0 & ZF=0 */
14740 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
14741 case GE: /* GEU - CF=0 */
14742 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
14743 case UNLT: /* LTU - CF=1 */
14744 return TARGET_IEEE_FP ? UNKNOWN : GT;
14745 case UNLE: /* LEU - CF=1 | ZF=1 */
14746 return TARGET_IEEE_FP ? UNKNOWN : GE;
14748 return swap_condition (code);
14752 /* Return the cost of comparison CODE using the best strategy for performance.
14753 All the following functions use the number of instructions as a cost metric.
14754 In the future this should be tweaked to compute bytes for optimize_size and
14755 to take into account the performance of various instructions on various CPUs. */
14758 ix86_fp_comparison_cost (enum rtx_code code)
14762 /* The cost of code using bit-twiddling on %ah. */
14779 arith_cost = TARGET_IEEE_FP ? 5 : 4;
14783 arith_cost = TARGET_IEEE_FP ? 6 : 4;
14786 gcc_unreachable ();
14789 switch (ix86_fp_comparison_strategy (code))
14791 case IX86_FPCMP_COMI:
14792 return arith_cost > 4 ? 3 : 2;
14793 case IX86_FPCMP_SAHF:
14794 return arith_cost > 4 ? 4 : 3;
14800 /* Return strategy to use for floating-point. We assume that fcomi is always
14801 preferable where available, since that is also true when looking at size
14802 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
14804 enum ix86_fpcmp_strategy
14805 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
14807 /* Do fcomi/sahf based test when profitable. */
14810 return IX86_FPCMP_COMI;
14812 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
14813 return IX86_FPCMP_SAHF;
14815 return IX86_FPCMP_ARITH;
14818 /* Swap, force into registers, or otherwise massage the two operands
14819 to an fp comparison. The operands are updated in place; the new
14820 comparison code is returned. */
14822 static enum rtx_code
14823 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14825 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14826 rtx op0 = *pop0, op1 = *pop1;
14827 enum machine_mode op_mode = GET_MODE (op0);
14828 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14830 /* All of the unordered compare instructions only work on registers.
14831 The same is true of the fcomi compare instructions. The XFmode
14832 compare instructions require registers except when comparing
14833 against zero or when converting operand 1 from fixed point to
14834 floating point. */
14837 && (fpcmp_mode == CCFPUmode
14838 || (op_mode == XFmode
14839 && ! (standard_80387_constant_p (op0) == 1
14840 || standard_80387_constant_p (op1) == 1)
14841 && GET_CODE (op1) != FLOAT)
14842 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
14844 op0 = force_reg (op_mode, op0);
14845 op1 = force_reg (op_mode, op1);
14849 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14850 things around if they appear profitable, otherwise force op0
14851 into a register. */
14853 if (standard_80387_constant_p (op0) == 0
14855 && ! (standard_80387_constant_p (op1) == 0
14858 enum rtx_code new_code = ix86_fp_swap_condition (code);
14859 if (new_code != UNKNOWN)
14862 tmp = op0, op0 = op1, op1 = tmp;
14868 op0 = force_reg (op_mode, op0);
14870 if (CONSTANT_P (op1))
14872 int tmp = standard_80387_constant_p (op1);
14874 op1 = validize_mem (force_const_mem (op_mode, op1));
14878 op1 = force_reg (op_mode, op1);
14881 op1 = force_reg (op_mode, op1);
14885 /* Try to rearrange the comparison to make it cheaper. */
14886 if (ix86_fp_comparison_cost (code)
14887 > ix86_fp_comparison_cost (swap_condition (code))
14888 && (REG_P (op1) || can_create_pseudo_p ()))
14891 tmp = op0, op0 = op1, op1 = tmp;
14892 code = swap_condition (code);
14894 op0 = force_reg (op_mode, op0);
14902 /* Convert comparison codes we use to represent FP comparison to integer
14903 code that will result in a proper branch. Return UNKNOWN if no such code
14904 is available. */
14907 ix86_fp_compare_code_to_integer (enum rtx_code code)
14936 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14939 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
14941 enum machine_mode fpcmp_mode, intcmp_mode;
14944 fpcmp_mode = ix86_fp_compare_mode (code);
14945 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14947 /* Do fcomi/sahf based test when profitable. */
14948 switch (ix86_fp_comparison_strategy (code))
14950 case IX86_FPCMP_COMI:
14951 intcmp_mode = fpcmp_mode;
14952 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14953 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14958 case IX86_FPCMP_SAHF:
14959 intcmp_mode = fpcmp_mode;
14960 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14961 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14965 scratch = gen_reg_rtx (HImode);
14966 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14967 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14970 case IX86_FPCMP_ARITH:
14971 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14972 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14973 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14975 scratch = gen_reg_rtx (HImode);
14976 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14978 /* In the unordered case, we have to check C2 for NaNs, which
14979 doesn't happen to work out to anything nice combination-wise.
14980 So do some bit twiddling on the value we've got in AH to come
14981 up with an appropriate set of condition codes. */
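/* Reminder (illustrative): after fnstsw the status word's high byte
   is moved into %ah, so the FPU condition bits map to
       C0 -> 0x01, C2 -> 0x04, C3 -> 0x40
   and a test against 0x45 below checks C0|C2|C3 at once.  */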
14983 intcmp_mode = CCNOmode;
14988 if (code == GT || !TARGET_IEEE_FP)
14990 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14995 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14996 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14997 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14998 intcmp_mode = CCmode;
15004 if (code == LT && TARGET_IEEE_FP)
15006 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15007 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
15008 intcmp_mode = CCmode;
15013 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
15019 if (code == GE || !TARGET_IEEE_FP)
15021 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
15026 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15027 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
15033 if (code == LE && TARGET_IEEE_FP)
15035 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15036 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15037 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15038 intcmp_mode = CCmode;
15043 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15049 if (code == EQ && TARGET_IEEE_FP)
15051 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15052 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15053 intcmp_mode = CCmode;
15058 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15064 if (code == NE && TARGET_IEEE_FP)
15066 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15067 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
15073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15079 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15083 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15088 gcc_unreachable ();
15096 /* Return the test that should be put into the flags user, i.e.
15097 the bcc, scc, or cmov instruction. */
15098 return gen_rtx_fmt_ee (code, VOIDmode,
15099 gen_rtx_REG (intcmp_mode, FLAGS_REG),
15104 ix86_expand_compare (enum rtx_code code)
15107 op0 = ix86_compare_op0;
15108 op1 = ix86_compare_op1;
15110 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC)
15111 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_op0, ix86_compare_op1);
15113 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15115 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15116 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15119 ret = ix86_expand_int_compare (code, op0, op1);
15125 ix86_expand_branch (enum rtx_code code, rtx label)
15129 switch (GET_MODE (ix86_compare_op0))
15138 tmp = ix86_expand_compare (code);
15139 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15140 gen_rtx_LABEL_REF (VOIDmode, label),
15142 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15149 /* Expand DImode branch into multiple compare+branch. */
15151 rtx lo[2], hi[2], label2;
15152 enum rtx_code code1, code2, code3;
15153 enum machine_mode submode;
15155 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
15157 tmp = ix86_compare_op0;
15158 ix86_compare_op0 = ix86_compare_op1;
15159 ix86_compare_op1 = tmp;
15160 code = swap_condition (code);
15162 if (GET_MODE (ix86_compare_op0) == DImode)
15164 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
15165 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
15170 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
15171 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
15175 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15176 avoid two branches. This costs one extra insn, so disable when
15177 optimizing for size. */
15179 if ((code == EQ || code == NE)
15180 && (!optimize_insn_for_size_p ()
15181 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15186 if (hi[1] != const0_rtx)
15187 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15188 NULL_RTX, 0, OPTAB_WIDEN);
15191 if (lo[1] != const0_rtx)
15192 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15193 NULL_RTX, 0, OPTAB_WIDEN);
15195 tmp = expand_binop (submode, ior_optab, xor1, xor0,
15196 NULL_RTX, 0, OPTAB_WIDEN);
15198 ix86_compare_op0 = tmp;
15199 ix86_compare_op1 = const0_rtx;
15200 ix86_expand_branch (code, label);
15204 /* Otherwise, if we are doing less-than or greater-or-equal-than,
15205 op1 is a constant, and the low word is zero, then we can just
15206 examine the high word. Similarly for low word -1 and
15207 less-or-equal-than or greater-than. */
15209 if (CONST_INT_P (hi[1]))
15212 case LT: case LTU: case GE: case GEU:
15213 if (lo[1] == const0_rtx)
15215 ix86_compare_op0 = hi[0];
15216 ix86_compare_op1 = hi[1];
15217 ix86_expand_branch (code, label);
15221 case LE: case LEU: case GT: case GTU:
15222 if (lo[1] == constm1_rtx)
15224 ix86_compare_op0 = hi[0];
15225 ix86_compare_op1 = hi[1];
15226 ix86_expand_branch (code, label);
15234 /* Otherwise, we need two or three jumps. */
15236 label2 = gen_label_rtx ();
15239 code2 = swap_condition (code);
15240 code3 = unsigned_condition (code);
15244 case LT: case GT: case LTU: case GTU:
15247 case LE: code1 = LT; code2 = GT; break;
15248 case GE: code1 = GT; code2 = LT; break;
15249 case LEU: code1 = LTU; code2 = GTU; break;
15250 case GEU: code1 = GTU; code2 = LTU; break;
15252 case EQ: code1 = UNKNOWN; code2 = NE; break;
15253 case NE: code2 = UNKNOWN; break;
15256 gcc_unreachable ();
15261 * if (hi(a) < hi(b)) goto true;
15262 * if (hi(a) > hi(b)) goto false;
15263 * if (lo(a) < lo(b)) goto true;
15267 ix86_compare_op0 = hi[0];
15268 ix86_compare_op1 = hi[1];
15270 if (code1 != UNKNOWN)
15271 ix86_expand_branch (code1, label);
15272 if (code2 != UNKNOWN)
15273 ix86_expand_branch (code2, label2);
15275 ix86_compare_op0 = lo[0];
15276 ix86_compare_op1 = lo[1];
15277 ix86_expand_branch (code3, label);
15279 if (code2 != UNKNOWN)
15280 emit_label (label2);
15285 /* If we have already emitted a compare insn, go straight to simple.
15286 ix86_expand_compare won't emit anything if ix86_compare_emitted
15287 is non-NULL. */
15288 gcc_assert (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_CC);
15293 /* Split branch based on floating point condition. */
15295 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
15296 rtx target1, rtx target2, rtx tmp, rtx pushed)
15301 if (target2 != pc_rtx)
15304 code = reverse_condition_maybe_unordered (code);
15309 condition = ix86_expand_fp_compare (code, op1, op2,
15312 /* Remove pushed operand from stack. */
15314 ix86_free_from_memory (GET_MODE (pushed));
15316 i = emit_jump_insn (gen_rtx_SET
15318 gen_rtx_IF_THEN_ELSE (VOIDmode,
15319 condition, target1, target2)));
15320 if (split_branch_probability >= 0)
15321 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
15325 ix86_expand_setcc (enum rtx_code code, rtx dest)
15329 gcc_assert (GET_MODE (dest) == QImode);
15331 ret = ix86_expand_compare (code);
15332 PUT_MODE (ret, QImode);
15333 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
15336 /* Expand comparison setting or clearing carry flag. Return true when
15337 successful and set pop for the operation. */
15339 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
15341 enum machine_mode mode =
15342 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
15344 /* Do not handle DImode compares that go through a special path. */
15345 if (mode == (TARGET_64BIT ? TImode : DImode))
15348 if (SCALAR_FLOAT_MODE_P (mode))
15350 rtx compare_op, compare_seq;
15352 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15354 /* Shortcut: following common codes never translate
15355 into carry flag compares. */
15356 if (code == EQ || code == NE || code == UNEQ || code == LTGT
15357 || code == ORDERED || code == UNORDERED)
15360 /* These comparisons require the zero flag; swap operands so they won't need it. */
15361 if ((code == GT || code == UNLE || code == LE || code == UNGT)
15362 && !TARGET_IEEE_FP)
15367 code = swap_condition (code);
15370 /* Try to expand the comparison and verify that we end up with
15371 a carry flag based comparison. This fails to be true only when
15372 we decide to expand the comparison using arithmetic, which is
15373 not a very common scenario. */
15375 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15376 compare_seq = get_insns ();
15379 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
15380 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
15381 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
15383 code = GET_CODE (compare_op);
15385 if (code != LTU && code != GEU)
15388 emit_insn (compare_seq);
15393 if (!INTEGRAL_MODE_P (mode))
15402 /* Convert a==0 into (unsigned)a<1. */
15405 if (op1 != const0_rtx)
15408 code = (code == EQ ? LTU : GEU);
15411 /* Convert a>b into b<a or a>=b+1. */
15414 if (CONST_INT_P (op1))
15416 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
15417 /* Bail out on overflow. We still can swap operands but that
15418 would force loading of the constant into a register. */
15419 if (op1 == const0_rtx
15420 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
15422 code = (code == GTU ? GEU : LTU);
15429 code = (code == GTU ? LTU : GEU);
15433 /* Convert a>=0 into (unsigned)a<0x80000000. */
15436 if (mode == DImode || op1 != const0_rtx)
15438 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15439 code = (code == LT ? GEU : LTU);
15443 if (mode == DImode || op1 != constm1_rtx)
15445 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
15446 code = (code == LE ? GEU : LTU);
15452 /* Swapping operands may cause a constant to appear as the first operand. */
15453 if (!nonimmediate_operand (op0, VOIDmode))
15455 if (!can_create_pseudo_p ())
15457 op0 = force_reg (mode, op0);
15459 ix86_compare_op0 = op0;
15460 ix86_compare_op1 = op1;
15461 *pop = ix86_expand_compare (code);
15462 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
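/* Illustrative conversion performed above (sketch): an unsigned
   'a <= 42' (LEU against a constant) becomes 'a < 43' (LTU), a pure
   carry-flag test that sbb/adc based sequences can consume.  */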
15467 ix86_expand_int_movcc (rtx operands[])
15469 enum rtx_code code = GET_CODE (operands[1]), compare_code;
15470 rtx compare_seq, compare_op;
15471 enum machine_mode mode = GET_MODE (operands[0]);
15472 bool sign_bit_compare_p = false;
15475 ix86_compare_op0 = XEXP (operands[1], 0);
15476 ix86_compare_op1 = XEXP (operands[1], 1);
15477 compare_op = ix86_expand_compare (code);
15478 compare_seq = get_insns ();
15481 compare_code = GET_CODE (compare_op);
15483 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
15484 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
15485 sign_bit_compare_p = true;
15487 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
15488 HImode insns, we'd be swallowed in word prefix ops. */
15490 if ((mode != HImode || TARGET_FAST_PREFIX)
15491 && (mode != (TARGET_64BIT ? TImode : DImode))
15492 && CONST_INT_P (operands[2])
15493 && CONST_INT_P (operands[3]))
15495 rtx out = operands[0];
15496 HOST_WIDE_INT ct = INTVAL (operands[2]);
15497 HOST_WIDE_INT cf = INTVAL (operands[3]);
15498 HOST_WIDE_INT diff;
15501 /* Sign bit compares are better done using shifts than by using
15502 sbb. */
15503 if (sign_bit_compare_p
15504 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
15505 ix86_compare_op1, &compare_op))
15507 /* Detect overlap between destination and compare sources. */
15510 if (!sign_bit_compare_p)
15513 bool fpcmp = false;
15515 compare_code = GET_CODE (compare_op);
15517 flags = XEXP (compare_op, 0);
15519 if (GET_MODE (flags) == CCFPmode
15520 || GET_MODE (flags) == CCFPUmode)
15524 = ix86_fp_compare_code_to_integer (compare_code);
15527 /* To simplify the rest of the code, restrict to the GEU case. */
15528 if (compare_code == LTU)
15530 HOST_WIDE_INT tmp = ct;
15533 compare_code = reverse_condition (compare_code);
15534 code = reverse_condition (code);
15539 PUT_CODE (compare_op,
15540 reverse_condition_maybe_unordered
15541 (GET_CODE (compare_op)));
15543 PUT_CODE (compare_op,
15544 reverse_condition (GET_CODE (compare_op)));
15548 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
15549 || reg_overlap_mentioned_p (out, ix86_compare_op1))
15550 tmp = gen_reg_rtx (mode);
15552 if (mode == DImode)
15553 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
15555 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
15556 flags, compare_op));
15560 if (code == GT || code == GE)
15561 code = reverse_condition (code);
15564 HOST_WIDE_INT tmp = ct;
15569 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
15570 ix86_compare_op1, VOIDmode, 0, -1);
15583 tmp = expand_simple_binop (mode, PLUS,
15585 copy_rtx (tmp), 1, OPTAB_DIRECT);
15596 tmp = expand_simple_binop (mode, IOR,
15598 copy_rtx (tmp), 1, OPTAB_DIRECT);
15600 else if (diff == -1 && ct)
15610 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15612 tmp = expand_simple_binop (mode, PLUS,
15613 copy_rtx (tmp), GEN_INT (cf),
15614 copy_rtx (tmp), 1, OPTAB_DIRECT);
15622 * andl cf - ct, dest
15632 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15635 tmp = expand_simple_binop (mode, AND,
15637 gen_int_mode (cf - ct, mode),
15638 copy_rtx (tmp), 1, OPTAB_DIRECT);
15640 tmp = expand_simple_binop (mode, PLUS,
15641 copy_rtx (tmp), GEN_INT (ct),
15642 copy_rtx (tmp), 1, OPTAB_DIRECT);
15645 if (!rtx_equal_p (tmp, out))
15646 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15648 return 1; /* DONE */
15653 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15656 tmp = ct, ct = cf, cf = tmp;
15659 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15661 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15663 /* We may be reversing an unordered compare to a normal compare, which
15664 is not valid in general (we may convert a non-trapping condition
15665 to a trapping one); however, on i386 we currently emit all
15666 comparisons unordered. */
15667 compare_code = reverse_condition_maybe_unordered (compare_code);
15668 code = reverse_condition_maybe_unordered (code);
15672 compare_code = reverse_condition (compare_code);
15673 code = reverse_condition (code);
15677 compare_code = UNKNOWN;
15678 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15679 && CONST_INT_P (ix86_compare_op1))
15681 if (ix86_compare_op1 == const0_rtx
15682 && (code == LT || code == GE))
15683 compare_code = code;
15684 else if (ix86_compare_op1 == constm1_rtx)
15688 else if (code == GT)
15693 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15694 if (compare_code != UNKNOWN
15695 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15696 && (cf == -1 || ct == -1))
15698 /* If lea code below could be used, only optimize
15699 if it results in a 2 insn sequence. */
15701 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15702 || diff == 3 || diff == 5 || diff == 9)
15703 || (compare_code == LT && ct == -1)
15704 || (compare_code == GE && cf == -1))
/*
 * notl op1	(if necessary)
 * sarl $31, op1
 * orl cf, op1
 */
15715 code = reverse_condition (code);
15718 out = emit_store_flag (out, code, ix86_compare_op0,
15719 ix86_compare_op1, VOIDmode, 0, -1);
out = expand_simple_binop (mode, IOR,
                           out, GEN_INT (cf),
                           out, 1, OPTAB_DIRECT);
15724 if (out != operands[0])
15725 emit_move_insn (operands[0], out);
15727 return 1; /* DONE */
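/* Illustrative sketch, not part of the original sources: with ct == -1
   the store-flag result is already the "true" arm, so

       dest = (x < 0) ? -1 : cf

   shrinks to the branch-free pair (register hypothetical)

       sarl  $31, %eax    ; 0 when x >= 0, -1 when x < 0
       orl   $cf, %eax    ; -1 stays -1, 0 becomes cf  */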
15732 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15733 || diff == 3 || diff == 5 || diff == 9)
15734 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15736 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15742 * lea cf(dest*(ct-cf)),dest
15746 * This also catches the degenerate setcc-only case.
15752 out = emit_store_flag (out, code, ix86_compare_op0,
15753 ix86_compare_op1, VOIDmode, 0, 1);
/* On x86_64 the lea instruction operates on Pmode, so we need
   to get the arithmetic done in the proper mode to match.  */
15759 tmp = copy_rtx (out);
15763 out1 = copy_rtx (out);
15764 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15768 tmp = gen_rtx_PLUS (mode, tmp, out1);
15774 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15777 if (!rtx_equal_p (tmp, out))
15780 out = force_operand (tmp, copy_rtx (out));
15782 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15784 if (!rtx_equal_p (out, operands[0]))
15785 emit_move_insn (operands[0], copy_rtx (out));
15787 return 1; /* DONE */
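/* Illustrative sketch, not part of the original sources: when
   diff = ct - cf is a valid lea scale (or base+index combination),
   the whole select costs one setcc plus one lea, e.g. for diff == 4:

       xorl  %eax, %eax
       cmpl  %ebx, %ecx
       setcc %al
       leal  cf(,%eax,4), %eax   ; cf when clear, cf + 4 == ct when set

   Registers and constants are hypothetical.  */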
/*
 * General case:			Jumpful:
 *   xorl dest,dest		cmpl op1, op2
 *   cmpl op1, op2		movl ct, dest
 *   setcc dest			jcc 1f
 *   decl dest			movl cf, dest
 *   andl (cf-ct),dest		1:
 *   addl ct,dest
 *
 * Size 20.			Size 14.
 *
 * This is reasonably steep, but branch mispredict costs are
 * high on modern cpus, so consider failing only if optimizing
 * for space.
 */
15806 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
&& BRANCH_COST (optimize_insn_for_speed_p (),
                false) >= 2)
15812 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15817 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15819 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15821 /* We may be reversing unordered compare to normal compare,
15822 that is not valid in general (we may convert non-trapping
15823 condition to trapping one), however on i386 we currently
15824 emit all comparisons unordered. */
15825 code = reverse_condition_maybe_unordered (code);
15829 code = reverse_condition (code);
15830 if (compare_code != UNKNOWN)
15831 compare_code = reverse_condition (compare_code);
15835 if (compare_code != UNKNOWN)
/* notl op1	(if needed)
   sarl $31, op1
   andl (cf-ct), op1
   addl ct, op1

   For x < 0 (resp. x <= -1) there will be no notl,
   so if possible swap the constants to get rid of the
   complement.
   True/false will be -1/0 while code below (store flag
   followed by decrement) is 0/-1, so the constants need
   to be exchanged once more.  */
15849 if (compare_code == GE || !cf)
15851 code = reverse_condition (code);
15856 HOST_WIDE_INT tmp = cf;
15861 out = emit_store_flag (out, code, ix86_compare_op0,
15862 ix86_compare_op1, VOIDmode, 0, -1);
15866 out = emit_store_flag (out, code, ix86_compare_op0,
15867 ix86_compare_op1, VOIDmode, 0, 1);
15869 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15870 copy_rtx (out), 1, OPTAB_DIRECT);
15873 out = expand_simple_binop (mode, AND, copy_rtx (out),
15874 gen_int_mode (cf - ct, mode),
15875 copy_rtx (out), 1, OPTAB_DIRECT);
15877 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15878 copy_rtx (out), 1, OPTAB_DIRECT);
15879 if (!rtx_equal_p (out, operands[0]))
15880 emit_move_insn (operands[0], copy_rtx (out));
15882 return 1; /* DONE */
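/* Illustrative sketch, not part of the original sources: the jumpless
   sequence built above is (registers hypothetical)

       xorl  %eax, %eax
       cmpl  %ebx, %ecx
       setcc %al              ; eax = 0 or 1
       decl  %eax             ; eax = -1 or 0
       andl  $(cf - ct), %eax
       addl  $ct, %eax        ; eax = cf or ct

   trading a few bytes of code for the absence of a branch.  */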
15886 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15888 /* Try a few things more with specific constants and a variable. */
15891 rtx var, orig_out, out, tmp;
15893 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15894 return 0; /* FAIL */
15896 /* If one of the two operands is an interesting constant, load a
15897 constant with the above and mask it in with a logical operation. */
15899 if (CONST_INT_P (operands[2]))
15902 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15903 operands[3] = constm1_rtx, op = and_optab;
15904 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15905 operands[3] = const0_rtx, op = ior_optab;
15907 return 0; /* FAIL */
15909 else if (CONST_INT_P (operands[3]))
15912 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15913 operands[2] = constm1_rtx, op = and_optab;
else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15915 operands[2] = const0_rtx, op = ior_optab;
15917 return 0; /* FAIL */
15920 return 0; /* FAIL */
15922 orig_out = operands[0];
15923 tmp = gen_reg_rtx (mode);
15926 /* Recurse to get the constant loaded. */
15927 if (ix86_expand_int_movcc (operands) == 0)
15928 return 0; /* FAIL */
15930 /* Mask in the interesting variable. */
out = expand_binop (mode, op, var, tmp, orig_out, 0,
                    OPTAB_WIDEN);
15933 if (!rtx_equal_p (out, orig_out))
15934 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15936 return 1; /* DONE */
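/* Illustrative sketch, not part of the original sources: when one arm
   is 0 (or -1), the cmove-less fallback above first materializes a
   0/-1 mask by recursing, then folds in the variable arm:

       tmp  = cond ? 0 : -1;      (recursive ix86_expand_int_movcc)
       dest = var & tmp;          (yields cond ? 0 : var)

   with IOR playing the same role when the interesting constant is -1.  */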
/*
 * For comparison with above,
 *
 * movl cf,dest
 * movl ct,tmp
 * cmpl op1,op2
 * cmovcc tmp,dest
 *
 * Size 15.
 */
15950 if (! nonimmediate_operand (operands[2], mode))
15951 operands[2] = force_reg (mode, operands[2]);
15952 if (! nonimmediate_operand (operands[3], mode))
15953 operands[3] = force_reg (mode, operands[3]);
if (! register_operand (operands[2], VOIDmode)
    && (mode == QImode
        || ! register_operand (operands[3], VOIDmode)))
15958 operands[2] = force_reg (mode, operands[2]);
if (mode == QImode
    && ! register_operand (operands[3], VOIDmode))
15962 operands[3] = force_reg (mode, operands[3]);
15964 emit_insn (compare_seq);
15965 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15966 gen_rtx_IF_THEN_ELSE (mode,
compare_op, operands[2],
operands[3])));
15970 return 1; /* DONE */
15973 /* Swap, force into registers, or otherwise massage the two operands
15974 to an sse comparison with a mask result. Thus we differ a bit from
15975 ix86_prepare_fp_compare_args which expects to produce a flags result.
15977 The DEST operand exists to help determine whether to commute commutative
15978 operators. The POP0/POP1 operands are updated in place. The new
15979 comparison code is returned, or UNKNOWN if not implementable. */
15981 static enum rtx_code
15982 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15983 rtx *pop0, rtx *pop1)
15991 /* We have no LTGT as an operator. We could implement it with
15992 NE & ORDERED, but this requires an extra temporary. It's
15993 not clear that it's worth it. */
16000 /* These are supported directly. */
16007 /* For commutative operators, try to canonicalize the destination
16008 operand to be first in the comparison - this helps reload to
16009 avoid extra moves. */
16010 if (!dest || !rtx_equal_p (dest, *pop1))
16018 /* These are not supported directly. Swap the comparison operands
16019 to transform into something that is supported. */
16023 code = swap_condition (code);
16027 gcc_unreachable ();
16033 /* Detect conditional moves that exactly match min/max operational
16034 semantics. Note that this is IEEE safe, as long as we don't
16035 interchange the operands.
16037 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16038 and TRUE if the operation is successful and instructions are emitted. */
16041 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16042 rtx cmp_op1, rtx if_true, rtx if_false)
16044 enum machine_mode mode;
16050 else if (code == UNGE)
16053 if_true = if_false;
16059 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16061 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16066 mode = GET_MODE (dest);
16068 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16069 but MODE may be a vector mode and thus not appropriate. */
16070 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16072 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16075 if_true = force_reg (mode, if_true);
16076 v = gen_rtvec (2, if_true, if_false);
16077 tmp = gen_rtx_UNSPEC (mode, v, u);
16081 code = is_min ? SMIN : SMAX;
16082 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16085 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
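/* Illustrative sketch, not part of the original sources: the patterns
   accepted above are exactly those matching SSE semantics, e.g.

       dest = (a < b) ? a : b      ->  minss
       dest = (a < b) ? b : a      ->  maxss

   minss/maxss return their second (source) operand when the compare is
   unordered, so the operand order must be preserved for IEEE safety;
   only under -ffinite-math-only plus -funsafe-math-optimizations are
   the plain SMIN/SMAX codes emitted instead.  */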
16089 /* Expand an sse vector comparison. Return the register with the result. */
16092 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16093 rtx op_true, rtx op_false)
16095 enum machine_mode mode = GET_MODE (dest);
16098 cmp_op0 = force_reg (mode, cmp_op0);
16099 if (!nonimmediate_operand (cmp_op1, mode))
16100 cmp_op1 = force_reg (mode, cmp_op1);
16103 || reg_overlap_mentioned_p (dest, op_true)
16104 || reg_overlap_mentioned_p (dest, op_false))
16105 dest = gen_reg_rtx (mode);
16107 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16108 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16113 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16114 operations. This is used for both scalar and vector conditional moves. */
16117 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16119 enum machine_mode mode = GET_MODE (dest);
16122 if (op_false == CONST0_RTX (mode))
16124 op_true = force_reg (mode, op_true);
16125 x = gen_rtx_AND (mode, cmp, op_true);
16126 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16128 else if (op_true == CONST0_RTX (mode))
16130 op_false = force_reg (mode, op_false);
16131 x = gen_rtx_NOT (mode, cmp);
16132 x = gen_rtx_AND (mode, x, op_false);
16133 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16135 else if (TARGET_XOP)
16137 rtx pcmov = gen_rtx_SET (mode, dest,
16138 gen_rtx_IF_THEN_ELSE (mode, cmp,
16145 op_true = force_reg (mode, op_true);
16146 op_false = force_reg (mode, op_false);
16148 t2 = gen_reg_rtx (mode);
16150 t3 = gen_reg_rtx (mode);
16154 x = gen_rtx_AND (mode, op_true, cmp);
16155 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16157 x = gen_rtx_NOT (mode, cmp);
16158 x = gen_rtx_AND (mode, x, op_false);
16159 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16161 x = gen_rtx_IOR (mode, t3, t2);
16162 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
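/* Illustrative sketch, not part of the original sources: with CMP an
   all-ones/all-zeros mask per element, the fallback select built above
   is the classic three-instruction idiom

       t2   = cmp & op_true        ; andps
       t3   = ~cmp & op_false      ; andnps
       dest = t2 | t3              ; orps

   i.e. dest = (cmp & op_true) | (~cmp & op_false); XOP targets get the
   whole select in one vpcmov instead.  */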
16166 /* Expand a floating-point conditional move. Return true if successful. */
16169 ix86_expand_fp_movcc (rtx operands[])
16171 enum machine_mode mode = GET_MODE (operands[0]);
16172 enum rtx_code code = GET_CODE (operands[1]);
16173 rtx tmp, compare_op;
16175 ix86_compare_op0 = XEXP (operands[1], 0);
16176 ix86_compare_op1 = XEXP (operands[1], 1);
16177 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16179 enum machine_mode cmode;
16181 /* Since we've no cmove for sse registers, don't force bad register
16182 allocation just to gain access to it. Deny movcc when the
16183 comparison mode doesn't match the move mode. */
16184 cmode = GET_MODE (ix86_compare_op0);
16185 if (cmode == VOIDmode)
16186 cmode = GET_MODE (ix86_compare_op1);
16190 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16192 &ix86_compare_op1);
16193 if (code == UNKNOWN)
16196 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
16197 ix86_compare_op1, operands[2],
16201 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
16202 ix86_compare_op1, operands[2], operands[3]);
16203 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16207 /* The floating point conditional move instructions don't directly
16208 support conditions resulting from a signed integer comparison. */
16210 compare_op = ix86_expand_compare (code);
16211 if (!fcmov_comparison_operator (compare_op, VOIDmode))
16213 tmp = gen_reg_rtx (QImode);
16214 ix86_expand_setcc (code, tmp);
16216 ix86_compare_op0 = tmp;
16217 ix86_compare_op1 = const0_rtx;
16218 compare_op = ix86_expand_compare (code);
16221 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16222 gen_rtx_IF_THEN_ELSE (mode, compare_op,
16223 operands[2], operands[3])));
16228 /* Expand a floating-point vector conditional move; a vcond operation
16229 rather than a movcc operation. */
16232 ix86_expand_fp_vcond (rtx operands[])
16234 enum rtx_code code = GET_CODE (operands[3]);
16237 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
16238 &operands[4], &operands[5]);
16239 if (code == UNKNOWN)
16242 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
16243 operands[5], operands[1], operands[2]))
16246 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
16247 operands[1], operands[2]);
16248 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
16252 /* Expand a signed/unsigned integral vector conditional move. */
16255 ix86_expand_int_vcond (rtx operands[])
16257 enum machine_mode mode = GET_MODE (operands[0]);
16258 enum rtx_code code = GET_CODE (operands[3]);
16259 bool negate = false;
16262 cop0 = operands[4];
16263 cop1 = operands[5];
16265 /* XOP supports all of the comparisons on all vector int types. */
16268 /* Canonicalize the comparison to EQ, GT, GTU. */
16279 code = reverse_condition (code);
16285 code = reverse_condition (code);
16291 code = swap_condition (code);
16292 x = cop0, cop0 = cop1, cop1 = x;
16296 gcc_unreachable ();
16299 /* Only SSE4.1/SSE4.2 supports V2DImode. */
16300 if (mode == V2DImode)
16305 /* SSE4.1 supports EQ. */
16306 if (!TARGET_SSE4_1)
16312 /* SSE4.2 supports GT/GTU. */
16313 if (!TARGET_SSE4_2)
16318 gcc_unreachable ();
16322 /* Unsigned parallel compare is not supported by the hardware.
Play some tricks to turn this into a signed comparison
against 0.  */
if (code == GTU)
16327 cop0 = force_reg (mode, cop0);
16335 rtx (*gen_sub3) (rtx, rtx, rtx);
/* Subtract (-(INT MAX) - 1) from both operands to make
   them signed.  */
mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
                                true, false);
16341 gen_sub3 = (mode == V4SImode
16342 ? gen_subv4si3 : gen_subv2di3);
16343 t1 = gen_reg_rtx (mode);
16344 emit_insn (gen_sub3 (t1, cop0, mask));
16346 t2 = gen_reg_rtx (mode);
16347 emit_insn (gen_sub3 (t2, cop1, mask));
16357 /* Perform a parallel unsigned saturating subtraction. */
16358 x = gen_reg_rtx (mode);
16359 emit_insn (gen_rtx_SET (VOIDmode, x,
16360 gen_rtx_US_MINUS (mode, cop0, cop1)));
16363 cop1 = CONST0_RTX (mode);
16369 gcc_unreachable ();
16374 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
16375 operands[1+negate], operands[2-negate]);
16377 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
16378 operands[2-negate]);
16382 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
16383 true if we should do zero extension, else sign extension. HIGH_P is
16384 true if we want the N/2 high elements, else the low elements. */
16387 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16389 enum machine_mode imode = GET_MODE (operands[1]);
16390 rtx (*unpack)(rtx, rtx, rtx);
16397 unpack = gen_vec_interleave_highv16qi;
16399 unpack = gen_vec_interleave_lowv16qi;
16403 unpack = gen_vec_interleave_highv8hi;
16405 unpack = gen_vec_interleave_lowv8hi;
16409 unpack = gen_vec_interleave_highv4si;
16411 unpack = gen_vec_interleave_lowv4si;
16414 gcc_unreachable ();
16417 dest = gen_lowpart (imode, operands[0]);
16420 se = force_reg (imode, CONST0_RTX (imode));
16422 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
16423 operands[1], pc_rtx, pc_rtx);
16425 emit_insn (unpack (dest, operands[1], se));
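/* Illustrative sketch, not part of the original sources: without
   SSE4.1 pmovsx/pmovzx, widening e.g. V8HImode to V4SImode interleaves
   the source with an extension word: SE is the pcmpgtw of 0 against
   the source (all-ones exactly where an element is negative) for sign
   extension, or a zero vector for zero extension; the interleave then
   pairs every source element with its extension half.  */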
16428 /* This function performs the same task as ix86_expand_sse_unpack,
16429 but with SSE4.1 instructions. */
16432 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
16434 enum machine_mode imode = GET_MODE (operands[1]);
16435 rtx (*unpack)(rtx, rtx);
16442 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
16444 unpack = gen_sse4_1_extendv8qiv8hi2;
16448 unpack = gen_sse4_1_zero_extendv4hiv4si2;
16450 unpack = gen_sse4_1_extendv4hiv4si2;
16454 unpack = gen_sse4_1_zero_extendv2siv2di2;
16456 unpack = gen_sse4_1_extendv2siv2di2;
16459 gcc_unreachable ();
16462 dest = operands[0];
16465 /* Shift higher 8 bytes to lower 8 bytes. */
16466 src = gen_reg_rtx (imode);
16467 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
16468 gen_lowpart (V1TImode, operands[1]),
16474 emit_insn (unpack (dest, src));
/* Expand conditional increment or decrement using adc/sbb instructions.
16478 The default case using setcc followed by the conditional move can be
16479 done by generic code. */
16481 ix86_expand_int_addcc (rtx operands[])
16483 enum rtx_code code = GET_CODE (operands[1]);
16485 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
16487 rtx val = const0_rtx;
16488 bool fpcmp = false;
16489 enum machine_mode mode;
16491 ix86_compare_op0 = XEXP (operands[1], 0);
16492 ix86_compare_op1 = XEXP (operands[1], 1);
16493 if (operands[3] != const1_rtx
16494 && operands[3] != constm1_rtx)
16496 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16497 ix86_compare_op1, &compare_op))
16499 code = GET_CODE (compare_op);
16501 flags = XEXP (compare_op, 0);
16503 if (GET_MODE (flags) == CCFPmode
16504 || GET_MODE (flags) == CCFPUmode)
fpcmp = true;
code = ix86_fp_compare_code_to_integer (code);
16514 PUT_CODE (compare_op,
16515 reverse_condition_maybe_unordered
16516 (GET_CODE (compare_op)));
16518 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16521 mode = GET_MODE (operands[0]);
16523 /* Construct either adc or sbb insn. */
16524 if ((code == LTU) == (operands[3] == constm1_rtx))
16529 insn = gen_subqi3_carry;
16532 insn = gen_subhi3_carry;
16535 insn = gen_subsi3_carry;
16538 insn = gen_subdi3_carry;
16541 gcc_unreachable ();
16549 insn = gen_addqi3_carry;
16552 insn = gen_addhi3_carry;
16555 insn = gen_addsi3_carry;
16558 insn = gen_adddi3_carry;
16561 gcc_unreachable ();
16564 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
16566 return 1; /* DONE */
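/* Illustrative sketch, not part of the original sources: a conditional
   increment such as

       x = (a < b) ? x + 1 : x;       (unsigned compare)

   becomes the branch-free pair (registers hypothetical)

       cmpl  %ebx, %eax   ; CF = (a < b)
       adcl  $0, %ecx     ; x += CF

   with sbb playing the same role for a conditional decrement.  */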
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  Maximally three parts are generated.  */
16576 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
if (!TARGET_64BIT)
  size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
else
  size = (GET_MODE_SIZE (mode) + 4) / 8;
16585 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16586 gcc_assert (size >= 2 && size <= 4);
/* Optimize constant pool references to immediates.  This is used by fp
   moves, which force all constants to memory to allow combining.  */
16590 if (MEM_P (operand) && MEM_READONLY_P (operand))
16592 rtx tmp = maybe_get_pool_constant (operand);
16597 if (MEM_P (operand) && !offsettable_memref_p (operand))
/* The only non-offsettable memories we handle are pushes.  */
16600 int ok = push_operand (operand, VOIDmode);
16604 operand = copy_rtx (operand);
16605 PUT_MODE (operand, Pmode);
16606 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16610 if (GET_CODE (operand) == CONST_VECTOR)
16612 enum machine_mode imode = int_mode_for_mode (mode);
16613 /* Caution: if we looked through a constant pool memory above,
16614 the operand may actually have a different mode now. That's
16615 ok, since we want to pun this all the way back to an integer. */
16616 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16617 gcc_assert (operand != NULL);
16623 if (mode == DImode)
16624 split_di (&operand, 1, &parts[0], &parts[1]);
16629 if (REG_P (operand))
16631 gcc_assert (reload_completed);
16632 for (i = 0; i < size; i++)
16633 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16635 else if (offsettable_memref_p (operand))
16637 operand = adjust_address (operand, SImode, 0);
16638 parts[0] = operand;
16639 for (i = 1; i < size; i++)
16640 parts[i] = adjust_address (operand, SImode, 4 * i);
16642 else if (GET_CODE (operand) == CONST_DOUBLE)
16647 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16651 real_to_target (l, &r, mode);
16652 parts[3] = gen_int_mode (l[3], SImode);
16653 parts[2] = gen_int_mode (l[2], SImode);
16656 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16657 parts[2] = gen_int_mode (l[2], SImode);
16660 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16663 gcc_unreachable ();
16665 parts[1] = gen_int_mode (l[1], SImode);
16666 parts[0] = gen_int_mode (l[0], SImode);
16669 gcc_unreachable ();
16674 if (mode == TImode)
16675 split_ti (&operand, 1, &parts[0], &parts[1]);
16676 if (mode == XFmode || mode == TFmode)
16678 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16679 if (REG_P (operand))
16681 gcc_assert (reload_completed);
16682 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16683 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16685 else if (offsettable_memref_p (operand))
16687 operand = adjust_address (operand, DImode, 0);
16688 parts[0] = operand;
16689 parts[1] = adjust_address (operand, upper_mode, 8);
16691 else if (GET_CODE (operand) == CONST_DOUBLE)
16696 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16697 real_to_target (l, &r, mode);
16699 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16700 if (HOST_BITS_PER_WIDE_INT >= 64)
16703 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16704 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16707 parts[0] = immed_double_const (l[0], l[1], DImode);
16709 if (upper_mode == SImode)
16710 parts[1] = gen_int_mode (l[2], SImode);
16711 else if (HOST_BITS_PER_WIDE_INT >= 64)
16714 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16715 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16718 parts[1] = immed_double_const (l[2], l[3], DImode);
16721 gcc_unreachable ();
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   Return false when normal moves are needed; true when all required
   insns have been emitted.  Operands 2-4 contain the input values
   in the correct order; operands 5-7 contain the output values.  */
16734 ix86_split_long_move (rtx operands[])
16739 int collisions = 0;
16740 enum machine_mode mode = GET_MODE (operands[0]);
16741 bool collisionparts[4];
/* The DFmode expanders may ask us to move double.
   For a 64-bit target this is a single move.  By hiding the fact
   here we simplify the i386.md splitters.  */
16746 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
/* Optimize constant pool references to immediates.  This is used by
   fp moves, which force all constants to memory to allow combining.  */
16751 if (MEM_P (operands[1])
16752 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16753 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16754 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16755 if (push_operand (operands[0], VOIDmode))
16757 operands[0] = copy_rtx (operands[0]);
16758 PUT_MODE (operands[0], Pmode);
16761 operands[0] = gen_lowpart (DImode, operands[0]);
16762 operands[1] = gen_lowpart (DImode, operands[1]);
16763 emit_move_insn (operands[0], operands[1]);
16767 /* The only non-offsettable memory we handle is push. */
16768 if (push_operand (operands[0], VOIDmode))
16771 gcc_assert (!MEM_P (operands[0])
16772 || offsettable_memref_p (operands[0]));
16774 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16775 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
/* When emitting a push, take care of source operands on the stack.  */
16778 if (push && MEM_P (operands[1])
16779 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16781 rtx src_base = XEXP (part[1][nparts - 1], 0);
16783 /* Compensate for the stack decrement by 4. */
16784 if (!TARGET_64BIT && nparts == 3
16785 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16786 src_base = plus_constant (src_base, 4);
/* src_base refers to the stack pointer and is
   automatically decreased by the emitted push insns.  */
16790 for (i = 0; i < nparts; i++)
16791 part[1][i] = change_address (part[1][i],
16792 GET_MODE (part[1][i]), src_base);
/* We need to do the copy in the right order in case an address register
   of the source overlaps the destination.  */
16797 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16801 for (i = 0; i < nparts; i++)
16804 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16805 if (collisionparts[i])
16809 /* Collision in the middle part can be handled by reordering. */
16810 if (collisions == 1 && nparts == 3 && collisionparts [1])
16812 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16813 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16815 else if (collisions == 1
16817 && (collisionparts [1] || collisionparts [2]))
16819 if (collisionparts [1])
16821 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16822 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16826 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16827 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
/* If there are more collisions, we can't handle them by reordering.
   Do an lea to the last part and use only one colliding move.  */
16833 else if (collisions > 1)
16839 base = part[0][nparts - 1];
16841 /* Handle the case when the last part isn't valid for lea.
16842 Happens in 64-bit mode storing the 12-byte XFmode. */
16843 if (GET_MODE (base) != Pmode)
16844 base = gen_rtx_REG (Pmode, REGNO (base));
16846 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16847 part[1][0] = replace_equiv_address (part[1][0], base);
16848 for (i = 1; i < nparts; i++)
16850 tmp = plus_constant (base, UNITS_PER_WORD * i);
16851 part[1][i] = replace_equiv_address (part[1][i], tmp);
16862 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16863 emit_insn (gen_addsi3 (stack_pointer_rtx,
16864 stack_pointer_rtx, GEN_INT (-4)));
16865 emit_move_insn (part[0][2], part[1][2]);
16867 else if (nparts == 4)
16869 emit_move_insn (part[0][3], part[1][3]);
16870 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  If this is a
   register, it is OK - we will just use the larger counterpart.  We also
   retype memory - these come from an attempt to avoid the REX prefix on
   moving the second half of a TFmode value.  */
16879 if (GET_MODE (part[1][1]) == SImode)
16881 switch (GET_CODE (part[1][1]))
16884 part[1][1] = adjust_address (part[1][1], DImode, 0);
16888 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16892 gcc_unreachable ();
16895 if (GET_MODE (part[1][0]) == SImode)
16896 part[1][0] = part[1][1];
16899 emit_move_insn (part[0][1], part[1][1]);
16900 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
16905 if ((REG_P (part[0][0])
16906 && REG_P (part[1][1])
16907 && (REGNO (part[0][0]) == REGNO (part[1][1])
16909 && REGNO (part[0][0]) == REGNO (part[1][2]))
16911 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16913 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16915 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16917 operands[2 + i] = part[0][j];
16918 operands[6 + i] = part[1][j];
16923 for (i = 0; i < nparts; i++)
16925 operands[2 + i] = part[0][i];
16926 operands[6 + i] = part[1][i];
16930 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16931 if (optimize_insn_for_size_p ())
16933 for (j = 0; j < nparts - 1; j++)
16934 if (CONST_INT_P (operands[6 + j])
16935 && operands[6 + j] != const0_rtx
16936 && REG_P (operands[2 + j]))
16937 for (i = j; i < nparts - 1; i++)
16938 if (CONST_INT_P (operands[7 + i])
16939 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16940 operands[7 + i] = operands[2 + j];
16943 for (i = 0; i < nparts; i++)
16944 emit_move_insn (operands[2 + i], operands[6 + i]);
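/* Illustrative sketch, not part of the original sources: the ordering
   matters when a destination register doubles as the source address,
   e.g. a DImode load (%eax) -> edx:eax must be emitted high part first:

       movl  4(%eax), %edx
       movl  (%eax), %eax

   emitting the low part first would clobber the address register before
   the second load; that is the collision handled above.  */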
16949 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16950 left shift by a constant, either using a single shift or
16951 a sequence of add instructions. */
16954 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
emit_insn ((mode == DImode
            ? gen_addsi3
            : gen_adddi3) (operand, operand, operand));
16962 else if (!optimize_insn_for_size_p ()
16963 && count * ix86_cost->add <= ix86_cost->shift_const)
for (i = 0; i < count; i++)
emit_insn ((mode == DImode
            ? gen_addsi3
            : gen_adddi3) (operand, operand, operand));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (operand, operand, GEN_INT (count)));
16980 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16982 rtx low[2], high[2];
16984 const int single_width = mode == DImode ? 32 : 64;
16986 if (CONST_INT_P (operands[2]))
16988 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16989 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16991 if (count >= single_width)
16993 emit_move_insn (high[0], low[1]);
16994 emit_move_insn (low[0], const0_rtx);
16996 if (count > single_width)
16997 ix86_expand_ashl_const (high[0], count - single_width, mode);
17001 if (!rtx_equal_p (operands[0], operands[1]))
17002 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shld
            : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17006 ix86_expand_ashl_const (low[0], count, mode);
17011 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17013 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, then 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
17017 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17019 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17021 ix86_expand_clear (low[0]);
17022 ix86_expand_clear (high[0]);
17023 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17025 d = gen_lowpart (QImode, low[0]);
17026 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17027 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17028 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17030 d = gen_lowpart (QImode, high[0]);
17031 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17032 s = gen_rtx_NE (QImode, flags, const0_rtx);
17033 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17036 /* Otherwise, we can get the same results by manually performing
17037 a bit extract operation on bit 5/6, and then performing the two
17038 shifts. The two methods of getting 0/1 into low/high are exactly
17039 the same size. Avoiding the shift in the bit extract case helps
17040 pentium4 a bit; no one else seems to care much either way. */
17045 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
else
  x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17049 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0],
                            GEN_INT (mode == DImode ? 5 : 6)));
emit_insn ((mode == DImode
            ? gen_andsi3
            : gen_anddi3) (high[0], high[0], const1_rtx));
17058 emit_move_insn (low[0], high[0]);
emit_insn ((mode == DImode
            ? gen_xorsi3
            : gen_xordi3) (low[0], low[0], const1_rtx));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (low[0], low[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (high[0], high[0], operands[2]));
17073 if (operands[1] == constm1_rtx)
17075 /* For -1 << N, we can avoid the shld instruction, because we
17076 know that we're shifting 0...31/63 ones into a -1. */
17077 emit_move_insn (low[0], constm1_rtx);
17078 if (optimize_insn_for_size_p ())
17079 emit_move_insn (high[0], low[0]);
17081 emit_move_insn (high[0], constm1_rtx);
17085 if (!rtx_equal_p (operands[0], operands[1]))
17086 emit_move_insn (operands[0], operands[1]);
17088 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shld
            : gen_x86_64_shld) (high[0], low[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (low[0], low[0], operands[2]));
17096 if (TARGET_CMOVE && scratch)
17098 ix86_expand_clear (scratch);
17099 emit_insn ((mode == DImode
17100 ? gen_x86_shift_adj_1
: gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
                           scratch));
else
17105 emit_insn ((mode == DImode
17106 ? gen_x86_shift_adj_2
17107 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
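/* Illustrative sketch, not part of the original sources: for a variable
   double-word left shift the split above produces roughly

       shldl %cl, %eax, %edx   ; high = high:low shifted left by cl & 31
       shll  %cl, %eax         ; low <<= cl & 31
       ; adjust: if bit 5 of the count is set the real shift is >= 32,
       ; so move low into high and clear low (via cmov or a small branch)

   Register choices are hypothetical.  */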
17111 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17113 rtx low[2], high[2];
17115 const int single_width = mode == DImode ? 32 : 64;
17117 if (CONST_INT_P (operands[2]))
17119 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17120 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17122 if (count == single_width * 2 - 1)
17124 emit_move_insn (high[0], high[1]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0],
                            GEN_INT (single_width - 1)));
17129 emit_move_insn (low[0], high[0]);
17132 else if (count >= single_width)
17134 emit_move_insn (low[0], high[1]);
17135 emit_move_insn (high[0], low[0]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0],
                            GEN_INT (single_width - 1)));
17140 if (count > single_width)
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (low[0], low[0],
                            GEN_INT (count - single_width)));
17148 if (!rtx_equal_p (operands[0], operands[1]))
17149 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17160 if (!rtx_equal_p (operands[0], operands[1]))
17161 emit_move_insn (operands[0], operands[1]);
17163 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0], operands[2]));
17172 if (TARGET_CMOVE && scratch)
17174 emit_move_insn (scratch, high[0]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (scratch, scratch,
                            GEN_INT (single_width - 1)));
17179 emit_insn ((mode == DImode
17180 ? gen_x86_shift_adj_1
: gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
                           scratch));
else
17185 emit_insn ((mode == DImode
17186 ? gen_x86_shift_adj_3
17187 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
17192 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17194 rtx low[2], high[2];
17196 const int single_width = mode == DImode ? 32 : 64;
17198 if (CONST_INT_P (operands[2]))
17200 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17201 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17203 if (count >= single_width)
17205 emit_move_insn (low[0], high[1]);
17206 ix86_expand_clear (high[0]);
17208 if (count > single_width)
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (low[0], low[0],
                            GEN_INT (count - single_width)));
17216 if (!rtx_equal_p (operands[0], operands[1]))
17217 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
17228 if (!rtx_equal_p (operands[0], operands[1]))
17229 emit_move_insn (operands[0], operands[1]);
17231 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0], operands[2]));
17240 /* Heh. By reversing the arguments, we can reuse this pattern. */
17241 if (TARGET_CMOVE && scratch)
17243 ix86_expand_clear (scratch);
17244 emit_insn ((mode == DImode
17245 ? gen_x86_shift_adj_1
: gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
                           scratch));
else
17250 emit_insn ((mode == DImode
17251 ? gen_x86_shift_adj_2
17252 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
17258 predict_jump (int prob)
17260 rtx insn = get_last_insn ();
17261 gcc_assert (JUMP_P (insn));
17262 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Tests VARIABLE for
   alignment by ANDing it with VALUE; when the result is zero, emits a
   jump to the returned label.  */
17268 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
17270 rtx label = gen_label_rtx ();
17271 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
17272 if (GET_MODE (variable) == DImode)
17273 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
17275 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
17276 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
17279 predict_jump (REG_BR_PROB_BASE * 50 / 100);
17281 predict_jump (REG_BR_PROB_BASE * 90 / 100);
/* Adjust COUNTREG downward by VALUE.  */
17287 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
17289 if (GET_MODE (countreg) == DImode)
17290 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
17292 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero-extend a possibly-SImode EXP to a Pmode register.  */
17297 ix86_zero_extend_to_Pmode (rtx exp)
17300 if (GET_MODE (exp) == VOIDmode)
17301 return force_reg (Pmode, exp);
17302 if (GET_MODE (exp) == Pmode)
17303 return copy_to_mode_reg (Pmode, exp);
17304 r = gen_reg_rtx (Pmode);
17305 emit_insn (gen_zero_extendsidi2 (r, exp));
17309 /* Divide COUNTREG by SCALE. */
17311 scale_counter (rtx countreg, int scale)
17317 if (CONST_INT_P (countreg))
17318 return GEN_INT (INTVAL (countreg) / scale);
17319 gcc_assert (REG_P (countreg));
17321 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
17322 GEN_INT (exact_log2 (scale)),
17323 NULL, 1, OPTAB_DIRECT);
17327 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
17328 DImode for constant loop counts. */
17330 static enum machine_mode
17331 counter_mode (rtx count_exp)
17333 if (GET_MODE (count_exp) != VOIDmode)
17334 return GET_MODE (count_exp);
17335 if (!CONST_INT_P (count_exp))
17337 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be
   in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
17352 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
17353 rtx destptr, rtx srcptr, rtx value,
17354 rtx count, enum machine_mode mode, int unroll,
17357 rtx out_label, top_label, iter, tmp;
17358 enum machine_mode iter_mode = counter_mode (count);
17359 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
17360 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
17366 top_label = gen_label_rtx ();
17367 out_label = gen_label_rtx ();
17368 iter = gen_reg_rtx (iter_mode);
17370 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17371 NULL, 1, OPTAB_DIRECT);
17372 /* Those two should combine. */
17373 if (piece_size == const1_rtx)
17375 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17377 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17379 emit_move_insn (iter, const0_rtx);
17381 emit_label (top_label);
17383 tmp = convert_modes (Pmode, iter_mode, iter, true);
17384 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17385 destmem = change_address (destmem, mode, x_addr);
17389 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17390 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
17395 if (!TARGET_64BIT && 0)
17397 for (i = 0; i < unroll; i++)
17402 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17404 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17406 emit_move_insn (destmem, srcmem);
17412 gcc_assert (unroll <= 4);
17413 for (i = 0; i < unroll; i++)
17415 tmpreg[i] = gen_reg_rtx (mode);
17419 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17421 emit_move_insn (tmpreg[i], srcmem);
17423 for (i = 0; i < unroll; i++)
17428 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17430 emit_move_insn (destmem, tmpreg[i]);
17435 for (i = 0; i < unroll; i++)
17439 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17440 emit_move_insn (destmem, value);
17443 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17444 true, OPTAB_LIB_WIDEN);
17446 emit_move_insn (iter, tmp);
17448 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17450 if (expected_size != -1)
17452 expected_size /= GET_MODE_SIZE (mode) * unroll;
17453 if (expected_size == 0)
17455 else if (expected_size > REG_BR_PROB_BASE)
17456 predict_jump (REG_BR_PROB_BASE - 1);
17458 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17461 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17462 iter = ix86_zero_extend_to_Pmode (iter);
17463 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17464 true, OPTAB_LIB_WIDEN);
17465 if (tmp != destptr)
17466 emit_move_insn (destptr, tmp);
17469 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17470 true, OPTAB_LIB_WIDEN);
17472 emit_move_insn (srcptr, tmp);
17474 emit_label (out_label);
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
17480 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17481 rtx destptr, rtx srcptr,
17483 enum machine_mode mode)
17489 /* If the size is known, it is shorter to use rep movs. */
17490 if (mode == QImode && CONST_INT_P (count)
&& !(INTVAL (count) & 3))
  mode = SImode;
17494 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17495 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17496 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17497 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17498 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17499 if (mode != QImode)
17501 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17502 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17503 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17504 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17505 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17506 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17510 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17511 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17513 if (CONST_INT_P (count))
17515 count = GEN_INT (INTVAL (count)
17516 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17517 destmem = shallow_copy_rtx (destmem);
17518 srcmem = shallow_copy_rtx (srcmem);
17519 set_mem_size (destmem, count);
17520 set_mem_size (srcmem, count);
17524 if (MEM_SIZE (destmem))
17525 set_mem_size (destmem, NULL_RTX);
17526 if (MEM_SIZE (srcmem))
17527 set_mem_size (srcmem, NULL_RTX);
emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
                        destexp, srcexp));
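/* Illustrative sketch, not part of the original sources: for a known
   count of 256 bytes copied in SImode chunks, the expansion above boils
   down to

       movl  $64, %ecx      ; count / 4
       rep movsl            ; copies and advances esi/edi

   DESTEXP/SRCEXP describe the final pointer values (ptr + count*size),
   so the rtl exposes the side effect on the pointer registers.  */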
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
17536 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17537 rtx count, enum machine_mode mode,
17543 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17544 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17545 value = force_reg (mode, gen_lowpart (mode, value));
17546 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17547 if (mode != QImode)
17549 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17550 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17551 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17554 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17555 if (orig_value == const0_rtx && CONST_INT_P (count))
17557 count = GEN_INT (INTVAL (count)
17558 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17559 destmem = shallow_copy_rtx (destmem);
17560 set_mem_size (destmem, count);
17562 else if (MEM_SIZE (destmem))
17563 set_mem_size (destmem, NULL_RTX);
17564 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17568 emit_strmov (rtx destmem, rtx srcmem,
17569 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17571 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17572 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17573 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17576 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17578 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17579 rtx destptr, rtx srcptr, rtx count, int max_size)
17582 if (CONST_INT_P (count))
17584 HOST_WIDE_INT countval = INTVAL (count);
17587 if ((countval & 0x10) && max_size > 16)
17591 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17592 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17595 gcc_unreachable ();
17598 if ((countval & 0x08) && max_size > 8)
17601 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17604 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17605 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17609 if ((countval & 0x04) && max_size > 4)
17611 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17614 if ((countval & 0x02) && max_size > 2)
17616 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17619 if ((countval & 0x01) && max_size > 1)
17621 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17628 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17629 count, 1, OPTAB_DIRECT);
17630 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17631 count, QImode, 1, 4);
/* When there are stringops, we can cheaply increase dest and src pointers.
   Otherwise we save code size by maintaining offset (zero is readily
   available from the preceding rep operation) and using x86 addressing modes.  */
17639 if (TARGET_SINGLE_STRINGOP)
17643 rtx label = ix86_expand_aligntest (count, 4, true);
17644 src = change_address (srcmem, SImode, srcptr);
17645 dest = change_address (destmem, SImode, destptr);
17646 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17647 emit_label (label);
17648 LABEL_NUSES (label) = 1;
17652 rtx label = ix86_expand_aligntest (count, 2, true);
17653 src = change_address (srcmem, HImode, srcptr);
17654 dest = change_address (destmem, HImode, destptr);
17655 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17656 emit_label (label);
17657 LABEL_NUSES (label) = 1;
17661 rtx label = ix86_expand_aligntest (count, 1, true);
17662 src = change_address (srcmem, QImode, srcptr);
17663 dest = change_address (destmem, QImode, destptr);
17664 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17665 emit_label (label);
17666 LABEL_NUSES (label) = 1;
17671 rtx offset = force_reg (Pmode, const0_rtx);
17676 rtx label = ix86_expand_aligntest (count, 4, true);
17677 src = change_address (srcmem, SImode, srcptr);
17678 dest = change_address (destmem, SImode, destptr);
17679 emit_move_insn (dest, src);
17680 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17681 true, OPTAB_LIB_WIDEN);
17683 emit_move_insn (offset, tmp);
17684 emit_label (label);
17685 LABEL_NUSES (label) = 1;
17689 rtx label = ix86_expand_aligntest (count, 2, true);
17690 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17691 src = change_address (srcmem, HImode, tmp);
17692 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17693 dest = change_address (destmem, HImode, tmp);
17694 emit_move_insn (dest, src);
17695 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17696 true, OPTAB_LIB_WIDEN);
17698 emit_move_insn (offset, tmp);
17699 emit_label (label);
17700 LABEL_NUSES (label) = 1;
17704 rtx label = ix86_expand_aligntest (count, 1, true);
17705 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17706 src = change_address (srcmem, QImode, tmp);
17707 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17708 dest = change_address (destmem, QImode, tmp);
17709 emit_move_insn (dest, src);
17710 emit_label (label);
17711 LABEL_NUSES (label) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17718 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17719 rtx count, int max_size)
17722 expand_simple_binop (counter_mode (count), AND, count,
17723 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17724 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17725 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17731 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17735 if (CONST_INT_P (count))
17737 HOST_WIDE_INT countval = INTVAL (count);
17740 if ((countval & 0x10) && max_size > 16)
17744 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17745 emit_insn (gen_strset (destptr, dest, value));
17746 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17747 emit_insn (gen_strset (destptr, dest, value));
17750 gcc_unreachable ();
17753 if ((countval & 0x08) && max_size > 8)
17757 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17758 emit_insn (gen_strset (destptr, dest, value));
17762 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17763 emit_insn (gen_strset (destptr, dest, value));
17764 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17765 emit_insn (gen_strset (destptr, dest, value));
17769 if ((countval & 0x04) && max_size > 4)
17771 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17772 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17775 if ((countval & 0x02) && max_size > 2)
17777 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17778 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17781 if ((countval & 0x01) && max_size > 1)
17783 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17784 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17791 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17796 rtx label = ix86_expand_aligntest (count, 16, true);
17799 dest = change_address (destmem, DImode, destptr);
17800 emit_insn (gen_strset (destptr, dest, value));
17801 emit_insn (gen_strset (destptr, dest, value));
17805 dest = change_address (destmem, SImode, destptr);
17806 emit_insn (gen_strset (destptr, dest, value));
17807 emit_insn (gen_strset (destptr, dest, value));
17808 emit_insn (gen_strset (destptr, dest, value));
17809 emit_insn (gen_strset (destptr, dest, value));
17811 emit_label (label);
17812 LABEL_NUSES (label) = 1;
17816 rtx label = ix86_expand_aligntest (count, 8, true);
17819 dest = change_address (destmem, DImode, destptr);
17820 emit_insn (gen_strset (destptr, dest, value));
17824 dest = change_address (destmem, SImode, destptr);
17825 emit_insn (gen_strset (destptr, dest, value));
17826 emit_insn (gen_strset (destptr, dest, value));
17828 emit_label (label);
17829 LABEL_NUSES (label) = 1;
17833 rtx label = ix86_expand_aligntest (count, 4, true);
17834 dest = change_address (destmem, SImode, destptr);
17835 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17836 emit_label (label);
17837 LABEL_NUSES (label) = 1;
17841 rtx label = ix86_expand_aligntest (count, 2, true);
17842 dest = change_address (destmem, HImode, destptr);
17843 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17844 emit_label (label);
17845 LABEL_NUSES (label) = 1;
17849 rtx label = ix86_expand_aligntest (count, 1, true);
17850 dest = change_address (destmem, QImode, destptr);
17851 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17852 emit_label (label);
17853 LABEL_NUSES (label) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
17860 expand_movmem_prologue (rtx destmem, rtx srcmem,
17861 rtx destptr, rtx srcptr, rtx count,
17862 int align, int desired_alignment)
17864 if (align <= 1 && desired_alignment > 1)
17866 rtx label = ix86_expand_aligntest (destptr, 1, false);
17867 srcmem = change_address (srcmem, QImode, srcptr);
17868 destmem = change_address (destmem, QImode, destptr);
17869 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17870 ix86_adjust_counter (count, 1);
17871 emit_label (label);
17872 LABEL_NUSES (label) = 1;
17874 if (align <= 2 && desired_alignment > 2)
17876 rtx label = ix86_expand_aligntest (destptr, 2, false);
17877 srcmem = change_address (srcmem, HImode, srcptr);
17878 destmem = change_address (destmem, HImode, destptr);
17879 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17880 ix86_adjust_counter (count, 2);
17881 emit_label (label);
17882 LABEL_NUSES (label) = 1;
17884 if (align <= 4 && desired_alignment > 4)
17886 rtx label = ix86_expand_aligntest (destptr, 4, false);
17887 srcmem = change_address (srcmem, SImode, srcptr);
17888 destmem = change_address (destmem, SImode, destptr);
17889 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17890 ix86_adjust_counter (count, 4);
17891 emit_label (label);
17892 LABEL_NUSES (label) = 1;
17894 gcc_assert (desired_alignment <= 8);
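/* Illustrative sketch, not part of the original sources: for a copy
   whose destination has unknown 1-byte alignment and a desired
   alignment of 4, the prologue above emits roughly

       testl $1, %edi ; jz 1f ; copy one byte  ; count -= 1 ; 1:
       testl $2, %edi ; jz 2f ; copy two bytes ; count -= 2 ; 2:

   after which the destination is 4-byte aligned and the bulk loop can
   use wider chunks.  Register choice is hypothetical.  */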
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
17900 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17901 int desired_align, int align_bytes)
17904 rtx src_size, dst_size;
17906 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17907 if (src_align_bytes >= 0)
17908 src_align_bytes = desired_align - src_align_bytes;
17909 src_size = MEM_SIZE (src);
17910 dst_size = MEM_SIZE (dst);
17911 if (align_bytes & 1)
17913 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17914 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17916 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17918 if (align_bytes & 2)
17920 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17921 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17922 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17923 set_mem_align (dst, 2 * BITS_PER_UNIT);
17924 if (src_align_bytes >= 0
17925 && (src_align_bytes & 1) == (align_bytes & 1)
17926 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17927 set_mem_align (src, 2 * BITS_PER_UNIT);
17929 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17931 if (align_bytes & 4)
17933 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17934 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17935 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17936 set_mem_align (dst, 4 * BITS_PER_UNIT);
17937 if (src_align_bytes >= 0)
17939 unsigned int src_align = 0;
17940 if ((src_align_bytes & 3) == (align_bytes & 3))
17942 else if ((src_align_bytes & 1) == (align_bytes & 1))
17944 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17945 set_mem_align (src, src_align * BITS_PER_UNIT);
17948 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17950 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17951 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17952 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17953 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17954 if (src_align_bytes >= 0)
17956 unsigned int src_align = 0;
17957 if ((src_align_bytes & 7) == (align_bytes & 7))
17959 else if ((src_align_bytes & 3) == (align_bytes & 3))
17961 else if ((src_align_bytes & 1) == (align_bytes & 1))
17963 if (src_align > (unsigned int) desired_align)
17964 src_align = desired_align;
17965 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17966 set_mem_align (src, src_align * BITS_PER_UNIT);
if (dst_size)
  set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
if (src_size)
  set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
/* Store enough into DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
17979 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17980 int align, int desired_alignment)
17982 if (align <= 1 && desired_alignment > 1)
17984 rtx label = ix86_expand_aligntest (destptr, 1, false);
17985 destmem = change_address (destmem, QImode, destptr);
17986 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17987 ix86_adjust_counter (count, 1);
17988 emit_label (label);
17989 LABEL_NUSES (label) = 1;
17991 if (align <= 2 && desired_alignment > 2)
17993 rtx label = ix86_expand_aligntest (destptr, 2, false);
17994 destmem = change_address (destmem, HImode, destptr);
17995 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17996 ix86_adjust_counter (count, 2);
17997 emit_label (label);
17998 LABEL_NUSES (label) = 1;
18000 if (align <= 4 && desired_alignment > 4)
18002 rtx label = ix86_expand_aligntest (destptr, 4, false);
18003 destmem = change_address (destmem, SImode, destptr);
18004 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18005 ix86_adjust_counter (count, 4);
18006 emit_label (label);
18007 LABEL_NUSES (label) = 1;
18009 gcc_assert (desired_alignment <= 8);
/* Set enough bytes of DST, known to be aligned to ALIGN, to align it to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
18015 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18016 int desired_align, int align_bytes)
18019 rtx dst_size = MEM_SIZE (dst);
18020 if (align_bytes & 1)
18022 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18024 emit_insn (gen_strset (destreg, dst,
18025 gen_lowpart (QImode, value)));
18027 if (align_bytes & 2)
18029 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18030 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18031 set_mem_align (dst, 2 * BITS_PER_UNIT);
18033 emit_insn (gen_strset (destreg, dst,
18034 gen_lowpart (HImode, value)));
18036 if (align_bytes & 4)
18038 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18039 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18040 set_mem_align (dst, 4 * BITS_PER_UNIT);
18042 emit_insn (gen_strset (destreg, dst,
18043 gen_lowpart (SImode, value)));
18045 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18046 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18047 set_mem_align (dst, desired_align * BITS_PER_UNIT);
  set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  return dst;
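/* For example, ALIGN_BYTES == 7 stores exactly 1 + 2 + 4 = 7 bytes: bit 0
   selects the QImode store, bit 1 the HImode store and bit 2 the SImode
   store, after which DST is aligned to DESIRED_ALIGN.  */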
18053 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18054 static enum stringop_alg
18055 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18056 int *dynamic_check)
18058 const struct stringop_algs * algs;
18059 bool optimize_for_speed;
18060 /* Algorithms using the rep prefix want at least edi and ecx;
18061 additionally, memset wants eax and memcpy wants esi. Don't
18062 consider such algorithms if the user has appropriated those
18063 registers for their own purposes. */
18064 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
                             || (memset
                                 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18068 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18069 || (alg != rep_prefix_1_byte \
18070 && alg != rep_prefix_4_byte \
18071 && alg != rep_prefix_8_byte))
18072 const struct processor_costs *cost;
18074 /* Even if the string operation call is cold, we still might spend a lot
18075 of time processing large blocks. */
18076 if (optimize_function_for_size_p (cfun)
18077 || (optimize_insn_for_size_p ()
18078 && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;
18083 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18085 *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
18090 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18091 return stringop_alg;
18092 /* rep; movq or rep; movl is the smallest variant. */
18093 else if (!optimize_for_speed)
18095 if (!count || (count & 3))
18096 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18098 return rep_prefix_usable ? rep_prefix_4_byte : loop;
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
18102 else if (expected_size != -1 && expected_size < 4)
18103 return loop_1_byte;
18104 else if (expected_size != -1)
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18110 /* We get here if the algorithms that were not libcall-based
18111 were rep-prefix based and we are unable to use rep prefixes
18112 based on global register usage. Break out of the loop and
18113 use the heuristic below. */
          if (algs->size[i].max == 0)
            break;
18116 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18118 enum stringop_alg candidate = algs->size[i].alg;
              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
18122 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18123 last non-libcall inline algorithm. */
18124 if (TARGET_INLINE_ALL_STRINGOPS)
18126 /* When the current size is best to be copied by a libcall,
18127 but we are still forced to inline, run the heuristic below
18128 that will pick code for medium sized blocks. */
                  if (alg != libcall)
                    return alg;
              else if (ALG_USABLE_P (candidate))
                return candidate;
18137 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
18146 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18147 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18150 enum stringop_alg alg;
18152 bool any_alg_usable_p = true;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18156 enum stringop_alg candidate = algs->size[i].alg;
18157 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18159 if (candidate != libcall && candidate
18160 && ALG_USABLE_P (candidate))
18161 max = algs->size[i].max;
18163 /* If there aren't any usable algorithms, then recursing on
18164 smaller sizes isn't going to find anything. Just return the
18165 simple byte-at-a-time copy loop. */
18166 if (!any_alg_usable_p)
18168 /* Pick something reasonable. */
18169 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18170 *dynamic_check = 128;
18171 return loop_1_byte;
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
18176 gcc_assert (*dynamic_check == -1);
18177 gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
18182 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18183 #undef ALG_USABLE_P
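/* As an illustration (the numbers below are hypothetical, not from any
   real cost table): given a memcpy table of {{256, loop},
   {-1, rep_prefix_4_byte}}, an EXPECTED_SIZE of 100 selects loop, while
   4000 selects rep_prefix_4_byte -- provided ecx/esi/edi have not been
   fixed by the user, in which case the rep-prefixed variants are
   filtered out by ALG_USABLE_P.  */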
18186 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18187 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18189 decide_alignment (int align,
18190 enum stringop_alg alg,
18193 int desired_align = 0;
18197 gcc_unreachable ();
18199 case unrolled_loop:
18200 desired_align = GET_MODE_SIZE (Pmode);
18202 case rep_prefix_8_byte:
18205 case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
         copying a whole cache line at once.  */
18208 if (TARGET_PENTIUMPRO)
18213 case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
         copying a whole cache line at once.  */
18216 if (TARGET_PENTIUMPRO)
18230 if (desired_align < align)
18231 desired_align = align;
18232 if (expected_size != -1 && expected_size < 4)
18233 desired_align = align;
18234 return desired_align;
18237 /* Return the smallest power of 2 greater than VAL. */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
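/* E.g. smallest_pow2_greater_than (7) == 8 and, the result being strictly
   greater, smallest_pow2_greater_than (8) == 16.  */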
18247 /* Expand string move (memcpy) operation. Use i386 string operations when
18248 profitable. expand_setmem contains similar code. The code depends upon
18249 architecture, block size and alignment, but always has the same
18252 1) Prologue guard: Conditional that jumps up to epilogues for small
18253 blocks that can be handled by epilogue alone. This is faster but
      also needed for correctness, since the prologue assumes the block is
      larger than the desired alignment.
18257 Optional dynamic check for size and libcall for large
18258 blocks is emitted here too, with -minline-stringops-dynamically.
18260 2) Prologue: copy first few bytes in order to get destination aligned
18261 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
      DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
18263 We emit either a jump tree on power of two sized blocks, or a byte loop.
18265 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
18266 with specified algorithm.
18268 4) Epilogue: code copying tail of the block that is too small to be
18269 handled by main body (or up to size guarded by prologue guard). */
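/* Schematically, the emitted code looks as follows (the labels and the
   AT&T-style pseudo-assembly are illustrative only):

        cmp     $epilogue_size_needed, count
        jb      .Lepilogue                              # 1) prologue guard
        ...copy 1/2/4 bytes until dest is aligned...    # 2) prologue
   .Lbody:
        ...copy SIZE_NEEDED bytes per iteration...      # 3) main body
   .Lepilogue:
        ...copy count & (epilogue_size_needed - 1) bytes...  # 4) epilogue
*/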
18272 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
18273 rtx expected_align_exp, rtx expected_size_exp)
18279 rtx jump_around_label = NULL;
18280 HOST_WIDE_INT align = 1;
18281 unsigned HOST_WIDE_INT count = 0;
18282 HOST_WIDE_INT expected_size = -1;
18283 int size_needed = 0, epilogue_size_needed;
18284 int desired_align = 0, align_bytes = 0;
18285 enum stringop_alg alg;
18287 bool need_zero_guard = false;
18289 if (CONST_INT_P (align_exp))
18290 align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
18292 if (CONST_INT_P (expected_align_exp)
18293 && INTVAL (expected_align_exp) > align)
18294 align = INTVAL (expected_align_exp);
18295 /* ALIGN is the minimum of destination and source alignment, but we care here
18296 just about destination alignment. */
18297 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
18298 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
18300 if (CONST_INT_P (count_exp))
18301 count = expected_size = INTVAL (count_exp);
18302 if (CONST_INT_P (expected_size_exp) && count == 0)
18303 expected_size = INTVAL (expected_size_exp);
18305 /* Make sure we don't need to care about overflow later on. */
18306 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18309 /* Step 0: Decide on preferred algorithm, desired alignment and
18310 size of chunks to be copied by main loop. */
18312 alg = decide_alg (count, expected_size, false, &dynamic_check);
18313 desired_align = decide_alignment (align, alg, expected_size);
18315 if (!TARGET_ALIGN_STRINGOPS)
18316 align = desired_align;
  if (alg == libcall)
    return 0;
18320 gcc_assert (alg != no_stringop);
18322 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
18323 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18324 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
18329 gcc_unreachable ();
18331 need_zero_guard = true;
18332 size_needed = GET_MODE_SIZE (Pmode);
18334 case unrolled_loop:
18335 need_zero_guard = true;
18336 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
18338 case rep_prefix_8_byte:
18341 case rep_prefix_4_byte:
18344 case rep_prefix_1_byte:
18348 need_zero_guard = true;
18353 epilogue_size_needed = size_needed;
18355 /* Step 1: Prologue guard. */
18357 /* Alignment code needs count to be in register. */
18358 if (CONST_INT_P (count_exp) && desired_align > align)
18360 if (INTVAL (count_exp) > desired_align
18361 && INTVAL (count_exp) > size_needed)
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
18370 if (align_bytes == 0)
18371 count_exp = force_reg (counter_mode (count_exp), count_exp);
18373 gcc_assert (desired_align >= 1 && align >= 1);
18375 /* Ensure that alignment prologue won't copy past end of block. */
18376 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18378 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18379 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
18380 Make sure it is power of 2. */
18381 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18385 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18387 /* If main algorithm works on QImode, no epilogue is needed.
18388 For small sizes just don't align anything. */
18389 if (size_needed == 1)
18390 desired_align = align;
18397 label = gen_label_rtx ();
18398 emit_cmp_and_jump_insns (count_exp,
18399 GEN_INT (epilogue_size_needed),
18400 LTU, 0, counter_mode (count_exp), 1, label);
18401 if (expected_size == -1 || expected_size < epilogue_size_needed)
        predict_jump (REG_BR_PROB_BASE * 60 / 100);
      else
        predict_jump (REG_BR_PROB_BASE * 20 / 100);
  /* Emit code to decide on runtime whether library call or inline should be
     used.  */
18410 if (dynamic_check != -1)
18412 if (CONST_INT_P (count_exp))
18414 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18416 emit_block_move_via_libcall (dst, src, count_exp, false);
18417 count_exp = const0_rtx;
18423 rtx hot_label = gen_label_rtx ();
18424 jump_around_label = gen_label_rtx ();
18425 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18426 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18427 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18428 emit_block_move_via_libcall (dst, src, count_exp, false);
18429 emit_jump (jump_around_label);
18430 emit_label (hot_label);
18434 /* Step 2: Alignment prologue. */
18436 if (desired_align > align)
18438 if (align_bytes == 0)
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
18444 src = change_address (src, BLKmode, srcreg);
18445 dst = change_address (dst, BLKmode, destreg);
          expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
                                  desired_align);
18451 /* If we know how many bytes need to be stored before dst is
18452 sufficiently aligned, maintain aliasing info accurately. */
18453 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18454 desired_align, align_bytes);
18455 count_exp = plus_constant (count_exp, -align_bytes);
18456 count -= align_bytes;
18458 if (need_zero_guard
18459 && (count < (unsigned HOST_WIDE_INT) size_needed
18460 || (align_bytes == 0
18461 && count < ((unsigned HOST_WIDE_INT) size_needed
18462 + desired_align - align))))
          /* It is possible that we copied enough so the main loop will not
             operate.  */
18466 gcc_assert (size_needed > 1);
18467 if (label == NULL_RTX)
18468 label = gen_label_rtx ();
18469 emit_cmp_and_jump_insns (count_exp,
18470 GEN_INT (size_needed),
18471 LTU, 0, counter_mode (count_exp), 1, label);
18472 if (expected_size == -1
18473 || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
18479 if (label && size_needed == 1)
18481 emit_label (label);
18482 LABEL_NUSES (label) = 1;
18484 epilogue_size_needed = 1;
18486 else if (label == NULL_RTX)
18487 epilogue_size_needed = size_needed;
18489 /* Step 3: Main loop. */
18495 gcc_unreachable ();
18497 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18498 count_exp, QImode, 1, expected_size);
18501 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18502 count_exp, Pmode, 1, expected_size);
18504 case unrolled_loop:
18505 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18506 registers for 4 temporaries anyway. */
18507 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18508 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18511 case rep_prefix_8_byte:
18512 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18515 case rep_prefix_4_byte:
18516 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18519 case rep_prefix_1_byte:
18520 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
  /* Properly adjust the offsets of src and dest memory for aliasing.  */
18525 if (CONST_INT_P (count_exp))
18527 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18528 (count / size_needed) * size_needed);
18529 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18530 (count / size_needed) * size_needed);
18534 src = change_address (src, BLKmode, srcreg);
18535 dst = change_address (dst, BLKmode, destreg);
18538 /* Step 4: Epilogue to copy the remaining bytes. */
  /* When the main loop is done, COUNT_EXP might hold the original count,
     while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
     The epilogue code will actually copy
     COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if needed.  */
18547 if (size_needed < epilogue_size_needed)
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
18553 if (tmp != count_exp)
18554 emit_move_insn (count_exp, tmp);
18556 emit_label (label);
18557 LABEL_NUSES (label) = 1;
18560 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18561 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18562 epilogue_size_needed);
18563 if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
/* Helper function for memset.  For a QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a multiplication by 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
18574 promote_duplicated_reg (enum machine_mode mode, rtx val)
18576 enum machine_mode valmode = GET_MODE (val);
18578 int nops = mode == DImode ? 3 : 2;
18580 gcc_assert (mode == SImode || mode == DImode);
18581 if (val == const0_rtx)
18582 return copy_to_mode_reg (mode, const0_rtx);
18583 if (CONST_INT_P (val))
      HOST_WIDE_INT v = INTVAL (val) & 255;
      v |= v << 8;
      v |= v << 16;
18589 if (mode == DImode)
18590 v |= (v << 16) << 16;
18591 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
  if (valmode == VOIDmode)
    valmode = QImode;
18596 if (valmode != QImode)
18597 val = gen_lowpart (QImode, val);
  if (mode == QImode)
    return val;
  if (!TARGET_PARTIAL_REG_STALL)
    nops--;
18602 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18603 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18604 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18605 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18607 rtx reg = convert_modes (mode, QImode, val, true);
18608 tmp = promote_duplicated_reg (mode, const1_rtx);
      return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
                                  OPTAB_DIRECT);
18614 rtx reg = convert_modes (mode, QImode, val, true);
18616 if (!TARGET_PARTIAL_REG_STALL)
18617 if (mode == SImode)
18618 emit_insn (gen_movsi_insv_1 (reg, reg));
      else
        emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18623 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18624 NULL, 1, OPTAB_DIRECT);
18626 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18628 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18629 NULL, 1, OPTAB_DIRECT);
18630 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      if (mode == SImode)
        return reg;
18633 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18634 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
      return reg;
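/* For example, promoting the QImode constant 0xAB to SImode on the
   non-multiply path above proceeds as

     0x000000AB -> 0x0000ABAB   (insv, or shift-by-8 + ior)
     0x0000ABAB -> 0xABABABAB   (shift-by-16 + ior)

   and DImode continues with one more shift-by-32 + ior step.  */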
/* Duplicate VAL, using promote_duplicated_reg, to the maximal size that will
   be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
   raising alignment from ALIGN to DESIRED_ALIGN.  */
18644 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18649 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18650 promoted_val = promote_duplicated_reg (DImode, val);
18651 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18652 promoted_val = promote_duplicated_reg (SImode, val);
18653 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18654 promoted_val = promote_duplicated_reg (HImode, val);
18656 promoted_val = val;
18658 return promoted_val;
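/* For instance, a 64-bit rep_prefix_8_byte memset (SIZE_NEEDED == 8) gets
   VAL promoted to DImode once here; the narrower prologue and epilogue
   stores then reuse lowparts of the same register.  */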
18661 /* Expand string clear operation (bzero). Use i386 string operations when
18662 profitable. See expand_movmem comment for explanation of individual
18663 steps performed. */
18665 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18666 rtx expected_align_exp, rtx expected_size_exp)
18671 rtx jump_around_label = NULL;
18672 HOST_WIDE_INT align = 1;
18673 unsigned HOST_WIDE_INT count = 0;
18674 HOST_WIDE_INT expected_size = -1;
18675 int size_needed = 0, epilogue_size_needed;
18676 int desired_align = 0, align_bytes = 0;
18677 enum stringop_alg alg;
18678 rtx promoted_val = NULL;
18679 bool force_loopy_epilogue = false;
18681 bool need_zero_guard = false;
18683 if (CONST_INT_P (align_exp))
18684 align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
18686 if (CONST_INT_P (expected_align_exp)
18687 && INTVAL (expected_align_exp) > align)
18688 align = INTVAL (expected_align_exp);
18689 if (CONST_INT_P (count_exp))
18690 count = expected_size = INTVAL (count_exp);
18691 if (CONST_INT_P (expected_size_exp) && count == 0)
18692 expected_size = INTVAL (expected_size_exp);
18694 /* Make sure we don't need to care about overflow later on. */
18695 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18698 /* Step 0: Decide on preferred algorithm, desired alignment and
18699 size of chunks to be copied by main loop. */
18701 alg = decide_alg (count, expected_size, true, &dynamic_check);
18702 desired_align = decide_alignment (align, alg, expected_size);
18704 if (!TARGET_ALIGN_STRINGOPS)
18705 align = desired_align;
  if (alg == libcall)
    return 0;
18709 gcc_assert (alg != no_stringop);
18711 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18712 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18717 gcc_unreachable ();
18719 need_zero_guard = true;
18720 size_needed = GET_MODE_SIZE (Pmode);
18722 case unrolled_loop:
18723 need_zero_guard = true;
18724 size_needed = GET_MODE_SIZE (Pmode) * 4;
18726 case rep_prefix_8_byte:
18729 case rep_prefix_4_byte:
18732 case rep_prefix_1_byte:
18736 need_zero_guard = true;
18740 epilogue_size_needed = size_needed;
18742 /* Step 1: Prologue guard. */
18744 /* Alignment code needs count to be in register. */
18745 if (CONST_INT_P (count_exp) && desired_align > align)
18747 if (INTVAL (count_exp) > desired_align
18748 && INTVAL (count_exp) > size_needed)
          align_bytes
            = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
          if (align_bytes <= 0)
            align_bytes = 0;
          else
            align_bytes = desired_align - align_bytes;
18757 if (align_bytes == 0)
18759 enum machine_mode mode = SImode;
          if (TARGET_64BIT && (count & ~0xffffffff))
            mode = DImode;
          count_exp = force_reg (mode, count_exp);
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in the
     front of all code).  */
18768 if (CONST_INT_P (val_exp))
18769 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18770 desired_align, align);
18771 /* Ensure that alignment prologue won't copy past end of block. */
18772 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18774 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18775 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18776 Make sure it is power of 2. */
18777 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
  /* To improve performance of small blocks, we jump around the VAL
     promoting mode.  This means that if the promoted VAL is not constant,
     we might not use it in the epilogue and have to use the byte loop
     variant.  */
18784 force_loopy_epilogue = true;
18787 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18789 /* If main algorithm works on QImode, no epilogue is needed.
18790 For small sizes just don't align anything. */
18791 if (size_needed == 1)
18792 desired_align = align;
18799 label = gen_label_rtx ();
18800 emit_cmp_and_jump_insns (count_exp,
18801 GEN_INT (epilogue_size_needed),
18802 LTU, 0, counter_mode (count_exp), 1, label);
18803 if (expected_size == -1 || expected_size <= epilogue_size_needed)
        predict_jump (REG_BR_PROB_BASE * 60 / 100);
      else
        predict_jump (REG_BR_PROB_BASE * 20 / 100);
18809 if (dynamic_check != -1)
18811 rtx hot_label = gen_label_rtx ();
18812 jump_around_label = gen_label_rtx ();
18813 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18814 LEU, 0, counter_mode (count_exp), 1, hot_label);
18815 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18816 set_storage_via_libcall (dst, count_exp, val_exp, false);
18817 emit_jump (jump_around_label);
18818 emit_label (hot_label);
18821 /* Step 2: Alignment prologue. */
  /* Do the expensive promotion once we have branched off the small blocks.  */
  if (!promoted_val)
    promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
                                                   desired_align, align);
18827 gcc_assert (desired_align >= 1 && align >= 1);
18829 if (desired_align > align)
18831 if (align_bytes == 0)
          /* Except for the first move in epilogue, we no longer know
             constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
18837 dst = change_address (dst, BLKmode, destreg);
          expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
                                  desired_align);
18843 /* If we know how many bytes need to be stored before dst is
18844 sufficiently aligned, maintain aliasing info accurately. */
18845 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18846 desired_align, align_bytes);
18847 count_exp = plus_constant (count_exp, -align_bytes);
18848 count -= align_bytes;
18850 if (need_zero_guard
18851 && (count < (unsigned HOST_WIDE_INT) size_needed
18852 || (align_bytes == 0
18853 && count < ((unsigned HOST_WIDE_INT) size_needed
18854 + desired_align - align))))
          /* It is possible that we copied enough so the main loop will not
             operate.  */
18858 gcc_assert (size_needed > 1);
18859 if (label == NULL_RTX)
18860 label = gen_label_rtx ();
18861 emit_cmp_and_jump_insns (count_exp,
18862 GEN_INT (size_needed),
18863 LTU, 0, counter_mode (count_exp), 1, label);
18864 if (expected_size == -1
18865 || expected_size < (desired_align - align) / 2 + size_needed)
            predict_jump (REG_BR_PROB_BASE * 20 / 100);
          else
            predict_jump (REG_BR_PROB_BASE * 60 / 100);
18871 if (label && size_needed == 1)
18873 emit_label (label);
18874 LABEL_NUSES (label) = 1;
18876 promoted_val = val_exp;
18877 epilogue_size_needed = 1;
18879 else if (label == NULL_RTX)
18880 epilogue_size_needed = size_needed;
18882 /* Step 3: Main loop. */
18888 gcc_unreachable ();
18890 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18891 count_exp, QImode, 1, expected_size);
18894 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18895 count_exp, Pmode, 1, expected_size);
18897 case unrolled_loop:
18898 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18899 count_exp, Pmode, 4, expected_size);
18901 case rep_prefix_8_byte:
18902 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18905 case rep_prefix_4_byte:
18906 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18909 case rep_prefix_1_byte:
18910 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
  /* Properly adjust the offset of the destination memory for aliasing.  */
18915 if (CONST_INT_P (count_exp))
18916 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18917 (count / size_needed) * size_needed);
18919 dst = change_address (dst, BLKmode, destreg);
18921 /* Step 4: Epilogue to copy the remaining bytes. */
  /* When the main loop is done, COUNT_EXP might hold the original count,
     while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
     The epilogue code will actually copy
     COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.  Compensate if needed.  */
18930 if (size_needed < epilogue_size_needed)
          tmp =
            expand_simple_binop (counter_mode (count_exp), AND, count_exp,
                                 GEN_INT (size_needed - 1), count_exp, 1,
                                 OPTAB_DIRECT);
18936 if (tmp != count_exp)
18937 emit_move_insn (count_exp, tmp);
18939 emit_label (label);
18940 LABEL_NUSES (label) = 1;
18943 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18945 if (force_loopy_epilogue)
18946 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18947 epilogue_size_needed);
      else
        expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
                                epilogue_size_needed);
18952 if (jump_around_label)
    emit_label (jump_around_label);
  return 1;
/* Expand the appropriate insns for doing strlen if not just doing
   repnz; scasb
18960 out = result, initialized with the start address
18961 align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
        not aligned, otherwise undefined
18965 This is just the body. It needs the initializations mentioned above and
18966 some address computing at the end. These things are done in i386.md. */
18969 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18973 rtx align_2_label = NULL_RTX;
18974 rtx align_3_label = NULL_RTX;
18975 rtx align_4_label = gen_label_rtx ();
18976 rtx end_0_label = gen_label_rtx ();
18978 rtx tmpreg = gen_reg_rtx (SImode);
18979 rtx scratch = gen_reg_rtx (SImode);
18983 if (CONST_INT_P (align_rtx))
18984 align = INTVAL (align_rtx);
18986 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
  /* Is there a known alignment and is it less than 4?  */
  if (align < 4)
18991 rtx scratch1 = gen_reg_rtx (Pmode);
18992 emit_move_insn (scratch1, out);
      /* Is there a known alignment and is it not 2?  */
      if (align != 2)
18996 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18997 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18999 /* Leave just the 3 lower bits. */
19000 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19001 NULL_RTX, 0, OPTAB_WIDEN);
19003 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19004 Pmode, 1, align_4_label);
19005 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19006 Pmode, 1, align_2_label);
19007 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19008 Pmode, 1, align_3_label);
      /* Since the alignment is 2, we have to check 2 or 0 bytes;
         check whether it is aligned to a 4-byte boundary.  */
19015 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19016 NULL_RTX, 0, OPTAB_WIDEN);
19018 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19019 Pmode, 1, align_4_label);
19022 mem = change_address (src, QImode, out);
19024 /* Now compare the bytes. */
  /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
19027 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19028 QImode, 1, end_0_label);
19030 /* Increment the address. */
19031 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19033 /* Not needed with an alignment of 2 */
19036 emit_label (align_2_label);
          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);
19041 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
19043 emit_label (align_3_label);
          emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
                                   end_0_label);
19049 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
  /* Generate loop to check 4 bytes at a time.  It is not a good idea to
     align this loop: that only makes the program bigger and does not
     speed it up.  */
19055 emit_label (align_4_label);
19057 mem = change_address (src, SImode, out);
19058 emit_move_insn (scratch, mem);
19059 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
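/* Concretely, tmpreg = (x - 0x01010101) & ~x & 0x80808080.  Taking
   x = 0x41410041, which has a zero in byte 1:

     x - 0x01010101 = 0x403FFF40
     & ~x           = 0x003EFF00     (~x = 0xBEBEFFBE)
     & 0x80808080   = 0x00008000     -> nonzero, a zero byte was found

   whereas a word with no zero byte yields 0.  */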
19064 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19065 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19066 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19067 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19068 gen_int_mode (0x80808080, SImode)));
  emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
                           align_4_label);
19074 rtx reg = gen_reg_rtx (SImode);
19075 rtx reg2 = gen_reg_rtx (Pmode);
19076 emit_move_insn (reg, tmpreg);
19077 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19079 /* If zero is not in the first two bytes, move two bytes forward. */
19080 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19081 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19082 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19083 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
                          gen_rtx_IF_THEN_ELSE (SImode, tmp,
                                                reg, tmpreg)));
19087 /* Emit lea manually to avoid clobbering of flags. */
19088 emit_insn (gen_rtx_SET (SImode, reg2,
19089 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19091 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19092 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19093 emit_insn (gen_rtx_SET (VOIDmode, out,
                          gen_rtx_IF_THEN_ELSE (Pmode, tmp,
                                                reg2, out)));
19100 rtx end_2_label = gen_label_rtx ();
19101 /* Is zero in the first two bytes? */
19103 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19104 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19105 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19106 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                                  gen_rtx_LABEL_REF (VOIDmode, end_2_label),
                                  pc_rtx);
19109 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19110 JUMP_LABEL (tmp) = end_2_label;
19112 /* Not in the first two. Move two bytes forward. */
19113 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19114 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
19116 emit_label (end_2_label);
  /* Avoid a branch in fixing the byte.  */
19121 tmpreg = gen_lowpart (QImode, tmpreg);
19122 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19123 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19124 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19125 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), tmp, cmp));
19127 emit_label (end_0_label);
19130 /* Expand strlen. */
19133 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19135 rtx addr, scratch1, scratch2, scratch3, scratch4;
  /* The generic case of strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
19140 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19141 && !TARGET_INLINE_ALL_STRINGOPS
19142 && !optimize_insn_for_size_p ()
      && (!CONST_INT_P (align) || INTVAL (align) < 4))
    return 0;
19146 addr = force_reg (Pmode, XEXP (src, 0));
19147 scratch1 = gen_reg_rtx (Pmode);
19149 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19150 && !optimize_insn_for_size_p ())
19152 /* Well it seems that some optimizer does not combine a call like
19153 foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
19155 the length just once when these instructions are done inside of
19156 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
19157 often used and I use one fewer register for the lifetime of
19158 output_strlen_unroll() this is better. */
19160 emit_move_insn (out, addr);
19162 ix86_expand_strlensi_unroll_1 (out, src, align);
19164 /* strlensi_unroll_1 returns the address of the zero at the end of
19165 the string, like memchr(), so compute the length by subtracting
19166 the start address. */
19167 emit_insn ((*ix86_gen_sub3) (out, out, addr));
19173 /* Can't use this if the user has appropriated eax, ecx, or edi. */
      if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
        return 0;
19177 scratch2 = gen_reg_rtx (Pmode);
19178 scratch3 = gen_reg_rtx (Pmode);
19179 scratch4 = force_reg (Pmode, constm1_rtx);
19181 emit_move_insn (scratch3, addr);
19182 eoschar = force_reg (QImode, eoschar);
19184 src = replace_equiv_address_nv (src, scratch3);
19186 /* If .md starts supporting :P, this can be done in .md. */
19187 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19188 scratch4), UNSPEC_SCAS);
19189 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19190 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
      emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));

  return 1;
/* For a given symbol (function), construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
19199 construct_plt_address (rtx symbol)
19201 rtx tmp = gen_reg_rtx (Pmode);
19202 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19204 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19205 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19207 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
  emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
  return tmp;
void
ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
                  rtx callarg2, rtx pop, int sibcall)
19217 rtx use = NULL, call;
  if (pop == const0_rtx)
    pop = NULL;
19221 gcc_assert (!TARGET_64BIT || !pop);
19223 if (TARGET_MACHO && !TARGET_64BIT)
19226 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
19227 fnaddr = machopic_indirect_call_target (fnaddr);
19232 /* Static functions and indirect calls don't need the pic register. */
19233 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
19234 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19235 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
19236 use_reg (&use, pic_offset_table_rtx);
19239 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
19241 rtx al = gen_rtx_REG (QImode, AX_REG);
19242 emit_move_insn (al, callarg2);
19243 use_reg (&use, al);
19246 if (ix86_cmodel == CM_LARGE_PIC
19248 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
19249 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
19250 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
  else if (sibcall
           ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
19253 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
19255 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
19256 fnaddr = gen_rtx_MEM (QImode, fnaddr);
19259 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
  if (retval)
    call = gen_rtx_SET (VOIDmode, retval, call);
19264 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
19265 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
19266 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
19269 && ix86_cfun_abi () == MS_ABI
19270 && (!callarg2 || INTVAL (callarg2) != -2))
      /* We need to represent that SI and DI registers are clobbered
         by the function call.  */
19274 static int clobbered_registers[] = {
19275 XMM6_REG, XMM7_REG, XMM8_REG,
19276 XMM9_REG, XMM10_REG, XMM11_REG,
19277 XMM12_REG, XMM13_REG, XMM14_REG,
19278 XMM15_REG, SI_REG, DI_REG
19281 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
19282 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
19283 UNSPEC_MS_TO_SYSV_CALL);
19287 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
19288 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
19291 (SSE_REGNO_P (clobbered_registers[i])
19293 clobbered_registers[i]));
19295 call = gen_rtx_PARALLEL (VOIDmode,
19296 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
19300 call = emit_call_insn (call);
  if (use)
    CALL_INSN_FUNCTION_USAGE (call) = use;
19306 /* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
19310 static struct machine_function *
19311 ix86_init_machine_status (void)
19313 struct machine_function *f;
19315 f = GGC_CNEW (struct machine_function);
19316 f->use_fast_prologue_epilogue_nregs = -1;
19317 f->tls_descriptor_call_expanded_p = 0;
  f->call_abi = ix86_abi;

  return f;
19323 /* Return a MEM corresponding to a stack slot with mode MODE.
19324 Allocate a new slot if necessary.
19326 The RTL for a function can have several slots available: N is
19327 which slot to use. */
19330 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
19332 struct stack_local_entry *s;
19334 gcc_assert (n < MAX_386_STACK_LOCALS);
19336 /* Virtual slot is valid only before vregs are instantiated. */
19337 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
19339 for (s = ix86_stack_locals; s; s = s->next)
19340 if (s->mode == mode && s->n == n)
19341 return copy_rtx (s->rtl);
19343 s = (struct stack_local_entry *)
19344 ggc_alloc (sizeof (struct stack_local_entry));
  s->n = n;
  s->mode = mode;
  s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
19349 s->next = ix86_stack_locals;
  ix86_stack_locals = s;
  return s->rtl;
19354 /* Construct the SYMBOL_REF for the tls_get_addr function. */
19356 static GTY(()) rtx ix86_tls_symbol;
19358 ix86_tls_get_addr (void)
19361 if (!ix86_tls_symbol)
19363 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
                                            (TARGET_ANY_GNU_TLS
                                             && !TARGET_64BIT)
19366 ? "___tls_get_addr"
19367 : "__tls_get_addr");
19370 return ix86_tls_symbol;
19373 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19375 static GTY(()) rtx ix86_tls_module_base_symbol;
19377 ix86_tls_module_base (void)
19380 if (!ix86_tls_module_base_symbol)
19382 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19383 "_TLS_MODULE_BASE_");
19384 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19385 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19388 return ix86_tls_module_base_symbol;
19391 /* Calculate the length of the memory address in the instruction
19392 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19395 memory_address_length (rtx addr)
19397 struct ix86_address parts;
19398 rtx base, index, disp;
19402 if (GET_CODE (addr) == PRE_DEC
19403 || GET_CODE (addr) == POST_INC
19404 || GET_CODE (addr) == PRE_MODIFY
      || GET_CODE (addr) == POST_MODIFY)
    return 0;
  ok = ix86_decompose_address (addr, &parts);
  gcc_assert (ok);
19411 if (parts.base && GET_CODE (parts.base) == SUBREG)
19412 parts.base = SUBREG_REG (parts.base);
19413 if (parts.index && GET_CODE (parts.index) == SUBREG)
19414 parts.index = SUBREG_REG (parts.index);
19417 index = parts.index;
19422 - esp as the base always wants an index,
19423 - ebp as the base always wants a displacement,
19424 - r12 as the base always wants an index,
19425 - r13 as the base always wants a displacement. */
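/* So, excluding the modrm byte itself: (%eax) encodes in 0 extra bytes,
   (%esp) needs a SIB byte (1), (%ebp) needs a disp8 (1), and a bare
   disp32 costs 4 -- these byte counts follow from the x86 encoding rules
   above, not from any table in this file.  */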
19427 /* Register Indirect. */
19428 if (base && !index && !disp)
19430 /* esp (for its index) and ebp (for its displacement) need
         the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
         mode.  */
      if (REG_P (addr)
          && (addr == arg_pointer_rtx
19435 || addr == frame_pointer_rtx
19436 || REGNO (addr) == SP_REG
19437 || REGNO (addr) == BP_REG
19438 || REGNO (addr) == R12_REG
19439 || REGNO (addr) == R13_REG))
19443 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
19444 is not disp32, but disp32(%rip), so for disp32
19445 SIB byte is needed, unless print_operand_address
     optimizes it into disp32(%rip) or (%rip) is implied
     by UNSPEC.  */
19448 else if (disp && !base && !index)
19455 if (GET_CODE (disp) == CONST)
19456 symbol = XEXP (disp, 0);
19457 if (GET_CODE (symbol) == PLUS
19458 && CONST_INT_P (XEXP (symbol, 1)))
19459 symbol = XEXP (symbol, 0);
19461 if (GET_CODE (symbol) != LABEL_REF
19462 && (GET_CODE (symbol) != SYMBOL_REF
19463 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
19464 && (GET_CODE (symbol) != UNSPEC
19465 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
19466 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
19473 /* Find the length of the displacement constant. */
19476 if (base && satisfies_constraint_K (disp))
19481 /* ebp always wants a displacement. Similarly r13. */
19482 else if (base && REG_P (base)
19483 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
19486 /* An index requires the two-byte modrm form.... */
19488 /* ...like esp (or r12), which always wants an index. */
19489 || base == arg_pointer_rtx
19490 || base == frame_pointer_rtx
19491 || (base && REG_P (base)
19492 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
19512 ix86_attr_length_immediate_default (rtx insn, int shortform)
19516 extract_insn_cached (insn);
19517 for (i = recog_data.n_operands - 1; i >= 0; --i)
19518 if (CONSTANT_P (recog_data.operand[i]))
19520 enum attr_mode mode = get_attr_mode (insn);
19523 if (shortform && CONST_INT_P (recog_data.operand[i]))
19525 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
19532 ival = trunc_int_for_mode (ival, HImode);
19535 ival = trunc_int_for_mode (ival, SImode);
19540 if (IN_RANGE (ival, -128, 127))
        /* Immediates for DImode instructions are encoded
           as 32-bit sign-extended values.  */
19562 fatal_insn ("unknown insn mode", insn);
19567 /* Compute default value for "length_address" attribute. */
19569 ix86_attr_length_address_default (rtx insn)
19573 if (get_attr_type (insn) == TYPE_LEA)
19575 rtx set = PATTERN (insn), addr;
19577 if (GET_CODE (set) == PARALLEL)
19578 set = XVECEXP (set, 0, 0);
19580 gcc_assert (GET_CODE (set) == SET);
19582 addr = SET_SRC (set);
19583 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
19585 if (GET_CODE (addr) == ZERO_EXTEND)
19586 addr = XEXP (addr, 0);
19587 if (GET_CODE (addr) == SUBREG)
19588 addr = SUBREG_REG (addr);
19591 return memory_address_length (addr);
19594 extract_insn_cached (insn);
19595 for (i = recog_data.n_operands - 1; i >= 0; --i)
19596 if (MEM_P (recog_data.operand[i]))
19598 constrain_operands_cached (reload_completed);
19599 if (which_alternative != -1)
19601 const char *constraints = recog_data.constraints[i];
19602 int alt = which_alternative;
19604 while (*constraints == '=' || *constraints == '+')
19607 while (*constraints++ != ',')
19609 /* Skip ignored operands. */
19610 if (*constraints == 'X')
19613 return memory_address_length (XEXP (recog_data.operand[i], 0));
19618 /* Compute default value for "length_vex" attribute. It includes
19619 2 or 3 byte VEX prefix and 1 opcode byte. */
19622 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19627 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3
19628 byte VEX prefix. */
  if (!has_0f_opcode || has_vex_w)
    return 3 + 1;

  /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
  if (!TARGET_64BIT)
    return 2 + 1;
19636 extract_insn_cached (insn);
19638 for (i = recog_data.n_operands - 1; i >= 0; --i)
19639 if (REG_P (recog_data.operand[i]))
19641 /* REX.W bit uses 3 byte VEX prefix. */
19642 if (GET_MODE (recog_data.operand[i]) == DImode
19643 && GENERAL_REG_P (recog_data.operand[i]))
19648 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19649 if (MEM_P (recog_data.operand[i])
19650 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
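/* E.g. vaddps (%r8), %xmm1, %xmm0 mentions an extended register in its
   memory operand and therefore needs the 3-byte VEX prefix, while
   vaddps (%rax), %xmm1, %xmm0 fits the 2-byte form (illustrative
   instructions, not taken from the insn tables).  */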
19657 /* Return the maximum number of instructions a cpu can issue. */
19660 ix86_issue_rate (void)
19664 case PROCESSOR_PENTIUM:
19665 case PROCESSOR_ATOM:
19669 case PROCESSOR_PENTIUMPRO:
19670 case PROCESSOR_PENTIUM4:
19671 case PROCESSOR_ATHLON:
19673 case PROCESSOR_AMDFAM10:
19674 case PROCESSOR_NOCONA:
19675 case PROCESSOR_GENERIC32:
19676 case PROCESSOR_GENERIC64:
19679 case PROCESSOR_CORE2:
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
   set by DEP_INSN and nothing else set by DEP_INSN.  */
19691 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19695 /* Simplify the test for uninteresting insns. */
19696 if (insn_type != TYPE_SETCC
19697 && insn_type != TYPE_ICMOV
19698 && insn_type != TYPE_FCMOV
      && insn_type != TYPE_IBR)
    return 0;
19702 if ((set = single_set (dep_insn)) != 0)
19704 set = SET_DEST (set);
19707 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19708 && XVECLEN (PATTERN (dep_insn), 0) == 2
19709 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19710 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
      set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
  if (!REG_P (set) || REGNO (set) != FLAGS_REG)
    return 0;
19721 /* This test is true if the dependent insn reads the flags but
19722 not any other potentially set register. */
19723 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
  if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
    return 0;

  return 1;
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */
19736 ix86_agi_dependent (rtx set_insn, rtx use_insn)
19739 extract_insn_cached (use_insn);
19740 for (i = recog_data.n_operands - 1; i >= 0; --i)
19741 if (MEM_P (recog_data.operand[i]))
19743 rtx addr = XEXP (recog_data.operand[i], 0);
        return modified_in_p (addr, set_insn) != 0;

  return false;
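/* An illustrative Pentium AGI pair (hypothetical assembly):

     mov  $buf, %ebx
     mov  %eax, 4(%ebx)

   The second insn's address uses %ebx, which the first insn just set, so
   ix86_agi_dependent returns true and ix86_adjust_cost below charges the
   extra address-generation cycle.  */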
19750 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19752 enum attr_type insn_type, dep_insn_type;
19753 enum attr_memory memory;
19755 int dep_insn_code_number;
19757 /* Anti and output dependencies have zero cost on all CPUs. */
  if (REG_NOTE_KIND (link) != 0)
    return 0;
19761 dep_insn_code_number = recog_memoized (dep_insn);
19763 /* If we can't recognize the insns, we can't really do anything. */
  if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
    return cost;
19767 insn_type = get_attr_type (insn);
19768 dep_insn_type = get_attr_type (dep_insn);
19772 case PROCESSOR_PENTIUM:
19773 /* Address Generation Interlock adds a cycle of latency. */
19774 if (insn_type == TYPE_LEA)
19776 rtx addr = PATTERN (insn);
19778 if (GET_CODE (addr) == PARALLEL)
19779 addr = XVECEXP (addr, 0, 0);
19781 gcc_assert (GET_CODE (addr) == SET);
19783 addr = SET_SRC (addr);
          if (modified_in_p (addr, dep_insn))
            cost += 1;
19787 else if (ix86_agi_dependent (dep_insn, insn))
19790 /* ??? Compares pair with jump/setcc. */
      if (ix86_flags_dependent (insn, dep_insn, insn_type))
        cost = 0;
19794 /* Floating point stores require value to be ready one cycle earlier. */
19795 if (insn_type == TYPE_FMOV
19796 && get_attr_memory (insn) == MEMORY_STORE
19797 && !ix86_agi_dependent (dep_insn, insn))
19801 case PROCESSOR_PENTIUMPRO:
19802 memory = get_attr_memory (insn);
19804 /* INT->FP conversion is expensive. */
      if (get_attr_fp_int_src (dep_insn))
        cost += 5;
19808 /* There is one cycle extra latency between an FP op and a store. */
19809 if (insn_type == TYPE_FMOV
19810 && (set = single_set (dep_insn)) != NULL_RTX
19811 && (set2 = single_set (insn)) != NULL_RTX
19812 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
          && MEM_P (SET_DEST (set2)))
        cost += 1;
19816 /* Show ability of reorder buffer to hide latency of load by executing
19817 in parallel with previous instruction in case
19818 previous instruction is not needed to compute the address. */
19819 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19820 && !ix86_agi_dependent (dep_insn, insn))
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
19824 if (dep_insn_type == TYPE_IMOV
19825 || dep_insn_type == TYPE_FMOV)
19833 memory = get_attr_memory (insn);
      /* The esp dependency is resolved before the instruction is really
         finished.  */
19838 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19841 /* INT->FP conversion is expensive. */
19842 if (get_attr_fp_int_src (dep_insn))
19845 /* Show ability of reorder buffer to hide latency of load by executing
19846 in parallel with previous instruction in case
19847 previous instruction is not needed to compute the address. */
19848 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19849 && !ix86_agi_dependent (dep_insn, insn))
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
19853 if (dep_insn_type == TYPE_IMOV
19854 || dep_insn_type == TYPE_FMOV)
19863 case PROCESSOR_ATHLON:
19865 case PROCESSOR_AMDFAM10:
19866 case PROCESSOR_ATOM:
19867 case PROCESSOR_GENERIC32:
19868 case PROCESSOR_GENERIC64:
19869 memory = get_attr_memory (insn);
19871 /* Show ability of reorder buffer to hide latency of load by executing
19872 in parallel with previous instruction in case
19873 previous instruction is not needed to compute the address. */
19874 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19875 && !ix86_agi_dependent (dep_insn, insn))
19877 enum attr_unit unit = get_attr_unit (insn);
          /* Because of the difference between the length of integer and
             floating unit pipeline preparation stages, the memory operands
             for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
19885 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19888 loadcost = TARGET_ATHLON ? 2 : 0;
19890 if (cost >= loadcost)
19903 /* How many alternative schedules to try. This should be as wide as the
19904 scheduling freedom in the DFA, but no wider. Making this value too
   large results in extra work for the scheduler.  */
19908 ia32_multipass_dfa_lookahead (void)
19912 case PROCESSOR_PENTIUM:
19915 case PROCESSOR_PENTIUMPRO:
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */
19932 ix86_constant_alignment (tree exp, int align)
19934 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19935 || TREE_CODE (exp) == INTEGER_CST)
      if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
        return 64;
      else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
        return 128;
19942 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19943 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
    return BITS_PER_WORD;

  return align;
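/* E.g. a DFmode constant that would ordinarily get 32-bit alignment is
   bumped to 64 bits so FP loads stay fast, and long string constants are
   word-aligned to help the inline string operations above.  */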
19949 /* Compute the alignment for a static variable.
19950 TYPE is the data type, and ALIGN is the alignment that
19951 the object would ordinarily have. The value of this function is used
19952 instead of that alignment to align the object. */
19955 ix86_data_alignment (tree type, int align)
19957 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19959 if (AGGREGATE_TYPE_P (type)
19960 && TYPE_SIZE (type)
19961 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19962 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19963 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
      && align < max_align)
    align = max_align;
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
19971 if (AGGREGATE_TYPE_P (type)
19972 && TYPE_SIZE (type)
19973 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19974 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19975 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19979 if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
19986 else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
19995 else if ((TREE_CODE (type) == RECORD_TYPE
19996 || TREE_CODE (type) == UNION_TYPE
19997 || TREE_CODE (type) == QUAL_UNION_TYPE)
19998 && TYPE_FIELDS (type))
      if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
        return 128;
20005 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20006 || TREE_CODE (type) == INTEGER_TYPE)
      if (TYPE_MODE (type) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
        return 128;

  return align;
20017 /* Compute the alignment for a local variable or a stack slot. EXP is
20018 the data type or decl itself, MODE is the widest mode available and
20019 ALIGN is the alignment that the object would ordinarily have. The
20020 value of this macro is used instead of that alignment to align the
20024 ix86_local_alignment (tree exp, enum machine_mode mode,
20025 unsigned int align)
20029 if (exp && DECL_P (exp))
20031 type = TREE_TYPE (exp);
20040 /* Don't do dynamic stack realignment for long long objects with
20041 -mpreferred-stack-boundary=2. */
20044 && ix86_preferred_stack_boundary < 64
20045 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20046 && (!type || !TYPE_USER_ALIGN (type))
20047 && (!decl || !DECL_USER_ALIGN (decl)))
20050 /* If TYPE is NULL, we are allocating a stack slot for caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
  if (!type)
    {
      if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
        align = GET_MODE_ALIGNMENT (DFmode);
      return align;
    }
  /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16-byte boundary.  */
20064 if (AGGREGATE_TYPE_P (type)
20065 && TYPE_SIZE (type)
20066 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20067 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20068 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20071 if (TREE_CODE (type) == ARRAY_TYPE)
      if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
        return 64;
      if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
        return 128;
20078 else if (TREE_CODE (type) == COMPLEX_TYPE)
      if (TYPE_MODE (type) == DCmode && align < 64)
        return 64;
      if ((TYPE_MODE (type) == XCmode
           || TYPE_MODE (type) == TCmode) && align < 128)
        return 128;
20086 else if ((TREE_CODE (type) == RECORD_TYPE
20087 || TREE_CODE (type) == UNION_TYPE
20088 || TREE_CODE (type) == QUAL_UNION_TYPE)
20089 && TYPE_FIELDS (type))
20091 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20093 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20096 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20097 || TREE_CODE (type) == INTEGER_TYPE)
20100 if (TYPE_MODE (type) == DFmode && align < 64)
20102 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20108 /* Compute the minimum required alignment for dynamic stack realignment
20109 purposes for a local variable, parameter or a stack slot. EXP is
20110 the data type or decl itself, MODE is its mode and ALIGN is the
20111 alignment that the object would ordinarily have. */
20113 unsigned int
20114 ix86_minimum_alignment (tree exp, enum machine_mode mode,
20115 unsigned int align)
20116 {
20117 tree type, decl;
20119 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20120 return align;
20122 if (exp && DECL_P (exp))
20123 {
20124 type = TREE_TYPE (exp);
20125 decl = exp;
20126 }
20127 else
20128 {
20129 type = exp;
20130 decl = NULL;
20131 }
20133 /* Don't do dynamic stack realignment for long long objects with
20134 -mpreferred-stack-boundary=2. */
20135 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20136 && (!type || !TYPE_USER_ALIGN (type))
20137 && (!decl || !DECL_USER_ALIGN (decl)))
20138 return 32;
20140 return align;
20141 }
20143 /* Find a location for the static chain incoming to a nested function.
20144 This is a register, unless all free registers are used by arguments. */
20146 static rtx
20147 ix86_static_chain (const_tree fndecl, bool incoming_p)
20148 {
20149 unsigned regno;
20151 if (!DECL_STATIC_CHAIN (fndecl))
20152 return NULL;
20154 if (TARGET_64BIT)
20155 {
20156 /* We always use R10 in 64-bit mode. */
20157 regno = R10_REG;
20158 }
20159 else
20160 {
20161 tree fntype;
20162 /* By default in 32-bit mode we use ECX to pass the static chain. */
20163 regno = CX_REG;
20165 fntype = TREE_TYPE (fndecl);
20166 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
20167 {
20168 /* Fastcall functions use ecx/edx for arguments, which leaves
20169 us with EAX for the static chain. */
20170 regno = AX_REG;
20171 }
20172 else if (ix86_function_regparm (fntype, fndecl) == 3)
20173 {
20174 /* For regparm 3, we have no free call-clobbered registers in
20175 which to store the static chain. In order to implement this,
20176 we have the trampoline push the static chain to the stack.
20177 However, we can't push a value below the return address when
20178 we call the nested function directly, so we have to use an
20179 alternate entry point. For this we use ESI, and have the
20180 alternate entry point push ESI, so that things appear the
20181 same once we're executing the nested function. */
20182 if (incoming_p)
20183 {
20184 if (fndecl == current_function_decl)
20185 ix86_static_chain_on_stack = true;
20186 return gen_frame_mem (SImode,
20187 plus_constant (arg_pointer_rtx, -8));
20188 }
20189 regno = SI_REG;
20190 }
20191 }
20193 return gen_rtx_REG (Pmode, regno);
20194 }
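/* Illustrative sketch (not part of GCC): the kind of GNU C source
   that exercises ix86_static_chain.  INNER reads OUTER's frame, so
   taking its address forces the compiler to build a trampoline that
   loads the static chain into the location chosen above before
   jumping to INNER's real body.  The names are inventions of the
   sketch.  */
static int
sketch_outer (int x)
{
  int inner (int y) { return x + y; }   /* refers to OUTER's X */
  int (*fp) (int) = inner;              /* address escapes: needs a trampoline */
  return fp (1);
}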
20196 /* Emit RTL insns to initialize the variable parts of a trampoline.
20197 FNDECL is the decl of the target address; M_TRAMP is a MEM for
20198 the trampoline, and CHAIN_VALUE is an RTX for the static chain
20199 to be passed to the target function. */
20201 static void
20202 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
20203 {
20204 rtx mem, fnaddr;
20206 fnaddr = XEXP (DECL_RTL (fndecl), 0);
20208 if (!TARGET_64BIT)
20209 {
20210 rtx disp, chain;
20211 int opcode;
20213 /* Depending on the static chain location, either load a register
20214 with a constant, or push the constant to the stack. All of the
20215 instructions are the same size. */
20216 chain = ix86_static_chain (fndecl, true);
20217 if (REG_P (chain))
20218 {
20219 if (REGNO (chain) == CX_REG)
20220 opcode = 0xb9;
20221 else if (REGNO (chain) == AX_REG)
20222 opcode = 0xb8;
20223 else
20224 gcc_unreachable ();
20225 }
20226 else
20227 opcode = 0x68;
20229 mem = adjust_address (m_tramp, QImode, 0);
20230 emit_move_insn (mem, gen_int_mode (opcode, QImode));
20232 mem = adjust_address (m_tramp, SImode, 1);
20233 emit_move_insn (mem, chain_value);
20235 /* Compute offset from the end of the jmp to the target function.
20236 In the case in which the trampoline stores the static chain on
20237 the stack, we need to skip the first insn which pushes the
20238 (call-saved) register static chain; this push is 1 byte. */
20239 disp = expand_binop (SImode, sub_optab, fnaddr,
20240 plus_constant (XEXP (m_tramp, 0),
20241 MEM_P (chain) ? 9 : 10),
20242 NULL_RTX, 1, OPTAB_DIRECT);
20244 mem = adjust_address (m_tramp, QImode, 5);
20245 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
20247 mem = adjust_address (m_tramp, SImode, 6);
20248 emit_move_insn (mem, disp);
20249 }
20250 else
20251 {
20252 int offset = 0;
20254 /* Load the function address to r11. Try to load address using
20255 the shorter movl instead of movabs. We may want to support
20256 movq for kernel mode, but kernel does not use trampolines at
20257 the moment. */
20258 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
20259 {
20260 fnaddr = copy_to_mode_reg (DImode, fnaddr);
20262 mem = adjust_address (m_tramp, HImode, offset);
20263 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
20265 mem = adjust_address (m_tramp, SImode, offset + 2);
20266 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
20267 offset += 6;
20268 }
20269 else
20270 {
20271 mem = adjust_address (m_tramp, HImode, offset);
20272 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
20274 mem = adjust_address (m_tramp, DImode, offset + 2);
20275 emit_move_insn (mem, fnaddr);
20276 offset += 10;
20277 }
20279 /* Load static chain using movabs to r10. */
20280 mem = adjust_address (m_tramp, HImode, offset);
20281 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
20283 mem = adjust_address (m_tramp, DImode, offset + 2);
20284 emit_move_insn (mem, chain_value);
20285 offset += 10;
20287 /* Jump to r11; the last (unused) byte is a nop, only there to
20288 pad the write out to a single 32-bit store. */
20289 mem = adjust_address (m_tramp, SImode, offset);
20290 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
20291 offset += 4;
20293 gcc_assert (offset <= TRAMPOLINE_SIZE);
20294 }
20296 #ifdef ENABLE_EXECUTE_STACK
20297 #ifdef CHECK_EXECUTE_STACK_ENABLED
20298 if (CHECK_EXECUTE_STACK_ENABLED)
20299 #endif
20300 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
20301 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
20302 #endif
20303 }
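/* Illustrative sketch (not part of GCC): the 10-byte ia32 trampoline
   emitted above, shown as raw bytes for the ECX case.  The offsets
   mirror the adjust_address calls: the mov/push opcode at 0, the
   static chain immediate at 1, the 0xe9 jmp opcode at 5 and its
   rel32 displacement at 6.  The 0xb9 byte (mov $imm32, %ecx) is the
   standard x86 encoding assumed by the sketch.  */
static const unsigned char sketch_ia32_trampoline[10] = {
  0xb9,                 /* mov $imm32, %ecx: load the static chain */
  0, 0, 0, 0,           /* imm32: CHAIN_VALUE, patched at run time */
  0xe9,                 /* jmp rel32 */
  0, 0, 0, 0            /* rel32: target - (trampoline + 10) */
};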
20305 /* The following file contains several enumerations and data structures
20306 built from the definitions in i386-builtin-types.def. */
20308 #include "i386-builtin-types.inc"
20310 /* Table for the ix86 builtin non-function types. */
20311 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
20313 /* Retrieve an element from the above table, building some of
20314 the types lazily. */
20316 static tree
20317 ix86_get_builtin_type (enum ix86_builtin_type tcode)
20318 {
20319 unsigned int index;
20320 tree type, itype;
20322 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
20324 type = ix86_builtin_type_tab[(int) tcode];
20325 if (type != NULL)
20326 return type;
20328 gcc_assert (tcode > IX86_BT_LAST_PRIM);
20329 if (tcode <= IX86_BT_LAST_VECT)
20330 {
20331 enum machine_mode mode;
20333 index = tcode - IX86_BT_LAST_PRIM - 1;
20334 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
20335 mode = ix86_builtin_type_vect_mode[index];
20337 type = build_vector_type_for_mode (itype, mode);
20338 }
20339 else
20340 {
20341 int quals;
20343 index = tcode - IX86_BT_LAST_VECT - 1;
20344 if (tcode <= IX86_BT_LAST_PTR)
20345 quals = TYPE_UNQUALIFIED;
20346 else
20347 quals = TYPE_QUAL_CONST;
20349 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
20350 if (quals != TYPE_UNQUALIFIED)
20351 itype = build_qualified_type (itype, quals);
20353 type = build_pointer_type (itype);
20354 }
20356 ix86_builtin_type_tab[(int) tcode] = type;
20357 return type;
20358 }
20360 /* Table for the ix86 builtin function types. */
20361 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
20363 /* Retrieve an element from the above table, building some of
20364 the types lazily. */
20366 static tree
20367 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
20368 {
20369 tree type;
20371 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
20373 type = ix86_builtin_func_type_tab[(int) tcode];
20374 if (type != NULL)
20375 return type;
20377 if (tcode <= IX86_BT_LAST_FUNC)
20378 {
20379 unsigned start = ix86_builtin_func_start[(int) tcode];
20380 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
20381 tree rtype, atype, args = void_list_node;
20382 unsigned i;
20384 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
20385 for (i = after - 1; i > start; --i)
20386 {
20387 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
20388 args = tree_cons (NULL, atype, args);
20389 }
20391 type = build_function_type (rtype, args);
20392 }
20393 else
20394 {
20395 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
20396 enum ix86_builtin_func_type icode;
20398 icode = ix86_builtin_func_alias_base[index];
20399 type = ix86_get_builtin_func_type (icode);
20400 }
20402 ix86_builtin_func_type_tab[(int) tcode] = type;
20403 return type;
20404 }
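/* Illustrative sketch (not part of GCC): the memoization pattern
   shared by the two getters above, reduced to its core.  A slot is
   NULL until first use; the first lookup builds the object and
   caches it, so recursion and repeated queries stay cheap.  The
   names and the fixed table size are inventions of the sketch.  */
static void *sketch_type_cache[64];

static void *
sketch_get_type (unsigned int code, void *(*build) (unsigned int))
{
  if (code >= 64)
    return 0;                                  /* out of range */
  if (sketch_type_cache[code] == 0)
    sketch_type_cache[code] = build (code);    /* built lazily, once */
  return sketch_type_cache[code];
}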
20407 /* Codes for all the SSE/MMX builtins. */
20408 enum ix86_builtins
20409 {
20410 IX86_BUILTIN_ADDPS,
20411 IX86_BUILTIN_ADDSS,
20412 IX86_BUILTIN_DIVPS,
20413 IX86_BUILTIN_DIVSS,
20414 IX86_BUILTIN_MULPS,
20415 IX86_BUILTIN_MULSS,
20416 IX86_BUILTIN_SUBPS,
20417 IX86_BUILTIN_SUBSS,
20419 IX86_BUILTIN_CMPEQPS,
20420 IX86_BUILTIN_CMPLTPS,
20421 IX86_BUILTIN_CMPLEPS,
20422 IX86_BUILTIN_CMPGTPS,
20423 IX86_BUILTIN_CMPGEPS,
20424 IX86_BUILTIN_CMPNEQPS,
20425 IX86_BUILTIN_CMPNLTPS,
20426 IX86_BUILTIN_CMPNLEPS,
20427 IX86_BUILTIN_CMPNGTPS,
20428 IX86_BUILTIN_CMPNGEPS,
20429 IX86_BUILTIN_CMPORDPS,
20430 IX86_BUILTIN_CMPUNORDPS,
20431 IX86_BUILTIN_CMPEQSS,
20432 IX86_BUILTIN_CMPLTSS,
20433 IX86_BUILTIN_CMPLESS,
20434 IX86_BUILTIN_CMPNEQSS,
20435 IX86_BUILTIN_CMPNLTSS,
20436 IX86_BUILTIN_CMPNLESS,
20437 IX86_BUILTIN_CMPNGTSS,
20438 IX86_BUILTIN_CMPNGESS,
20439 IX86_BUILTIN_CMPORDSS,
20440 IX86_BUILTIN_CMPUNORDSS,
20442 IX86_BUILTIN_COMIEQSS,
20443 IX86_BUILTIN_COMILTSS,
20444 IX86_BUILTIN_COMILESS,
20445 IX86_BUILTIN_COMIGTSS,
20446 IX86_BUILTIN_COMIGESS,
20447 IX86_BUILTIN_COMINEQSS,
20448 IX86_BUILTIN_UCOMIEQSS,
20449 IX86_BUILTIN_UCOMILTSS,
20450 IX86_BUILTIN_UCOMILESS,
20451 IX86_BUILTIN_UCOMIGTSS,
20452 IX86_BUILTIN_UCOMIGESS,
20453 IX86_BUILTIN_UCOMINEQSS,
20455 IX86_BUILTIN_CVTPI2PS,
20456 IX86_BUILTIN_CVTPS2PI,
20457 IX86_BUILTIN_CVTSI2SS,
20458 IX86_BUILTIN_CVTSI642SS,
20459 IX86_BUILTIN_CVTSS2SI,
20460 IX86_BUILTIN_CVTSS2SI64,
20461 IX86_BUILTIN_CVTTPS2PI,
20462 IX86_BUILTIN_CVTTSS2SI,
20463 IX86_BUILTIN_CVTTSS2SI64,
20465 IX86_BUILTIN_MAXPS,
20466 IX86_BUILTIN_MAXSS,
20467 IX86_BUILTIN_MINPS,
20468 IX86_BUILTIN_MINSS,
20470 IX86_BUILTIN_LOADUPS,
20471 IX86_BUILTIN_STOREUPS,
20472 IX86_BUILTIN_MOVSS,
20474 IX86_BUILTIN_MOVHLPS,
20475 IX86_BUILTIN_MOVLHPS,
20476 IX86_BUILTIN_LOADHPS,
20477 IX86_BUILTIN_LOADLPS,
20478 IX86_BUILTIN_STOREHPS,
20479 IX86_BUILTIN_STORELPS,
20481 IX86_BUILTIN_MASKMOVQ,
20482 IX86_BUILTIN_MOVMSKPS,
20483 IX86_BUILTIN_PMOVMSKB,
20485 IX86_BUILTIN_MOVNTPS,
20486 IX86_BUILTIN_MOVNTQ,
20488 IX86_BUILTIN_LOADDQU,
20489 IX86_BUILTIN_STOREDQU,
20491 IX86_BUILTIN_PACKSSWB,
20492 IX86_BUILTIN_PACKSSDW,
20493 IX86_BUILTIN_PACKUSWB,
20495 IX86_BUILTIN_PADDB,
20496 IX86_BUILTIN_PADDW,
20497 IX86_BUILTIN_PADDD,
20498 IX86_BUILTIN_PADDQ,
20499 IX86_BUILTIN_PADDSB,
20500 IX86_BUILTIN_PADDSW,
20501 IX86_BUILTIN_PADDUSB,
20502 IX86_BUILTIN_PADDUSW,
20503 IX86_BUILTIN_PSUBB,
20504 IX86_BUILTIN_PSUBW,
20505 IX86_BUILTIN_PSUBD,
20506 IX86_BUILTIN_PSUBQ,
20507 IX86_BUILTIN_PSUBSB,
20508 IX86_BUILTIN_PSUBSW,
20509 IX86_BUILTIN_PSUBUSB,
20510 IX86_BUILTIN_PSUBUSW,
20513 IX86_BUILTIN_PANDN,
20517 IX86_BUILTIN_PAVGB,
20518 IX86_BUILTIN_PAVGW,
20520 IX86_BUILTIN_PCMPEQB,
20521 IX86_BUILTIN_PCMPEQW,
20522 IX86_BUILTIN_PCMPEQD,
20523 IX86_BUILTIN_PCMPGTB,
20524 IX86_BUILTIN_PCMPGTW,
20525 IX86_BUILTIN_PCMPGTD,
20527 IX86_BUILTIN_PMADDWD,
20529 IX86_BUILTIN_PMAXSW,
20530 IX86_BUILTIN_PMAXUB,
20531 IX86_BUILTIN_PMINSW,
20532 IX86_BUILTIN_PMINUB,
20534 IX86_BUILTIN_PMULHUW,
20535 IX86_BUILTIN_PMULHW,
20536 IX86_BUILTIN_PMULLW,
20538 IX86_BUILTIN_PSADBW,
20539 IX86_BUILTIN_PSHUFW,
20541 IX86_BUILTIN_PSLLW,
20542 IX86_BUILTIN_PSLLD,
20543 IX86_BUILTIN_PSLLQ,
20544 IX86_BUILTIN_PSRAW,
20545 IX86_BUILTIN_PSRAD,
20546 IX86_BUILTIN_PSRLW,
20547 IX86_BUILTIN_PSRLD,
20548 IX86_BUILTIN_PSRLQ,
20549 IX86_BUILTIN_PSLLWI,
20550 IX86_BUILTIN_PSLLDI,
20551 IX86_BUILTIN_PSLLQI,
20552 IX86_BUILTIN_PSRAWI,
20553 IX86_BUILTIN_PSRADI,
20554 IX86_BUILTIN_PSRLWI,
20555 IX86_BUILTIN_PSRLDI,
20556 IX86_BUILTIN_PSRLQI,
20558 IX86_BUILTIN_PUNPCKHBW,
20559 IX86_BUILTIN_PUNPCKHWD,
20560 IX86_BUILTIN_PUNPCKHDQ,
20561 IX86_BUILTIN_PUNPCKLBW,
20562 IX86_BUILTIN_PUNPCKLWD,
20563 IX86_BUILTIN_PUNPCKLDQ,
20565 IX86_BUILTIN_SHUFPS,
20567 IX86_BUILTIN_RCPPS,
20568 IX86_BUILTIN_RCPSS,
20569 IX86_BUILTIN_RSQRTPS,
20570 IX86_BUILTIN_RSQRTPS_NR,
20571 IX86_BUILTIN_RSQRTSS,
20572 IX86_BUILTIN_RSQRTF,
20573 IX86_BUILTIN_SQRTPS,
20574 IX86_BUILTIN_SQRTPS_NR,
20575 IX86_BUILTIN_SQRTSS,
20577 IX86_BUILTIN_UNPCKHPS,
20578 IX86_BUILTIN_UNPCKLPS,
20580 IX86_BUILTIN_ANDPS,
20581 IX86_BUILTIN_ANDNPS,
20583 IX86_BUILTIN_XORPS,
20586 IX86_BUILTIN_LDMXCSR,
20587 IX86_BUILTIN_STMXCSR,
20588 IX86_BUILTIN_SFENCE,
20590 /* 3DNow! Original */
20591 IX86_BUILTIN_FEMMS,
20592 IX86_BUILTIN_PAVGUSB,
20593 IX86_BUILTIN_PF2ID,
20594 IX86_BUILTIN_PFACC,
20595 IX86_BUILTIN_PFADD,
20596 IX86_BUILTIN_PFCMPEQ,
20597 IX86_BUILTIN_PFCMPGE,
20598 IX86_BUILTIN_PFCMPGT,
20599 IX86_BUILTIN_PFMAX,
20600 IX86_BUILTIN_PFMIN,
20601 IX86_BUILTIN_PFMUL,
20602 IX86_BUILTIN_PFRCP,
20603 IX86_BUILTIN_PFRCPIT1,
20604 IX86_BUILTIN_PFRCPIT2,
20605 IX86_BUILTIN_PFRSQIT1,
20606 IX86_BUILTIN_PFRSQRT,
20607 IX86_BUILTIN_PFSUB,
20608 IX86_BUILTIN_PFSUBR,
20609 IX86_BUILTIN_PI2FD,
20610 IX86_BUILTIN_PMULHRW,
20612 /* 3DNow! Athlon Extensions */
20613 IX86_BUILTIN_PF2IW,
20614 IX86_BUILTIN_PFNACC,
20615 IX86_BUILTIN_PFPNACC,
20616 IX86_BUILTIN_PI2FW,
20617 IX86_BUILTIN_PSWAPDSI,
20618 IX86_BUILTIN_PSWAPDSF,
20621 IX86_BUILTIN_ADDPD,
20622 IX86_BUILTIN_ADDSD,
20623 IX86_BUILTIN_DIVPD,
20624 IX86_BUILTIN_DIVSD,
20625 IX86_BUILTIN_MULPD,
20626 IX86_BUILTIN_MULSD,
20627 IX86_BUILTIN_SUBPD,
20628 IX86_BUILTIN_SUBSD,
20630 IX86_BUILTIN_CMPEQPD,
20631 IX86_BUILTIN_CMPLTPD,
20632 IX86_BUILTIN_CMPLEPD,
20633 IX86_BUILTIN_CMPGTPD,
20634 IX86_BUILTIN_CMPGEPD,
20635 IX86_BUILTIN_CMPNEQPD,
20636 IX86_BUILTIN_CMPNLTPD,
20637 IX86_BUILTIN_CMPNLEPD,
20638 IX86_BUILTIN_CMPNGTPD,
20639 IX86_BUILTIN_CMPNGEPD,
20640 IX86_BUILTIN_CMPORDPD,
20641 IX86_BUILTIN_CMPUNORDPD,
20642 IX86_BUILTIN_CMPEQSD,
20643 IX86_BUILTIN_CMPLTSD,
20644 IX86_BUILTIN_CMPLESD,
20645 IX86_BUILTIN_CMPNEQSD,
20646 IX86_BUILTIN_CMPNLTSD,
20647 IX86_BUILTIN_CMPNLESD,
20648 IX86_BUILTIN_CMPORDSD,
20649 IX86_BUILTIN_CMPUNORDSD,
20651 IX86_BUILTIN_COMIEQSD,
20652 IX86_BUILTIN_COMILTSD,
20653 IX86_BUILTIN_COMILESD,
20654 IX86_BUILTIN_COMIGTSD,
20655 IX86_BUILTIN_COMIGESD,
20656 IX86_BUILTIN_COMINEQSD,
20657 IX86_BUILTIN_UCOMIEQSD,
20658 IX86_BUILTIN_UCOMILTSD,
20659 IX86_BUILTIN_UCOMILESD,
20660 IX86_BUILTIN_UCOMIGTSD,
20661 IX86_BUILTIN_UCOMIGESD,
20662 IX86_BUILTIN_UCOMINEQSD,
20664 IX86_BUILTIN_MAXPD,
20665 IX86_BUILTIN_MAXSD,
20666 IX86_BUILTIN_MINPD,
20667 IX86_BUILTIN_MINSD,
20669 IX86_BUILTIN_ANDPD,
20670 IX86_BUILTIN_ANDNPD,
20672 IX86_BUILTIN_XORPD,
20674 IX86_BUILTIN_SQRTPD,
20675 IX86_BUILTIN_SQRTSD,
20677 IX86_BUILTIN_UNPCKHPD,
20678 IX86_BUILTIN_UNPCKLPD,
20680 IX86_BUILTIN_SHUFPD,
20682 IX86_BUILTIN_LOADUPD,
20683 IX86_BUILTIN_STOREUPD,
20684 IX86_BUILTIN_MOVSD,
20686 IX86_BUILTIN_LOADHPD,
20687 IX86_BUILTIN_LOADLPD,
20689 IX86_BUILTIN_CVTDQ2PD,
20690 IX86_BUILTIN_CVTDQ2PS,
20692 IX86_BUILTIN_CVTPD2DQ,
20693 IX86_BUILTIN_CVTPD2PI,
20694 IX86_BUILTIN_CVTPD2PS,
20695 IX86_BUILTIN_CVTTPD2DQ,
20696 IX86_BUILTIN_CVTTPD2PI,
20698 IX86_BUILTIN_CVTPI2PD,
20699 IX86_BUILTIN_CVTSI2SD,
20700 IX86_BUILTIN_CVTSI642SD,
20702 IX86_BUILTIN_CVTSD2SI,
20703 IX86_BUILTIN_CVTSD2SI64,
20704 IX86_BUILTIN_CVTSD2SS,
20705 IX86_BUILTIN_CVTSS2SD,
20706 IX86_BUILTIN_CVTTSD2SI,
20707 IX86_BUILTIN_CVTTSD2SI64,
20709 IX86_BUILTIN_CVTPS2DQ,
20710 IX86_BUILTIN_CVTPS2PD,
20711 IX86_BUILTIN_CVTTPS2DQ,
20713 IX86_BUILTIN_MOVNTI,
20714 IX86_BUILTIN_MOVNTPD,
20715 IX86_BUILTIN_MOVNTDQ,
20717 IX86_BUILTIN_MOVQ128,
20720 IX86_BUILTIN_MASKMOVDQU,
20721 IX86_BUILTIN_MOVMSKPD,
20722 IX86_BUILTIN_PMOVMSKB128,
20724 IX86_BUILTIN_PACKSSWB128,
20725 IX86_BUILTIN_PACKSSDW128,
20726 IX86_BUILTIN_PACKUSWB128,
20728 IX86_BUILTIN_PADDB128,
20729 IX86_BUILTIN_PADDW128,
20730 IX86_BUILTIN_PADDD128,
20731 IX86_BUILTIN_PADDQ128,
20732 IX86_BUILTIN_PADDSB128,
20733 IX86_BUILTIN_PADDSW128,
20734 IX86_BUILTIN_PADDUSB128,
20735 IX86_BUILTIN_PADDUSW128,
20736 IX86_BUILTIN_PSUBB128,
20737 IX86_BUILTIN_PSUBW128,
20738 IX86_BUILTIN_PSUBD128,
20739 IX86_BUILTIN_PSUBQ128,
20740 IX86_BUILTIN_PSUBSB128,
20741 IX86_BUILTIN_PSUBSW128,
20742 IX86_BUILTIN_PSUBUSB128,
20743 IX86_BUILTIN_PSUBUSW128,
20745 IX86_BUILTIN_PAND128,
20746 IX86_BUILTIN_PANDN128,
20747 IX86_BUILTIN_POR128,
20748 IX86_BUILTIN_PXOR128,
20750 IX86_BUILTIN_PAVGB128,
20751 IX86_BUILTIN_PAVGW128,
20753 IX86_BUILTIN_PCMPEQB128,
20754 IX86_BUILTIN_PCMPEQW128,
20755 IX86_BUILTIN_PCMPEQD128,
20756 IX86_BUILTIN_PCMPGTB128,
20757 IX86_BUILTIN_PCMPGTW128,
20758 IX86_BUILTIN_PCMPGTD128,
20760 IX86_BUILTIN_PMADDWD128,
20762 IX86_BUILTIN_PMAXSW128,
20763 IX86_BUILTIN_PMAXUB128,
20764 IX86_BUILTIN_PMINSW128,
20765 IX86_BUILTIN_PMINUB128,
20767 IX86_BUILTIN_PMULUDQ,
20768 IX86_BUILTIN_PMULUDQ128,
20769 IX86_BUILTIN_PMULHUW128,
20770 IX86_BUILTIN_PMULHW128,
20771 IX86_BUILTIN_PMULLW128,
20773 IX86_BUILTIN_PSADBW128,
20774 IX86_BUILTIN_PSHUFHW,
20775 IX86_BUILTIN_PSHUFLW,
20776 IX86_BUILTIN_PSHUFD,
20778 IX86_BUILTIN_PSLLDQI128,
20779 IX86_BUILTIN_PSLLWI128,
20780 IX86_BUILTIN_PSLLDI128,
20781 IX86_BUILTIN_PSLLQI128,
20782 IX86_BUILTIN_PSRAWI128,
20783 IX86_BUILTIN_PSRADI128,
20784 IX86_BUILTIN_PSRLDQI128,
20785 IX86_BUILTIN_PSRLWI128,
20786 IX86_BUILTIN_PSRLDI128,
20787 IX86_BUILTIN_PSRLQI128,
20789 IX86_BUILTIN_PSLLDQ128,
20790 IX86_BUILTIN_PSLLW128,
20791 IX86_BUILTIN_PSLLD128,
20792 IX86_BUILTIN_PSLLQ128,
20793 IX86_BUILTIN_PSRAW128,
20794 IX86_BUILTIN_PSRAD128,
20795 IX86_BUILTIN_PSRLW128,
20796 IX86_BUILTIN_PSRLD128,
20797 IX86_BUILTIN_PSRLQ128,
20799 IX86_BUILTIN_PUNPCKHBW128,
20800 IX86_BUILTIN_PUNPCKHWD128,
20801 IX86_BUILTIN_PUNPCKHDQ128,
20802 IX86_BUILTIN_PUNPCKHQDQ128,
20803 IX86_BUILTIN_PUNPCKLBW128,
20804 IX86_BUILTIN_PUNPCKLWD128,
20805 IX86_BUILTIN_PUNPCKLDQ128,
20806 IX86_BUILTIN_PUNPCKLQDQ128,
20808 IX86_BUILTIN_CLFLUSH,
20809 IX86_BUILTIN_MFENCE,
20810 IX86_BUILTIN_LFENCE,
20812 IX86_BUILTIN_BSRSI,
20813 IX86_BUILTIN_BSRDI,
20814 IX86_BUILTIN_RDPMC,
20815 IX86_BUILTIN_RDTSC,
20816 IX86_BUILTIN_RDTSCP,
20817 IX86_BUILTIN_ROLQI,
20818 IX86_BUILTIN_ROLHI,
20819 IX86_BUILTIN_RORQI,
20820 IX86_BUILTIN_RORHI,
20823 IX86_BUILTIN_ADDSUBPS,
20824 IX86_BUILTIN_HADDPS,
20825 IX86_BUILTIN_HSUBPS,
20826 IX86_BUILTIN_MOVSHDUP,
20827 IX86_BUILTIN_MOVSLDUP,
20828 IX86_BUILTIN_ADDSUBPD,
20829 IX86_BUILTIN_HADDPD,
20830 IX86_BUILTIN_HSUBPD,
20831 IX86_BUILTIN_LDDQU,
20833 IX86_BUILTIN_MONITOR,
20834 IX86_BUILTIN_MWAIT,
20837 IX86_BUILTIN_PHADDW,
20838 IX86_BUILTIN_PHADDD,
20839 IX86_BUILTIN_PHADDSW,
20840 IX86_BUILTIN_PHSUBW,
20841 IX86_BUILTIN_PHSUBD,
20842 IX86_BUILTIN_PHSUBSW,
20843 IX86_BUILTIN_PMADDUBSW,
20844 IX86_BUILTIN_PMULHRSW,
20845 IX86_BUILTIN_PSHUFB,
20846 IX86_BUILTIN_PSIGNB,
20847 IX86_BUILTIN_PSIGNW,
20848 IX86_BUILTIN_PSIGND,
20849 IX86_BUILTIN_PALIGNR,
20850 IX86_BUILTIN_PABSB,
20851 IX86_BUILTIN_PABSW,
20852 IX86_BUILTIN_PABSD,
20854 IX86_BUILTIN_PHADDW128,
20855 IX86_BUILTIN_PHADDD128,
20856 IX86_BUILTIN_PHADDSW128,
20857 IX86_BUILTIN_PHSUBW128,
20858 IX86_BUILTIN_PHSUBD128,
20859 IX86_BUILTIN_PHSUBSW128,
20860 IX86_BUILTIN_PMADDUBSW128,
20861 IX86_BUILTIN_PMULHRSW128,
20862 IX86_BUILTIN_PSHUFB128,
20863 IX86_BUILTIN_PSIGNB128,
20864 IX86_BUILTIN_PSIGNW128,
20865 IX86_BUILTIN_PSIGND128,
20866 IX86_BUILTIN_PALIGNR128,
20867 IX86_BUILTIN_PABSB128,
20868 IX86_BUILTIN_PABSW128,
20869 IX86_BUILTIN_PABSD128,
20871 /* AMDFAM10 - SSE4A New Instructions. */
20872 IX86_BUILTIN_MOVNTSD,
20873 IX86_BUILTIN_MOVNTSS,
20874 IX86_BUILTIN_EXTRQI,
20875 IX86_BUILTIN_EXTRQ,
20876 IX86_BUILTIN_INSERTQI,
20877 IX86_BUILTIN_INSERTQ,
20880 IX86_BUILTIN_BLENDPD,
20881 IX86_BUILTIN_BLENDPS,
20882 IX86_BUILTIN_BLENDVPD,
20883 IX86_BUILTIN_BLENDVPS,
20884 IX86_BUILTIN_PBLENDVB128,
20885 IX86_BUILTIN_PBLENDW128,
20890 IX86_BUILTIN_INSERTPS128,
20892 IX86_BUILTIN_MOVNTDQA,
20893 IX86_BUILTIN_MPSADBW128,
20894 IX86_BUILTIN_PACKUSDW128,
20895 IX86_BUILTIN_PCMPEQQ,
20896 IX86_BUILTIN_PHMINPOSUW128,
20898 IX86_BUILTIN_PMAXSB128,
20899 IX86_BUILTIN_PMAXSD128,
20900 IX86_BUILTIN_PMAXUD128,
20901 IX86_BUILTIN_PMAXUW128,
20903 IX86_BUILTIN_PMINSB128,
20904 IX86_BUILTIN_PMINSD128,
20905 IX86_BUILTIN_PMINUD128,
20906 IX86_BUILTIN_PMINUW128,
20908 IX86_BUILTIN_PMOVSXBW128,
20909 IX86_BUILTIN_PMOVSXBD128,
20910 IX86_BUILTIN_PMOVSXBQ128,
20911 IX86_BUILTIN_PMOVSXWD128,
20912 IX86_BUILTIN_PMOVSXWQ128,
20913 IX86_BUILTIN_PMOVSXDQ128,
20915 IX86_BUILTIN_PMOVZXBW128,
20916 IX86_BUILTIN_PMOVZXBD128,
20917 IX86_BUILTIN_PMOVZXBQ128,
20918 IX86_BUILTIN_PMOVZXWD128,
20919 IX86_BUILTIN_PMOVZXWQ128,
20920 IX86_BUILTIN_PMOVZXDQ128,
20922 IX86_BUILTIN_PMULDQ128,
20923 IX86_BUILTIN_PMULLD128,
20925 IX86_BUILTIN_ROUNDPD,
20926 IX86_BUILTIN_ROUNDPS,
20927 IX86_BUILTIN_ROUNDSD,
20928 IX86_BUILTIN_ROUNDSS,
20930 IX86_BUILTIN_PTESTZ,
20931 IX86_BUILTIN_PTESTC,
20932 IX86_BUILTIN_PTESTNZC,
20934 IX86_BUILTIN_VEC_INIT_V2SI,
20935 IX86_BUILTIN_VEC_INIT_V4HI,
20936 IX86_BUILTIN_VEC_INIT_V8QI,
20937 IX86_BUILTIN_VEC_EXT_V2DF,
20938 IX86_BUILTIN_VEC_EXT_V2DI,
20939 IX86_BUILTIN_VEC_EXT_V4SF,
20940 IX86_BUILTIN_VEC_EXT_V4SI,
20941 IX86_BUILTIN_VEC_EXT_V8HI,
20942 IX86_BUILTIN_VEC_EXT_V2SI,
20943 IX86_BUILTIN_VEC_EXT_V4HI,
20944 IX86_BUILTIN_VEC_EXT_V16QI,
20945 IX86_BUILTIN_VEC_SET_V2DI,
20946 IX86_BUILTIN_VEC_SET_V4SF,
20947 IX86_BUILTIN_VEC_SET_V4SI,
20948 IX86_BUILTIN_VEC_SET_V8HI,
20949 IX86_BUILTIN_VEC_SET_V4HI,
20950 IX86_BUILTIN_VEC_SET_V16QI,
20952 IX86_BUILTIN_VEC_PACK_SFIX,
20955 IX86_BUILTIN_CRC32QI,
20956 IX86_BUILTIN_CRC32HI,
20957 IX86_BUILTIN_CRC32SI,
20958 IX86_BUILTIN_CRC32DI,
20960 IX86_BUILTIN_PCMPESTRI128,
20961 IX86_BUILTIN_PCMPESTRM128,
20962 IX86_BUILTIN_PCMPESTRA128,
20963 IX86_BUILTIN_PCMPESTRC128,
20964 IX86_BUILTIN_PCMPESTRO128,
20965 IX86_BUILTIN_PCMPESTRS128,
20966 IX86_BUILTIN_PCMPESTRZ128,
20967 IX86_BUILTIN_PCMPISTRI128,
20968 IX86_BUILTIN_PCMPISTRM128,
20969 IX86_BUILTIN_PCMPISTRA128,
20970 IX86_BUILTIN_PCMPISTRC128,
20971 IX86_BUILTIN_PCMPISTRO128,
20972 IX86_BUILTIN_PCMPISTRS128,
20973 IX86_BUILTIN_PCMPISTRZ128,
20975 IX86_BUILTIN_PCMPGTQ,
20977 /* AES instructions */
20978 IX86_BUILTIN_AESENC128,
20979 IX86_BUILTIN_AESENCLAST128,
20980 IX86_BUILTIN_AESDEC128,
20981 IX86_BUILTIN_AESDECLAST128,
20982 IX86_BUILTIN_AESIMC128,
20983 IX86_BUILTIN_AESKEYGENASSIST128,
20985 /* PCLMUL instruction */
20986 IX86_BUILTIN_PCLMULQDQ128,
20989 IX86_BUILTIN_ADDPD256,
20990 IX86_BUILTIN_ADDPS256,
20991 IX86_BUILTIN_ADDSUBPD256,
20992 IX86_BUILTIN_ADDSUBPS256,
20993 IX86_BUILTIN_ANDPD256,
20994 IX86_BUILTIN_ANDPS256,
20995 IX86_BUILTIN_ANDNPD256,
20996 IX86_BUILTIN_ANDNPS256,
20997 IX86_BUILTIN_BLENDPD256,
20998 IX86_BUILTIN_BLENDPS256,
20999 IX86_BUILTIN_BLENDVPD256,
21000 IX86_BUILTIN_BLENDVPS256,
21001 IX86_BUILTIN_DIVPD256,
21002 IX86_BUILTIN_DIVPS256,
21003 IX86_BUILTIN_DPPS256,
21004 IX86_BUILTIN_HADDPD256,
21005 IX86_BUILTIN_HADDPS256,
21006 IX86_BUILTIN_HSUBPD256,
21007 IX86_BUILTIN_HSUBPS256,
21008 IX86_BUILTIN_MAXPD256,
21009 IX86_BUILTIN_MAXPS256,
21010 IX86_BUILTIN_MINPD256,
21011 IX86_BUILTIN_MINPS256,
21012 IX86_BUILTIN_MULPD256,
21013 IX86_BUILTIN_MULPS256,
21014 IX86_BUILTIN_ORPD256,
21015 IX86_BUILTIN_ORPS256,
21016 IX86_BUILTIN_SHUFPD256,
21017 IX86_BUILTIN_SHUFPS256,
21018 IX86_BUILTIN_SUBPD256,
21019 IX86_BUILTIN_SUBPS256,
21020 IX86_BUILTIN_XORPD256,
21021 IX86_BUILTIN_XORPS256,
21022 IX86_BUILTIN_CMPSD,
21023 IX86_BUILTIN_CMPSS,
21024 IX86_BUILTIN_CMPPD,
21025 IX86_BUILTIN_CMPPS,
21026 IX86_BUILTIN_CMPPD256,
21027 IX86_BUILTIN_CMPPS256,
21028 IX86_BUILTIN_CVTDQ2PD256,
21029 IX86_BUILTIN_CVTDQ2PS256,
21030 IX86_BUILTIN_CVTPD2PS256,
21031 IX86_BUILTIN_CVTPS2DQ256,
21032 IX86_BUILTIN_CVTPS2PD256,
21033 IX86_BUILTIN_CVTTPD2DQ256,
21034 IX86_BUILTIN_CVTPD2DQ256,
21035 IX86_BUILTIN_CVTTPS2DQ256,
21036 IX86_BUILTIN_EXTRACTF128PD256,
21037 IX86_BUILTIN_EXTRACTF128PS256,
21038 IX86_BUILTIN_EXTRACTF128SI256,
21039 IX86_BUILTIN_VZEROALL,
21040 IX86_BUILTIN_VZEROUPPER,
21041 IX86_BUILTIN_VPERMILVARPD,
21042 IX86_BUILTIN_VPERMILVARPS,
21043 IX86_BUILTIN_VPERMILVARPD256,
21044 IX86_BUILTIN_VPERMILVARPS256,
21045 IX86_BUILTIN_VPERMILPD,
21046 IX86_BUILTIN_VPERMILPS,
21047 IX86_BUILTIN_VPERMILPD256,
21048 IX86_BUILTIN_VPERMILPS256,
21049 IX86_BUILTIN_VPERMIL2PD,
21050 IX86_BUILTIN_VPERMIL2PS,
21051 IX86_BUILTIN_VPERMIL2PD256,
21052 IX86_BUILTIN_VPERMIL2PS256,
21053 IX86_BUILTIN_VPERM2F128PD256,
21054 IX86_BUILTIN_VPERM2F128PS256,
21055 IX86_BUILTIN_VPERM2F128SI256,
21056 IX86_BUILTIN_VBROADCASTSS,
21057 IX86_BUILTIN_VBROADCASTSD256,
21058 IX86_BUILTIN_VBROADCASTSS256,
21059 IX86_BUILTIN_VBROADCASTPD256,
21060 IX86_BUILTIN_VBROADCASTPS256,
21061 IX86_BUILTIN_VINSERTF128PD256,
21062 IX86_BUILTIN_VINSERTF128PS256,
21063 IX86_BUILTIN_VINSERTF128SI256,
21064 IX86_BUILTIN_LOADUPD256,
21065 IX86_BUILTIN_LOADUPS256,
21066 IX86_BUILTIN_STOREUPD256,
21067 IX86_BUILTIN_STOREUPS256,
21068 IX86_BUILTIN_LDDQU256,
21069 IX86_BUILTIN_MOVNTDQ256,
21070 IX86_BUILTIN_MOVNTPD256,
21071 IX86_BUILTIN_MOVNTPS256,
21072 IX86_BUILTIN_LOADDQU256,
21073 IX86_BUILTIN_STOREDQU256,
21074 IX86_BUILTIN_MASKLOADPD,
21075 IX86_BUILTIN_MASKLOADPS,
21076 IX86_BUILTIN_MASKSTOREPD,
21077 IX86_BUILTIN_MASKSTOREPS,
21078 IX86_BUILTIN_MASKLOADPD256,
21079 IX86_BUILTIN_MASKLOADPS256,
21080 IX86_BUILTIN_MASKSTOREPD256,
21081 IX86_BUILTIN_MASKSTOREPS256,
21082 IX86_BUILTIN_MOVSHDUP256,
21083 IX86_BUILTIN_MOVSLDUP256,
21084 IX86_BUILTIN_MOVDDUP256,
21086 IX86_BUILTIN_SQRTPD256,
21087 IX86_BUILTIN_SQRTPS256,
21088 IX86_BUILTIN_SQRTPS_NR256,
21089 IX86_BUILTIN_RSQRTPS256,
21090 IX86_BUILTIN_RSQRTPS_NR256,
21092 IX86_BUILTIN_RCPPS256,
21094 IX86_BUILTIN_ROUNDPD256,
21095 IX86_BUILTIN_ROUNDPS256,
21097 IX86_BUILTIN_UNPCKHPD256,
21098 IX86_BUILTIN_UNPCKLPD256,
21099 IX86_BUILTIN_UNPCKHPS256,
21100 IX86_BUILTIN_UNPCKLPS256,
21102 IX86_BUILTIN_SI256_SI,
21103 IX86_BUILTIN_PS256_PS,
21104 IX86_BUILTIN_PD256_PD,
21105 IX86_BUILTIN_SI_SI256,
21106 IX86_BUILTIN_PS_PS256,
21107 IX86_BUILTIN_PD_PD256,
21109 IX86_BUILTIN_VTESTZPD,
21110 IX86_BUILTIN_VTESTCPD,
21111 IX86_BUILTIN_VTESTNZCPD,
21112 IX86_BUILTIN_VTESTZPS,
21113 IX86_BUILTIN_VTESTCPS,
21114 IX86_BUILTIN_VTESTNZCPS,
21115 IX86_BUILTIN_VTESTZPD256,
21116 IX86_BUILTIN_VTESTCPD256,
21117 IX86_BUILTIN_VTESTNZCPD256,
21118 IX86_BUILTIN_VTESTZPS256,
21119 IX86_BUILTIN_VTESTCPS256,
21120 IX86_BUILTIN_VTESTNZCPS256,
21121 IX86_BUILTIN_PTESTZ256,
21122 IX86_BUILTIN_PTESTC256,
21123 IX86_BUILTIN_PTESTNZC256,
21125 IX86_BUILTIN_MOVMSKPD256,
21126 IX86_BUILTIN_MOVMSKPS256,
21128 /* TFmode support builtins. */
21130 IX86_BUILTIN_HUGE_VALQ,
21131 IX86_BUILTIN_FABSQ,
21132 IX86_BUILTIN_COPYSIGNQ,
21134 /* Vectorizer support builtins. */
21135 IX86_BUILTIN_CPYSGNPS,
21136 IX86_BUILTIN_CPYSGNPD,
21138 IX86_BUILTIN_CVTUDQ2PS,
21140 IX86_BUILTIN_VEC_PERM_V2DF,
21141 IX86_BUILTIN_VEC_PERM_V4SF,
21142 IX86_BUILTIN_VEC_PERM_V2DI,
21143 IX86_BUILTIN_VEC_PERM_V4SI,
21144 IX86_BUILTIN_VEC_PERM_V8HI,
21145 IX86_BUILTIN_VEC_PERM_V16QI,
21146 IX86_BUILTIN_VEC_PERM_V2DI_U,
21147 IX86_BUILTIN_VEC_PERM_V4SI_U,
21148 IX86_BUILTIN_VEC_PERM_V8HI_U,
21149 IX86_BUILTIN_VEC_PERM_V16QI_U,
21150 IX86_BUILTIN_VEC_PERM_V4DF,
21151 IX86_BUILTIN_VEC_PERM_V8SF,
21153 /* FMA4 and XOP instructions. */
21154 IX86_BUILTIN_VFMADDSS,
21155 IX86_BUILTIN_VFMADDSD,
21156 IX86_BUILTIN_VFMADDPS,
21157 IX86_BUILTIN_VFMADDPD,
21158 IX86_BUILTIN_VFMSUBSS,
21159 IX86_BUILTIN_VFMSUBSD,
21160 IX86_BUILTIN_VFMSUBPS,
21161 IX86_BUILTIN_VFMSUBPD,
21162 IX86_BUILTIN_VFMADDSUBPS,
21163 IX86_BUILTIN_VFMADDSUBPD,
21164 IX86_BUILTIN_VFMSUBADDPS,
21165 IX86_BUILTIN_VFMSUBADDPD,
21166 IX86_BUILTIN_VFNMADDSS,
21167 IX86_BUILTIN_VFNMADDSD,
21168 IX86_BUILTIN_VFNMADDPS,
21169 IX86_BUILTIN_VFNMADDPD,
21170 IX86_BUILTIN_VFNMSUBSS,
21171 IX86_BUILTIN_VFNMSUBSD,
21172 IX86_BUILTIN_VFNMSUBPS,
21173 IX86_BUILTIN_VFNMSUBPD,
21174 IX86_BUILTIN_VFMADDPS256,
21175 IX86_BUILTIN_VFMADDPD256,
21176 IX86_BUILTIN_VFMSUBPS256,
21177 IX86_BUILTIN_VFMSUBPD256,
21178 IX86_BUILTIN_VFMADDSUBPS256,
21179 IX86_BUILTIN_VFMADDSUBPD256,
21180 IX86_BUILTIN_VFMSUBADDPS256,
21181 IX86_BUILTIN_VFMSUBADDPD256,
21182 IX86_BUILTIN_VFNMADDPS256,
21183 IX86_BUILTIN_VFNMADDPD256,
21184 IX86_BUILTIN_VFNMSUBPS256,
21185 IX86_BUILTIN_VFNMSUBPD256,
21187 IX86_BUILTIN_VPCMOV,
21188 IX86_BUILTIN_VPCMOV_V2DI,
21189 IX86_BUILTIN_VPCMOV_V4SI,
21190 IX86_BUILTIN_VPCMOV_V8HI,
21191 IX86_BUILTIN_VPCMOV_V16QI,
21192 IX86_BUILTIN_VPCMOV_V4SF,
21193 IX86_BUILTIN_VPCMOV_V2DF,
21194 IX86_BUILTIN_VPCMOV256,
21195 IX86_BUILTIN_VPCMOV_V4DI256,
21196 IX86_BUILTIN_VPCMOV_V8SI256,
21197 IX86_BUILTIN_VPCMOV_V16HI256,
21198 IX86_BUILTIN_VPCMOV_V32QI256,
21199 IX86_BUILTIN_VPCMOV_V8SF256,
21200 IX86_BUILTIN_VPCMOV_V4DF256,
21202 IX86_BUILTIN_VPPERM,
21204 IX86_BUILTIN_VPMACSSWW,
21205 IX86_BUILTIN_VPMACSWW,
21206 IX86_BUILTIN_VPMACSSWD,
21207 IX86_BUILTIN_VPMACSWD,
21208 IX86_BUILTIN_VPMACSSDD,
21209 IX86_BUILTIN_VPMACSDD,
21210 IX86_BUILTIN_VPMACSSDQL,
21211 IX86_BUILTIN_VPMACSSDQH,
21212 IX86_BUILTIN_VPMACSDQL,
21213 IX86_BUILTIN_VPMACSDQH,
21214 IX86_BUILTIN_VPMADCSSWD,
21215 IX86_BUILTIN_VPMADCSWD,
21217 IX86_BUILTIN_VPHADDBW,
21218 IX86_BUILTIN_VPHADDBD,
21219 IX86_BUILTIN_VPHADDBQ,
21220 IX86_BUILTIN_VPHADDWD,
21221 IX86_BUILTIN_VPHADDWQ,
21222 IX86_BUILTIN_VPHADDDQ,
21223 IX86_BUILTIN_VPHADDUBW,
21224 IX86_BUILTIN_VPHADDUBD,
21225 IX86_BUILTIN_VPHADDUBQ,
21226 IX86_BUILTIN_VPHADDUWD,
21227 IX86_BUILTIN_VPHADDUWQ,
21228 IX86_BUILTIN_VPHADDUDQ,
21229 IX86_BUILTIN_VPHSUBBW,
21230 IX86_BUILTIN_VPHSUBWD,
21231 IX86_BUILTIN_VPHSUBDQ,
21233 IX86_BUILTIN_VPROTB,
21234 IX86_BUILTIN_VPROTW,
21235 IX86_BUILTIN_VPROTD,
21236 IX86_BUILTIN_VPROTQ,
21237 IX86_BUILTIN_VPROTB_IMM,
21238 IX86_BUILTIN_VPROTW_IMM,
21239 IX86_BUILTIN_VPROTD_IMM,
21240 IX86_BUILTIN_VPROTQ_IMM,
21242 IX86_BUILTIN_VPSHLB,
21243 IX86_BUILTIN_VPSHLW,
21244 IX86_BUILTIN_VPSHLD,
21245 IX86_BUILTIN_VPSHLQ,
21246 IX86_BUILTIN_VPSHAB,
21247 IX86_BUILTIN_VPSHAW,
21248 IX86_BUILTIN_VPSHAD,
21249 IX86_BUILTIN_VPSHAQ,
21251 IX86_BUILTIN_VFRCZSS,
21252 IX86_BUILTIN_VFRCZSD,
21253 IX86_BUILTIN_VFRCZPS,
21254 IX86_BUILTIN_VFRCZPD,
21255 IX86_BUILTIN_VFRCZPS256,
21256 IX86_BUILTIN_VFRCZPD256,
21258 IX86_BUILTIN_VPCOMEQUB,
21259 IX86_BUILTIN_VPCOMNEUB,
21260 IX86_BUILTIN_VPCOMLTUB,
21261 IX86_BUILTIN_VPCOMLEUB,
21262 IX86_BUILTIN_VPCOMGTUB,
21263 IX86_BUILTIN_VPCOMGEUB,
21264 IX86_BUILTIN_VPCOMFALSEUB,
21265 IX86_BUILTIN_VPCOMTRUEUB,
21267 IX86_BUILTIN_VPCOMEQUW,
21268 IX86_BUILTIN_VPCOMNEUW,
21269 IX86_BUILTIN_VPCOMLTUW,
21270 IX86_BUILTIN_VPCOMLEUW,
21271 IX86_BUILTIN_VPCOMGTUW,
21272 IX86_BUILTIN_VPCOMGEUW,
21273 IX86_BUILTIN_VPCOMFALSEUW,
21274 IX86_BUILTIN_VPCOMTRUEUW,
21276 IX86_BUILTIN_VPCOMEQUD,
21277 IX86_BUILTIN_VPCOMNEUD,
21278 IX86_BUILTIN_VPCOMLTUD,
21279 IX86_BUILTIN_VPCOMLEUD,
21280 IX86_BUILTIN_VPCOMGTUD,
21281 IX86_BUILTIN_VPCOMGEUD,
21282 IX86_BUILTIN_VPCOMFALSEUD,
21283 IX86_BUILTIN_VPCOMTRUEUD,
21285 IX86_BUILTIN_VPCOMEQUQ,
21286 IX86_BUILTIN_VPCOMNEUQ,
21287 IX86_BUILTIN_VPCOMLTUQ,
21288 IX86_BUILTIN_VPCOMLEUQ,
21289 IX86_BUILTIN_VPCOMGTUQ,
21290 IX86_BUILTIN_VPCOMGEUQ,
21291 IX86_BUILTIN_VPCOMFALSEUQ,
21292 IX86_BUILTIN_VPCOMTRUEUQ,
21294 IX86_BUILTIN_VPCOMEQB,
21295 IX86_BUILTIN_VPCOMNEB,
21296 IX86_BUILTIN_VPCOMLTB,
21297 IX86_BUILTIN_VPCOMLEB,
21298 IX86_BUILTIN_VPCOMGTB,
21299 IX86_BUILTIN_VPCOMGEB,
21300 IX86_BUILTIN_VPCOMFALSEB,
21301 IX86_BUILTIN_VPCOMTRUEB,
21303 IX86_BUILTIN_VPCOMEQW,
21304 IX86_BUILTIN_VPCOMNEW,
21305 IX86_BUILTIN_VPCOMLTW,
21306 IX86_BUILTIN_VPCOMLEW,
21307 IX86_BUILTIN_VPCOMGTW,
21308 IX86_BUILTIN_VPCOMGEW,
21309 IX86_BUILTIN_VPCOMFALSEW,
21310 IX86_BUILTIN_VPCOMTRUEW,
21312 IX86_BUILTIN_VPCOMEQD,
21313 IX86_BUILTIN_VPCOMNED,
21314 IX86_BUILTIN_VPCOMLTD,
21315 IX86_BUILTIN_VPCOMLED,
21316 IX86_BUILTIN_VPCOMGTD,
21317 IX86_BUILTIN_VPCOMGED,
21318 IX86_BUILTIN_VPCOMFALSED,
21319 IX86_BUILTIN_VPCOMTRUED,
21321 IX86_BUILTIN_VPCOMEQQ,
21322 IX86_BUILTIN_VPCOMNEQ,
21323 IX86_BUILTIN_VPCOMLTQ,
21324 IX86_BUILTIN_VPCOMLEQ,
21325 IX86_BUILTIN_VPCOMGTQ,
21326 IX86_BUILTIN_VPCOMGEQ,
21327 IX86_BUILTIN_VPCOMFALSEQ,
21328 IX86_BUILTIN_VPCOMTRUEQ,
21330 /* LWP instructions. */
21331 IX86_BUILTIN_LLWPCB,
21332 IX86_BUILTIN_SLWPCB,
21333 IX86_BUILTIN_LWPVAL32,
21334 IX86_BUILTIN_LWPVAL64,
21335 IX86_BUILTIN_LWPINS32,
21336 IX86_BUILTIN_LWPINS64,
21338 IX86_BUILTIN_MAX
21339 };
21343 /* Table for the ix86 builtin decls. */
21344 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
21346 /* Table of all of the builtin functions that are possible with different ISAs
21347 but are waiting to be built until a function is declared to use that
21348 ISA. */
21349 struct builtin_isa {
21350 const char *name; /* function name */
21351 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
21352 int isa; /* isa_flags this builtin is defined for */
21353 bool const_p; /* true if the declaration is constant */
21354 bool set_and_not_built_p;
21355 };
21357 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
21360 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK
21361 of which isa_flags to use in the ix86_builtins_isa array. Stores the
21362 function decl in the ix86_builtins array. Returns the function decl or
21363 NULL_TREE, if the builtin was not added.
21365 If the front end has a special hook for builtin functions, delay adding
21366 builtin functions that aren't in the current ISA until the ISA is changed
21367 with function specific optimization. Doing so can save about 300K for the
21368 default compiler. When the builtin is expanded, check at that time whether
21369 it is valid.
21371 If the front end doesn't have a special hook, record all builtins, even if
21372 it isn't an instruction set in the current ISA in case the user uses
21373 function specific options for a different ISA, so that we don't get scope
21374 errors if a builtin is added in the middle of a function scope. */
21376 static inline tree
21377 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
21378 enum ix86_builtins code)
21379 {
21380 tree decl = NULL_TREE;
21382 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
21384 ix86_builtins_isa[(int) code].isa = mask;
21385 mask &= ~OPTION_MASK_ISA_64BIT;
21386 if (mask == 0
21387 || (mask & ix86_isa_flags) != 0
21388 || (lang_hooks.builtin_function
21389 == lang_hooks.builtin_function_ext_scope))
21391 {
21392 tree type = ix86_get_builtin_func_type (tcode);
21393 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
21395 ix86_builtins[(int) code] = decl;
21396 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
21397 }
21398 else
21399 {
21400 ix86_builtins[(int) code] = NULL_TREE;
21401 ix86_builtins_isa[(int) code].tcode = tcode;
21402 ix86_builtins_isa[(int) code].name = name;
21403 ix86_builtins_isa[(int) code].const_p = false;
21404 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
21405 }
21406 }
21408 return decl;
21409 }
21411 /* Like def_builtin, but also marks the function decl "const". */
21413 static inline tree
21414 def_builtin_const (int mask, const char *name,
21415 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
21416 {
21417 tree decl = def_builtin (mask, name, tcode, code);
21418 if (decl)
21419 TREE_READONLY (decl) = 1;
21420 else
21421 ix86_builtins_isa[(int) code].const_p = true;
21423 return decl;
21424 }
21426 /* Add any new builtin functions for a given ISA that may not have been
21427 declared. This saves a bit of space compared to adding all of the
21428 declarations to the tree, even if we didn't use them. */
21430 static void
21431 ix86_add_new_builtins (int isa)
21432 {
21433 int i;
21435 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
21436 {
21437 if ((ix86_builtins_isa[i].isa & isa) != 0
21438 && ix86_builtins_isa[i].set_and_not_built_p)
21439 {
21440 tree decl, type;
21442 /* Don't define the builtin again. */
21443 ix86_builtins_isa[i].set_and_not_built_p = false;
21445 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
21446 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
21447 type, i, BUILT_IN_MD, NULL,
21448 NULL_TREE);
21450 ix86_builtins[i] = decl;
21451 if (ix86_builtins_isa[i].const_p)
21452 TREE_READONLY (decl) = 1;
21453 }
21454 }
21455 }
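/* Illustrative sketch (not part of GCC): the deferral protocol
   implemented by def_builtin and ix86_add_new_builtins, in
   miniature.  An entry is recorded as pending; when its ISA is later
   enabled, the pending entries are walked and materialized exactly
   once.  The struct and names are inventions of the sketch.  */
struct sketch_builtin { int isa; int pending; int built; };

static void
sketch_add_new_builtins (struct sketch_builtin *tab, int n, int enabled_isa)
{
  int i;
  for (i = 0; i < n; i++)
    if ((tab[i].isa & enabled_isa) != 0 && tab[i].pending)
      {
        tab[i].pending = 0;   /* don't define the builtin again */
        tab[i].built = 1;     /* stands in for add_builtin_function_ext_scope */
      }
}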
21457 /* Bits for builtin_description.flag. */
21459 /* Set when we don't support the comparison natively, and should
21460 swap_comparison in order to support it. */
21461 #define BUILTIN_DESC_SWAP_OPERANDS 1
21463 struct builtin_description
21465 const unsigned int mask;
21466 const enum insn_code icode;
21467 const char *const name;
21468 const enum ix86_builtins code;
21469 const enum rtx_code comparison;
21470 const int flag;
21471 };
21473 static const struct builtin_description bdesc_comi[] =
21474 {
21475 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
21476 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
21477 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
21478 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
21479 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
21480 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
21481 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
21482 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
21483 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
21484 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
21485 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
21487 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
21488 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
21489 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
21490 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
21491 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
21492 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
21493 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
21494 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
21495 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
21496 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
21497 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
21498 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
21499 };
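/* Illustrative sketch (not part of GCC): calling one of the comi
   builtins registered from the table above.  __builtin_ia32_comieq
   is the builtin behind _mm_comieq_ss in <xmmintrin.h>; the vector
   typedef matches __v4sf.  Compile with -msse.  */
typedef float sketch_v4sf __attribute__ ((vector_size (16)));

static int
sketch_floats_equal (float a, float b)
{
  sketch_v4sf va = { a, 0, 0, 0 };
  sketch_v4sf vb = { b, 0, 0, 0 };
  return __builtin_ia32_comieq (va, vb);   /* expands via CODE_FOR_sse_comi */
}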
21501 static const struct builtin_description bdesc_pcmpestr[] =
21502 {
21503 /* SSE4.2 */
21504 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
21505 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
21506 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
21507 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
21508 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
21509 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
21510 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
21511 };
21513 static const struct builtin_description bdesc_pcmpistr[] =
21514 {
21515 /* SSE4.2 */
21516 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
21517 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
21518 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
21519 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
21520 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
21521 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
21522 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
21523 };
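/* Illustrative sketch (not part of GCC): what the CC*mode flag field
   above selects.  The plain pcmpistri128 builtin returns the match
   index, while the a/c/o/s/z variants return one flag set by the
   same instruction; the c-variant below reads CF, which is set when
   some byte of A matches a byte of B under "equal any" aggregation.
   Compile with -msse4.2.  */
typedef char sketch_v16qi __attribute__ ((vector_size (16)));

static int
sketch_any_byte_matches (sketch_v16qi a, sketch_v16qi b)
{
  /* Immediate 0: unsigned bytes, "equal any" aggregation.  */
  return __builtin_ia32_pcmpistric128 (a, b, 0);
}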
21525 /* Special builtins with variable number of arguments. */
21526 static const struct builtin_description bdesc_special_args[] =
21527 {
21528 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
21529 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
21532 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21535 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21538 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21539 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21540 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21542 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21543 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21544 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21545 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21547 /* SSE or 3DNow!A */
21548 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21549 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
21552 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21553 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21554 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21555 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21556 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21557 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21558 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21559 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21560 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21562 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21563 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21566 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21572 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21573 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21576 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21577 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21579 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21580 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21581 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21582 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21583 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21585 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21586 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21587 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21588 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21589 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21590 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21591 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21593 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21594 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21595 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21597 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21598 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21599 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21600 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21603 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21604 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21606 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
21607 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
21608 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
21609 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
21610 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
21611 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
21612 };
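/* Illustrative sketch (not part of GCC): one of the special-args
   builtins above in use.  __builtin_ia32_loadups is what
   _mm_loadu_ps in <xmmintrin.h> expands to; unlike an ordinary
   vector load it tolerates any pointer alignment.  Compile with
   -msse.  */
typedef float sketch_v4sf_u __attribute__ ((vector_size (16)));

static sketch_v4sf_u
sketch_load_unaligned (const float *p)
{
  return __builtin_ia32_loadups (p);   /* emits movups: no alignment trap */
}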
21615 /* Builtins with variable number of arguments. */
21616 static const struct builtin_description bdesc_args[] =
21617 {
21618 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
21619 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
21620 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
21621 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21622 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21623 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
21624 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
21627 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21628 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21629 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21630 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21631 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21632 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21634 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21635 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21636 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21637 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21638 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21639 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21640 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21641 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21643 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21644 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21646 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21647 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21648 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21649 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
{ OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
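
/* 3DNow! */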
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
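
/* 3DNow!A */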
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
{ OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
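
/* SSE */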
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },

{ OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

/* SSE MMX or 3Dnow!A */
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

{ OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
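
/* SSE2 */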
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
{ OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
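
/* SSE2 MMX */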
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
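
/* SSE3 */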
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
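
/* SSSE3 */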
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
{ OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
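
/* SSE4.1 */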
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
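
/* SSE4.1 round and ptest patterns (OPTION_MASK_ISA_ROUND) */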
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
{ OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
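
/* SSE4.2 */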
{ OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
{ OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
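
/* SSE4A */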
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
{ OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
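
/* AES */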
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
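
/* PCLMUL */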
{ OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
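
/* AVX */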
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22188 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
22189 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
22190 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
22192 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22193 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22194 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22196 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
22197 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22198 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22199 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22200 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22202 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
22204 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
22205 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
22207 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22208 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
22209 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22210 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
22212 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
22213 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
22214 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
22215 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
22216 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
22217 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
22219 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22220 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22221 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
22222 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22223 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22224 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
22225 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22226 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22227 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
22228 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22229 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22230 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
22231 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22232 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22233 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
22235 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
22236 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
22238 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
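/* Each row above binds one builtin to one named insn pattern; for
   instance, the smaxv4df3 row is what lets user code such as

       __m256d c = __builtin_ia32_maxpd256 (a, b);

   (the expansion of _mm256_max_pd from avxintrin.h) come out as a single
   vmaxpd.  The rows are only data; the registration itself is done by the
   generic loops in ix86_init_mmx_sse_builtins below.  */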
/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I	V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1	V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I	V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1	V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF		V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF		V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2		V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2		V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI		V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI		V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI	V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI		V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI	V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI		V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2		V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2		V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2		V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2		V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF		V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF		V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI		V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI		V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI		V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI		V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM	V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM	V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM	V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM	V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP	V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP	V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP	V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP	V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF	V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF	V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF	V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF	V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF	V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF	V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF		V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF		V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2		V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2		V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI		V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI		V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI		V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI		V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI	V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI	V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI	V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI	V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI	V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI	V8HI_FTYPE_V16QI
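/* Reading guide for the MULTI_ARG_* aliases above (a naming sketch, not
   anything the code depends on): the digit is the operand count, the tail
   names the vector element mode, and a trailing 2 marks the 256-bit
   variant, so MULTI_ARG_3_SF is the three-operand V4SF signature and
   MULTI_ARG_3_SF2 its V8SF counterpart.  The _IMM, _CMP and _TF suffixes
   flag forms whose expansion takes an immediate, a comparison code or a
   PCOM_TRUE/PCOM_FALSE selector respectively.  */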
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
};
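/* Usage sketch for this table: through fma4intrin.h, a call such as

       __m128 r = _mm_macc_ps (a, b, c);

   reaches __builtin_ia32_vfmaddps and hence CODE_FOR_fma4i_fmaddv4sf4,
   while the XOP vpcom* rows additionally record the rtx comparison code
   (EQ, LT, GTU, ...) that ix86_expand_multi_arg_builtin folds into the
   generated insn.  */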
/* Set up all the MMX/SSE builtins, even builtins for instructions that are not
   in the current target ISA to allow the user to compile particular modules
   with different target specific options that differ from the command line
   options.  */
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }
  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
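  /* Registration sketch: for the bdesc_args row shown earlier for
     __builtin_ia32_maxpd256, the loop above amounts to the call

         def_builtin_const (OPTION_MASK_ISA_AVX, "__builtin_ia32_maxpd256",
                            V4DF_FTYPE_V4DF_V4DF, IX86_BUILTIN_MAXPD256);

     i.e. the bdesc_* tables are nothing but arguments for these loops.  */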
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }

  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3.  */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);
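  /* Construction sketch: mmintrin.h builds MMX vectors through these, e.g.
     _mm_set_pi32 comes down to roughly

         (__m64) __builtin_ia32_vec_init_v2si (i0, i1);

     so vector construction maps onto a vec_init pattern rather than a
     store/reload round trip.  */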
  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
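  /* Extraction sketch: _mm_extract_epi16 from emmintrin.h is roughly

         (unsigned short) __builtin_ia32_vec_ext_v8hi (v, n);

     a constant lane number selecting one element through the vec_extract
     patterns.  */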
  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
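  /* The matching insertion, _mm_insert_epi16, is roughly

         (__m128i) __builtin_ia32_vec_set_v8hi (v, val, n);

     one lane is replaced and the remaining lanes pass through unchanged.  */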
  /* Add FMA4 and XOP multi-arg builtins.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
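/* With these registered, 64-bit code can mix calling conventions; e.g. a
   function declared

       void f (int n, ...) __attribute__ ((ms_abi));

   uses __builtin_ms_va_list and __builtin_ms_va_start for its varargs,
   while the __builtin_sysv_va_* entry points keep the System V protocol
   available from the same translation unit.  */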
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* long double is not 80-bit extended precision here, so build a
         distinct REAL_TYPE for __float80.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  (*lang_hooks.types.register_builtin_type) (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  (*lang_hooks.types.register_builtin_type) (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
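/* Once registered, both type names are directly usable in user code, e.g.

       __float128 q = 1.0Q / 3.0Q;
       __float80 e = 1.0W;

   using the Q and W constant suffixes documented for these types.  */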
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We will expand them to normal call if SSE2 isn't available since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
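  /* E.g.

         __float128 x = __builtin_fabsq (y);

     expands inline when SSE2 is available and otherwise becomes a plain
     call to __fabstf2 in libgcc, which is the fallback the comment above
     refers to.  */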
  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */
static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
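/* Note the recurring idiom at the top of this and the following helpers:
   when optimizing, the caller-supplied target is deliberately discarded
   in favor of a fresh pseudo, leaving the register allocator rather than
   the expander to decide where the result lives.  The SImode/TImode case
   lets a 32-bit operand feed an insn whose operand mode is TImode by
   bouncing it through a V4SI load.  */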
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct {
    rtx op;
    enum machine_mode mode;
  } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
        {
          if (!CONST_INT_P (op))
            {
              error ("last argument must be an immediate");
              return gen_reg_rtx (tmode);
            }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
23127 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
23130 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23134 tree arg0 = CALL_EXPR_ARG (exp, 0);
23135 tree arg1 = CALL_EXPR_ARG (exp, 1);
23136 rtx op0 = expand_normal (arg0);
23137 rtx op1 = expand_normal (arg1);
23138 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23139 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23140 enum rtx_code comparison = d->comparison;
23142 if (VECTOR_MODE_P (mode0))
23143 op0 = safe_vector_operand (op0, mode0);
23144 if (VECTOR_MODE_P (mode1))
23145 op1 = safe_vector_operand (op1, mode1);
23147 /* Swap operands if we have a comparison that isn't available in
23149 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
23156 target = gen_reg_rtx (SImode);
23157 emit_move_insn (target, const0_rtx);
23158 target = gen_rtx_SUBREG (QImode, target, 0);
23160 if ((optimize && !register_operand (op0, mode0))
23161 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23162 op0 = copy_to_mode_reg (mode0, op0);
23163 if ((optimize && !register_operand (op1, mode1))
23164 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23165 op1 = copy_to_mode_reg (mode1, op1);
23167 pat = GEN_FCN (d->icode) (op0, op1);
23171 emit_insn (gen_rtx_SET (VOIDmode,
23172 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23173 gen_rtx_fmt_ee (comparison, QImode,
23177 return SUBREG_REG (target);
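/* Illustrative sketch, not part of the original source: a comi builtin
   such as __builtin_ia32_comieq (used by _mm_comieq_ss from
   <xmmintrin.h>) compares two scalars with COMISS, and the code above
   materializes the flag result as a byte written into the low part of a
   zeroed SImode pseudo (roughly "comiss; set<cc> %al"), returning the
   whole SImode register via SUBREG_REG.  */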
23180 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
23183 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
23187 tree arg0 = CALL_EXPR_ARG (exp, 0);
23188 tree arg1 = CALL_EXPR_ARG (exp, 1);
23189 rtx op0 = expand_normal (arg0);
23190 rtx op1 = expand_normal (arg1);
23191 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23192 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23193 enum rtx_code comparison = d->comparison;
23195 if (VECTOR_MODE_P (mode0))
23196 op0 = safe_vector_operand (op0, mode0);
23197 if (VECTOR_MODE_P (mode1))
23198 op1 = safe_vector_operand (op1, mode1);
23200 target = gen_reg_rtx (SImode);
23201 emit_move_insn (target, const0_rtx);
23202 target = gen_rtx_SUBREG (QImode, target, 0);
23204 if ((optimize && !register_operand (op0, mode0))
23205 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
23206 op0 = copy_to_mode_reg (mode0, op0);
23207 if ((optimize && !register_operand (op1, mode1))
23208 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
23209 op1 = copy_to_mode_reg (mode1, op1);
23211 pat = GEN_FCN (d->icode) (op0, op1);
23215 emit_insn (gen_rtx_SET (VOIDmode,
23216 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23217 gen_rtx_fmt_ee (comparison, QImode,
23221 return SUBREG_REG (target);
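/* Illustrative sketch, not part of the original source: SSE4.1's
   _mm_testz_si128 reaches this routine through the
   INT_FTYPE_V2DI_V2DI_PTEST descriptor and becomes PTEST followed by a
   flag-to-byte set, e.g., assuming <smmintrin.h>:  */
#if 0
#include <smmintrin.h>
int
all_masked_zero (__m128i v, __m128i mask)
{
  /* ptest + sete, via ix86_expand_sse_ptest.  */
  return _mm_testz_si128 (v, mask);
}
#endif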
23224 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
23227 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
23228 tree exp, rtx target)
23231 tree arg0 = CALL_EXPR_ARG (exp, 0);
23232 tree arg1 = CALL_EXPR_ARG (exp, 1);
23233 tree arg2 = CALL_EXPR_ARG (exp, 2);
23234 tree arg3 = CALL_EXPR_ARG (exp, 3);
23235 tree arg4 = CALL_EXPR_ARG (exp, 4);
23236 rtx scratch0, scratch1;
23237 rtx op0 = expand_normal (arg0);
23238 rtx op1 = expand_normal (arg1);
23239 rtx op2 = expand_normal (arg2);
23240 rtx op3 = expand_normal (arg3);
23241 rtx op4 = expand_normal (arg4);
23242 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
23244 tmode0 = insn_data[d->icode].operand[0].mode;
23245 tmode1 = insn_data[d->icode].operand[1].mode;
23246 modev2 = insn_data[d->icode].operand[2].mode;
23247 modei3 = insn_data[d->icode].operand[3].mode;
23248 modev4 = insn_data[d->icode].operand[4].mode;
23249 modei5 = insn_data[d->icode].operand[5].mode;
23250 modeimm = insn_data[d->icode].operand[6].mode;
23252 if (VECTOR_MODE_P (modev2))
23253 op0 = safe_vector_operand (op0, modev2);
23254 if (VECTOR_MODE_P (modev4))
23255 op2 = safe_vector_operand (op2, modev4);
23257 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23258 op0 = copy_to_mode_reg (modev2, op0);
23259 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
23260 op1 = copy_to_mode_reg (modei3, op1);
23261 if ((optimize && !register_operand (op2, modev4))
23262 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
23263 op2 = copy_to_mode_reg (modev4, op2);
23264 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
23265 op3 = copy_to_mode_reg (modei5, op3);
23267 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
23269 error ("the fifth argument must be an 8-bit immediate");
23273 if (d->code == IX86_BUILTIN_PCMPESTRI128)
23275 if (optimize || !target
23276 || GET_MODE (target) != tmode0
23277 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23278 target = gen_reg_rtx (tmode0);
23280 scratch1 = gen_reg_rtx (tmode1);
23282 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
23284 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
23286 if (optimize || !target
23287 || GET_MODE (target) != tmode1
23288 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23289 target = gen_reg_rtx (tmode1);
23291 scratch0 = gen_reg_rtx (tmode0);
23293 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
23297 gcc_assert (d->flag);
23299 scratch0 = gen_reg_rtx (tmode0);
23300 scratch1 = gen_reg_rtx (tmode1);
23302 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
23312 target = gen_reg_rtx (SImode);
23313 emit_move_insn (target, const0_rtx);
23314 target = gen_rtx_SUBREG (QImode, target, 0);
23317 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23318 gen_rtx_fmt_ee (EQ, QImode,
23319 gen_rtx_REG ((enum machine_mode) d->flag,
23322 return SUBREG_REG (target);
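/* Illustrative sketch, not part of the original source: the
   explicit-length string-compare intrinsics from <smmintrin.h> funnel
   through this routine; e.g. _mm_cmpestri maps to
   IX86_BUILTIN_PCMPESTRI128, with the two lengths as the integer
   operands and the control byte as the 8-bit immediate:  */
#if 0
#include <smmintrin.h>
int
first_equal_byte (__m128i a, int la, __m128i b, int lb)
{
  return _mm_cmpestri (a, la, b, lb,
                       _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
}
#endif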
23329 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
23332 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
23333 tree exp, rtx target)
23336 tree arg0 = CALL_EXPR_ARG (exp, 0);
23337 tree arg1 = CALL_EXPR_ARG (exp, 1);
23338 tree arg2 = CALL_EXPR_ARG (exp, 2);
23339 rtx scratch0, scratch1;
23340 rtx op0 = expand_normal (arg0);
23341 rtx op1 = expand_normal (arg1);
23342 rtx op2 = expand_normal (arg2);
23343 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
23345 tmode0 = insn_data[d->icode].operand[0].mode;
23346 tmode1 = insn_data[d->icode].operand[1].mode;
23347 modev2 = insn_data[d->icode].operand[2].mode;
23348 modev3 = insn_data[d->icode].operand[3].mode;
23349 modeimm = insn_data[d->icode].operand[4].mode;
23351 if (VECTOR_MODE_P (modev2))
23352 op0 = safe_vector_operand (op0, modev2);
23353 if (VECTOR_MODE_P (modev3))
23354 op1 = safe_vector_operand (op1, modev3);
23356 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
23357 op0 = copy_to_mode_reg (modev2, op0);
23358 if ((optimize && !register_operand (op1, modev3))
23359 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
23360 op1 = copy_to_mode_reg (modev3, op1);
23362 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
23364 error ("the third argument must be an 8-bit immediate");
23368 if (d->code == IX86_BUILTIN_PCMPISTRI128)
23370 if (optimize || !target
23371 || GET_MODE (target) != tmode0
23372 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
23373 target = gen_reg_rtx (tmode0);
23375 scratch1 = gen_reg_rtx (tmode1);
23377 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
23379 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
23381 if (optimize || !target
23382 || GET_MODE (target) != tmode1
23383 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
23384 target = gen_reg_rtx (tmode1);
23386 scratch0 = gen_reg_rtx (tmode0);
23388 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
23392 gcc_assert (d->flag);
23394 scratch0 = gen_reg_rtx (tmode0);
23395 scratch1 = gen_reg_rtx (tmode1);
23397 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
23407 target = gen_reg_rtx (SImode);
23408 emit_move_insn (target, const0_rtx);
23409 target = gen_rtx_SUBREG (QImode, target, 0);
23412 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
23413 gen_rtx_fmt_ee (EQ, QImode,
23414 gen_rtx_REG ((enum machine_mode) d->flag,
23417 return SUBREG_REG (target);
23423 /* Subroutine of ix86_expand_builtin to take care of insns with
23424 a variable number of operands.  */
23427 ix86_expand_args_builtin (const struct builtin_description *d,
23428 tree exp, rtx target)
23430 rtx pat, real_target;
23431 unsigned int i, nargs;
23432 unsigned int nargs_constant = 0;
23433 int num_memory = 0;
23437 enum machine_mode mode;
23439 bool last_arg_count = false;
23440 enum insn_code icode = d->icode;
23441 const struct insn_data *insn_p = &insn_data[icode];
23442 enum machine_mode tmode = insn_p->operand[0].mode;
23443 enum machine_mode rmode = VOIDmode;
23445 enum rtx_code comparison = d->comparison;
23447 switch ((enum ix86_builtin_func_type) d->flag)
23449 case INT_FTYPE_V8SF_V8SF_PTEST:
23450 case INT_FTYPE_V4DI_V4DI_PTEST:
23451 case INT_FTYPE_V4DF_V4DF_PTEST:
23452 case INT_FTYPE_V4SF_V4SF_PTEST:
23453 case INT_FTYPE_V2DI_V2DI_PTEST:
23454 case INT_FTYPE_V2DF_V2DF_PTEST:
23455 return ix86_expand_sse_ptest (d, exp, target);
23456 case FLOAT128_FTYPE_FLOAT128:
23457 case FLOAT_FTYPE_FLOAT:
23458 case INT_FTYPE_INT:
23459 case UINT64_FTYPE_INT:
23460 case UINT16_FTYPE_UINT16:
23461 case INT64_FTYPE_INT64:
23462 case INT64_FTYPE_V4SF:
23463 case INT64_FTYPE_V2DF:
23464 case INT_FTYPE_V16QI:
23465 case INT_FTYPE_V8QI:
23466 case INT_FTYPE_V8SF:
23467 case INT_FTYPE_V4DF:
23468 case INT_FTYPE_V4SF:
23469 case INT_FTYPE_V2DF:
23470 case V16QI_FTYPE_V16QI:
23471 case V8SI_FTYPE_V8SF:
23472 case V8SI_FTYPE_V4SI:
23473 case V8HI_FTYPE_V8HI:
23474 case V8HI_FTYPE_V16QI:
23475 case V8QI_FTYPE_V8QI:
23476 case V8SF_FTYPE_V8SF:
23477 case V8SF_FTYPE_V8SI:
23478 case V8SF_FTYPE_V4SF:
23479 case V4SI_FTYPE_V4SI:
23480 case V4SI_FTYPE_V16QI:
23481 case V4SI_FTYPE_V4SF:
23482 case V4SI_FTYPE_V8SI:
23483 case V4SI_FTYPE_V8HI:
23484 case V4SI_FTYPE_V4DF:
23485 case V4SI_FTYPE_V2DF:
23486 case V4HI_FTYPE_V4HI:
23487 case V4DF_FTYPE_V4DF:
23488 case V4DF_FTYPE_V4SI:
23489 case V4DF_FTYPE_V4SF:
23490 case V4DF_FTYPE_V2DF:
23491 case V4SF_FTYPE_V4SF:
23492 case V4SF_FTYPE_V4SI:
23493 case V4SF_FTYPE_V8SF:
23494 case V4SF_FTYPE_V4DF:
23495 case V4SF_FTYPE_V2DF:
23496 case V2DI_FTYPE_V2DI:
23497 case V2DI_FTYPE_V16QI:
23498 case V2DI_FTYPE_V8HI:
23499 case V2DI_FTYPE_V4SI:
23500 case V2DF_FTYPE_V2DF:
23501 case V2DF_FTYPE_V4SI:
23502 case V2DF_FTYPE_V4DF:
23503 case V2DF_FTYPE_V4SF:
23504 case V2DF_FTYPE_V2SI:
23505 case V2SI_FTYPE_V2SI:
23506 case V2SI_FTYPE_V4SF:
23507 case V2SI_FTYPE_V2SF:
23508 case V2SI_FTYPE_V2DF:
23509 case V2SF_FTYPE_V2SF:
23510 case V2SF_FTYPE_V2SI:
23513 case V4SF_FTYPE_V4SF_VEC_MERGE:
23514 case V2DF_FTYPE_V2DF_VEC_MERGE:
23515 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
23516 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
23517 case V16QI_FTYPE_V16QI_V16QI:
23518 case V16QI_FTYPE_V8HI_V8HI:
23519 case V8QI_FTYPE_V8QI_V8QI:
23520 case V8QI_FTYPE_V4HI_V4HI:
23521 case V8HI_FTYPE_V8HI_V8HI:
23522 case V8HI_FTYPE_V16QI_V16QI:
23523 case V8HI_FTYPE_V4SI_V4SI:
23524 case V8SF_FTYPE_V8SF_V8SF:
23525 case V8SF_FTYPE_V8SF_V8SI:
23526 case V4SI_FTYPE_V4SI_V4SI:
23527 case V4SI_FTYPE_V8HI_V8HI:
23528 case V4SI_FTYPE_V4SF_V4SF:
23529 case V4SI_FTYPE_V2DF_V2DF:
23530 case V4HI_FTYPE_V4HI_V4HI:
23531 case V4HI_FTYPE_V8QI_V8QI:
23532 case V4HI_FTYPE_V2SI_V2SI:
23533 case V4DF_FTYPE_V4DF_V4DF:
23534 case V4DF_FTYPE_V4DF_V4DI:
23535 case V4SF_FTYPE_V4SF_V4SF:
23536 case V4SF_FTYPE_V4SF_V4SI:
23537 case V4SF_FTYPE_V4SF_V2SI:
23538 case V4SF_FTYPE_V4SF_V2DF:
23539 case V4SF_FTYPE_V4SF_DI:
23540 case V4SF_FTYPE_V4SF_SI:
23541 case V2DI_FTYPE_V2DI_V2DI:
23542 case V2DI_FTYPE_V16QI_V16QI:
23543 case V2DI_FTYPE_V4SI_V4SI:
23544 case V2DI_FTYPE_V2DI_V16QI:
23545 case V2DI_FTYPE_V2DF_V2DF:
23546 case V2SI_FTYPE_V2SI_V2SI:
23547 case V2SI_FTYPE_V4HI_V4HI:
23548 case V2SI_FTYPE_V2SF_V2SF:
23549 case V2DF_FTYPE_V2DF_V2DF:
23550 case V2DF_FTYPE_V2DF_V4SF:
23551 case V2DF_FTYPE_V2DF_V2DI:
23552 case V2DF_FTYPE_V2DF_DI:
23553 case V2DF_FTYPE_V2DF_SI:
23554 case V2SF_FTYPE_V2SF_V2SF:
23555 case V1DI_FTYPE_V1DI_V1DI:
23556 case V1DI_FTYPE_V8QI_V8QI:
23557 case V1DI_FTYPE_V2SI_V2SI:
23558 if (comparison == UNKNOWN)
23559 return ix86_expand_binop_builtin (icode, exp, target);
23562 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23563 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23564 gcc_assert (comparison != UNKNOWN);
23568 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23569 case V8HI_FTYPE_V8HI_SI_COUNT:
23570 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23571 case V4SI_FTYPE_V4SI_SI_COUNT:
23572 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23573 case V4HI_FTYPE_V4HI_SI_COUNT:
23574 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23575 case V2DI_FTYPE_V2DI_SI_COUNT:
23576 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23577 case V2SI_FTYPE_V2SI_SI_COUNT:
23578 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23579 case V1DI_FTYPE_V1DI_SI_COUNT:
23581 last_arg_count = true;
23583 case UINT64_FTYPE_UINT64_UINT64:
23584 case UINT_FTYPE_UINT_UINT:
23585 case UINT_FTYPE_UINT_USHORT:
23586 case UINT_FTYPE_UINT_UCHAR:
23587 case UINT16_FTYPE_UINT16_INT:
23588 case UINT8_FTYPE_UINT8_INT:
23591 case V2DI_FTYPE_V2DI_INT_CONVERT:
23594 nargs_constant = 1;
23596 case V8HI_FTYPE_V8HI_INT:
23597 case V8SF_FTYPE_V8SF_INT:
23598 case V4SI_FTYPE_V4SI_INT:
23599 case V4SI_FTYPE_V8SI_INT:
23600 case V4HI_FTYPE_V4HI_INT:
23601 case V4DF_FTYPE_V4DF_INT:
23602 case V4SF_FTYPE_V4SF_INT:
23603 case V4SF_FTYPE_V8SF_INT:
23604 case V2DI_FTYPE_V2DI_INT:
23605 case V2DF_FTYPE_V2DF_INT:
23606 case V2DF_FTYPE_V4DF_INT:
23608 nargs_constant = 1;
23610 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23611 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23612 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23613 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23614 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23617 case V16QI_FTYPE_V16QI_V16QI_INT:
23618 case V8HI_FTYPE_V8HI_V8HI_INT:
23619 case V8SI_FTYPE_V8SI_V8SI_INT:
23620 case V8SI_FTYPE_V8SI_V4SI_INT:
23621 case V8SF_FTYPE_V8SF_V8SF_INT:
23622 case V8SF_FTYPE_V8SF_V4SF_INT:
23623 case V4SI_FTYPE_V4SI_V4SI_INT:
23624 case V4DF_FTYPE_V4DF_V4DF_INT:
23625 case V4DF_FTYPE_V4DF_V2DF_INT:
23626 case V4SF_FTYPE_V4SF_V4SF_INT:
23627 case V2DI_FTYPE_V2DI_V2DI_INT:
23628 case V2DF_FTYPE_V2DF_V2DF_INT:
23630 nargs_constant = 1;
23632 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
23635 nargs_constant = 1;
23637 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
23640 nargs_constant = 1;
23642 case V2DI_FTYPE_V2DI_UINT_UINT:
23644 nargs_constant = 2;
23646 case MULTI_ARG_4_DF2_DI_I:
23647 case MULTI_ARG_4_DF2_DI_I1:
23648 case MULTI_ARG_4_SF2_SI_I:
23649 case MULTI_ARG_4_SF2_SI_I1:
23651 nargs_constant = 1;
23653 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23655 nargs_constant = 2;
23658 gcc_unreachable ();
23661 gcc_assert (nargs <= ARRAY_SIZE (args));
23663 if (comparison != UNKNOWN)
23665 gcc_assert (nargs == 2);
23666 return ix86_expand_sse_compare (d, exp, target, swap);
23669 if (rmode == VOIDmode || rmode == tmode)
23673 || GET_MODE (target) != tmode
23674 || ! (*insn_p->operand[0].predicate) (target, tmode))
23675 target = gen_reg_rtx (tmode);
23676 real_target = target;
23680 target = gen_reg_rtx (rmode);
23681 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
23684 for (i = 0; i < nargs; i++)
23686 tree arg = CALL_EXPR_ARG (exp, i);
23687 rtx op = expand_normal (arg);
23688 enum machine_mode mode = insn_p->operand[i + 1].mode;
23689 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
23691 if (last_arg_count && (i + 1) == nargs)
23693 /* SIMD shift insns take either an 8-bit immediate or a
23694 register as the count.  But the builtin functions take an int as
23695 the count.  If the count doesn't match, we put it in a register.  */
23698 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
23699 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
23700 op = copy_to_reg (op);
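	  /* Example (illustrative, not original source): _mm_slli_epi16
	     passes its count as an int, but the shift insn wants either an
	     8-bit immediate or a register; when the count operand does not
	     already satisfy the predicate, the subreg+copy above moves it
	     into a register so the register-count form can be used.  */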
23703 else if ((nargs - i) <= nargs_constant)
23708 case CODE_FOR_sse4_1_roundpd:
23709 case CODE_FOR_sse4_1_roundps:
23710 case CODE_FOR_sse4_1_roundsd:
23711 case CODE_FOR_sse4_1_roundss:
23712 case CODE_FOR_sse4_1_blendps:
23713 case CODE_FOR_avx_blendpd256:
23714 case CODE_FOR_avx_vpermilv4df:
23715 case CODE_FOR_avx_roundpd256:
23716 case CODE_FOR_avx_roundps256:
23717 error ("the last argument must be a 4-bit immediate");
23720 case CODE_FOR_sse4_1_blendpd:
23721 case CODE_FOR_avx_vpermilv2df:
23722 case CODE_FOR_xop_vpermil2v2df3:
23723 case CODE_FOR_xop_vpermil2v4sf3:
23724 case CODE_FOR_xop_vpermil2v4df3:
23725 case CODE_FOR_xop_vpermil2v8sf3:
23726 error ("the last argument must be a 2-bit immediate");
23729 case CODE_FOR_avx_vextractf128v4df:
23730 case CODE_FOR_avx_vextractf128v8sf:
23731 case CODE_FOR_avx_vextractf128v8si:
23732 case CODE_FOR_avx_vinsertf128v4df:
23733 case CODE_FOR_avx_vinsertf128v8sf:
23734 case CODE_FOR_avx_vinsertf128v8si:
23735 error ("the last argument must be a 1-bit immediate");
23738 case CODE_FOR_avx_cmpsdv2df3:
23739 case CODE_FOR_avx_cmpssv4sf3:
23740 case CODE_FOR_avx_cmppdv2df3:
23741 case CODE_FOR_avx_cmppsv4sf3:
23742 case CODE_FOR_avx_cmppdv4df3:
23743 case CODE_FOR_avx_cmppsv8sf3:
23744 error ("the last argument must be a 5-bit immediate");
23748 switch (nargs_constant)
23751 if ((nargs - i) == nargs_constant)
23753 error ("the next to last argument must be an 8-bit immediate");
23757 error ("the last argument must be an 8-bit immediate");
23760 gcc_unreachable ();
23767 if (VECTOR_MODE_P (mode))
23768 op = safe_vector_operand (op, mode);
23770 /* If we aren't optimizing, only allow one memory operand to be generated.  */
23772 if (memory_operand (op, mode))
23775 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
23777 if (optimize || !match || num_memory > 1)
23778 op = copy_to_mode_reg (mode, op);
23782 op = copy_to_reg (op);
23783 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
23788 args[i].mode = mode;
23794 pat = GEN_FCN (icode) (real_target, args[0].op);
23797 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
23800 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23804 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
23805 args[2].op, args[3].op);
23808 gcc_unreachable ();
23818 /* Subroutine of ix86_expand_builtin to take care of special insns
23819 with a variable number of operands.  */
23822 ix86_expand_special_args_builtin (const struct builtin_description *d,
23823 tree exp, rtx target)
23827 unsigned int i, nargs, arg_adjust, memory;
23831 enum machine_mode mode;
23833 enum insn_code icode = d->icode;
23834 bool last_arg_constant = false;
23835 const struct insn_data *insn_p = &insn_data[icode];
23836 enum machine_mode tmode = insn_p->operand[0].mode;
23837 enum { load, store } klass;
23839 switch ((enum ix86_builtin_func_type) d->flag)
23841 case VOID_FTYPE_VOID:
23842 emit_insn (GEN_FCN (icode) (target));
23844 case UINT64_FTYPE_VOID:
23849 case UINT64_FTYPE_PUNSIGNED:
23850 case V2DI_FTYPE_PV2DI:
23851 case V32QI_FTYPE_PCCHAR:
23852 case V16QI_FTYPE_PCCHAR:
23853 case V8SF_FTYPE_PCV4SF:
23854 case V8SF_FTYPE_PCFLOAT:
23855 case V4SF_FTYPE_PCFLOAT:
23856 case V4DF_FTYPE_PCV2DF:
23857 case V4DF_FTYPE_PCDOUBLE:
23858 case V2DF_FTYPE_PCDOUBLE:
23859 case VOID_FTYPE_PVOID:
23864 case VOID_FTYPE_PV2SF_V4SF:
23865 case VOID_FTYPE_PV4DI_V4DI:
23866 case VOID_FTYPE_PV2DI_V2DI:
23867 case VOID_FTYPE_PCHAR_V32QI:
23868 case VOID_FTYPE_PCHAR_V16QI:
23869 case VOID_FTYPE_PFLOAT_V8SF:
23870 case VOID_FTYPE_PFLOAT_V4SF:
23871 case VOID_FTYPE_PDOUBLE_V4DF:
23872 case VOID_FTYPE_PDOUBLE_V2DF:
23873 case VOID_FTYPE_PULONGLONG_ULONGLONG:
23874 case VOID_FTYPE_PINT_INT:
23877 /* Reserve memory operand for target. */
23878 memory = ARRAY_SIZE (args);
23880 case V4SF_FTYPE_V4SF_PCV2SF:
23881 case V2DF_FTYPE_V2DF_PCDOUBLE:
23886 case V8SF_FTYPE_PCV8SF_V8SF:
23887 case V4DF_FTYPE_PCV4DF_V4DF:
23888 case V4SF_FTYPE_PCV4SF_V4SF:
23889 case V2DF_FTYPE_PCV2DF_V2DF:
23894 case VOID_FTYPE_PV8SF_V8SF_V8SF:
23895 case VOID_FTYPE_PV4DF_V4DF_V4DF:
23896 case VOID_FTYPE_PV4SF_V4SF_V4SF:
23897 case VOID_FTYPE_PV2DF_V2DF_V2DF:
23900 /* Reserve memory operand for target. */
23901 memory = ARRAY_SIZE (args);
23903 case VOID_FTYPE_UINT_UINT_UINT:
23904 case VOID_FTYPE_UINT64_UINT_UINT:
23905 case UCHAR_FTYPE_UINT_UINT_UINT:
23906 case UCHAR_FTYPE_UINT64_UINT_UINT:
23909 memory = ARRAY_SIZE (args);
23910 last_arg_constant = true;
23913 gcc_unreachable ();
23916 gcc_assert (nargs <= ARRAY_SIZE (args));
23918 if (klass == store)
23920 arg = CALL_EXPR_ARG (exp, 0);
23921 op = expand_normal (arg);
23922 gcc_assert (target == 0);
23923 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
23931 || GET_MODE (target) != tmode
23932 || ! (*insn_p->operand[0].predicate) (target, tmode))
23933 target = gen_reg_rtx (tmode);
23936 for (i = 0; i < nargs; i++)
23938 enum machine_mode mode = insn_p->operand[i + 1].mode;
23941 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
23942 op = expand_normal (arg);
23943 match = (*insn_p->operand[i + 1].predicate) (op, mode);
23945 if (last_arg_constant && (i + 1) == nargs)
23949 if (icode == CODE_FOR_lwp_lwpvalsi3
23950 || icode == CODE_FOR_lwp_lwpinssi3
23951 || icode == CODE_FOR_lwp_lwpvaldi3
23952 || icode == CODE_FOR_lwp_lwpinsdi3)
23953 error ("the last argument must be a 32-bit immediate");
23955 error ("the last argument must be an 8-bit immediate");
23963 /* This must be the memory operand. */
23964 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
23965 gcc_assert (GET_MODE (op) == mode
23966 || GET_MODE (op) == VOIDmode);
23970 /* This must be a register.  */
23971 if (VECTOR_MODE_P (mode))
23972 op = safe_vector_operand (op, mode);
23974 gcc_assert (GET_MODE (op) == mode
23975 || GET_MODE (op) == VOIDmode);
23976 op = copy_to_mode_reg (mode, op);
23981 args[i].mode = mode;
23987 pat = GEN_FCN (icode) (target);
23990 pat = GEN_FCN (icode) (target, args[0].op);
23993 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23996 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23999 gcc_unreachable ();
24005 return klass == store ? 0 : target;
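/* Illustrative sketch, not part of the original source: a store-class
   descriptor such as VOID_FTYPE_PFLOAT_V4SF (the movntps builtin behind
   _mm_stream_ps) is handled above with klass == store: the first call
   argument becomes the MEM target and the function returns 0.  */
#if 0
#include <xmmintrin.h>
void
stream_store (float *p, __m128 v)
{
  _mm_stream_ps (p, v);         /* expanded with klass == store */
}
#endif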
24008 /* Return the integer constant in ARG. Constrain it to be in the range
24009 of the subparts of VEC_TYPE; issue an error if not. */
24012 get_element_number (tree vec_type, tree arg)
24014 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24016 if (!host_integerp (arg, 1)
24017 || (elt = tree_low_cst (arg, 1), elt > max))
24019 error ("selector must be an integer constant in the range 0..%wi", max);
24026 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24027 ix86_expand_vector_init. We DO have language-level syntax for this, in
24028 the form of (type){ init-list }. Except that since we can't place emms
24029 instructions from inside the compiler, we can't allow the use of MMX
24030 registers unless the user explicitly asks for it. So we do *not* define
24031 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24032 we have builtins invoked by mmintrin.h that give us license to emit
24033 these sorts of instructions. */
24036 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24038 enum machine_mode tmode = TYPE_MODE (type);
24039 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24040 int i, n_elt = GET_MODE_NUNITS (tmode);
24041 rtvec v = rtvec_alloc (n_elt);
24043 gcc_assert (VECTOR_MODE_P (tmode));
24044 gcc_assert (call_expr_nargs (exp) == n_elt);
24046 for (i = 0; i < n_elt; ++i)
24048 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24049 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24052 if (!target || !register_operand (target, tmode))
24053 target = gen_reg_rtx (tmode);
24055 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
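/* Illustrative sketch, not part of the original source: the MMX set
   intrinsics are defined in mmintrin.h in terms of these builtins; e.g.
   _mm_setr_pi16 uses __builtin_ia32_vec_init_v4hi, which lands here as
   IX86_BUILTIN_VEC_INIT_V4HI:  */
#if 0
#include <mmintrin.h>
__m64
make_v4hi (short a, short b, short c, short d)
{
  return _mm_setr_pi16 (a, b, c, d);
}
#endif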
24059 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24060 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24061 had a language-level syntax for referencing vector elements. */
24064 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24066 enum machine_mode tmode, mode0;
24071 arg0 = CALL_EXPR_ARG (exp, 0);
24072 arg1 = CALL_EXPR_ARG (exp, 1);
24074 op0 = expand_normal (arg0);
24075 elt = get_element_number (TREE_TYPE (arg0), arg1);
24077 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24078 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24079 gcc_assert (VECTOR_MODE_P (mode0));
24081 op0 = force_reg (mode0, op0);
24083 if (optimize || !target || !register_operand (target, tmode))
24084 target = gen_reg_rtx (tmode);
24086 ix86_expand_vector_extract (true, target, op0, elt);
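/* Illustrative sketch, not part of the original source: element
   extraction intrinsics route here; e.g. SSE4.1's _mm_extract_epi32 is
   defined with __builtin_ia32_vec_ext_v4si (IX86_BUILTIN_VEC_EXT_V4SI):  */
#if 0
#include <smmintrin.h>
int
third_lane (__m128i v)
{
  return _mm_extract_epi32 (v, 2);
}
#endif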
24091 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24092 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24093 a language-level syntax for referencing vector elements. */
24096 ix86_expand_vec_set_builtin (tree exp)
24098 enum machine_mode tmode, mode1;
24099 tree arg0, arg1, arg2;
24101 rtx op0, op1, target;
24103 arg0 = CALL_EXPR_ARG (exp, 0);
24104 arg1 = CALL_EXPR_ARG (exp, 1);
24105 arg2 = CALL_EXPR_ARG (exp, 2);
24107 tmode = TYPE_MODE (TREE_TYPE (arg0));
24108 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24109 gcc_assert (VECTOR_MODE_P (tmode));
24111 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24112 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24113 elt = get_element_number (TREE_TYPE (arg0), arg2);
24115 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24116 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24118 op0 = force_reg (tmode, op0);
24119 op1 = force_reg (mode1, op1);
24121 /* OP0 is the source of these builtin functions and shouldn't be
24122 modified. Create a copy, use it and return it as target. */
24123 target = gen_reg_rtx (tmode);
24124 emit_move_insn (target, op0);
24125 ix86_expand_vector_set (true, target, op1, elt);
24130 /* Expand an expression EXP that calls a built-in function,
24131 with result going to TARGET if that's convenient
24132 (and in mode MODE if that's convenient).
24133 SUBTARGET may be used as the target for computing one of EXP's operands.
24134 IGNORE is nonzero if the value is to be ignored. */
24137 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24138 enum machine_mode mode ATTRIBUTE_UNUSED,
24139 int ignore ATTRIBUTE_UNUSED)
24141 const struct builtin_description *d;
24143 enum insn_code icode;
24144 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24145 tree arg0, arg1, arg2;
24146 rtx op0, op1, op2, pat;
24147 enum machine_mode mode0, mode1, mode2;
24148 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24150 /* Determine whether the builtin function is available under the current ISA.
24151 Originally the builtin was not created if it wasn't applicable to the
24152 current ISA based on the command line switches. With function specific
24153 options, we need to check in the context of the function making the call
24154 whether it is supported. */
24155 if (ix86_builtins_isa[fcode].isa
24156 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24158 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24159 NULL, NULL, false);
24162 error ("%qE needs unknown isa option", fndecl);
24165 gcc_assert (opts != NULL);
24166 error ("%qE needs isa option %s", fndecl, opts);
24174 case IX86_BUILTIN_MASKMOVQ:
24175 case IX86_BUILTIN_MASKMOVDQU:
24176 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24177 ? CODE_FOR_mmx_maskmovq
24178 : CODE_FOR_sse2_maskmovdqu);
24179 /* Note the arg order is different from the operand order. */
24180 arg1 = CALL_EXPR_ARG (exp, 0);
24181 arg2 = CALL_EXPR_ARG (exp, 1);
24182 arg0 = CALL_EXPR_ARG (exp, 2);
24183 op0 = expand_normal (arg0);
24184 op1 = expand_normal (arg1);
24185 op2 = expand_normal (arg2);
24186 mode0 = insn_data[icode].operand[0].mode;
24187 mode1 = insn_data[icode].operand[1].mode;
24188 mode2 = insn_data[icode].operand[2].mode;
24190 op0 = force_reg (Pmode, op0);
24191 op0 = gen_rtx_MEM (mode1, op0);
24193 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
24194 op0 = copy_to_mode_reg (mode0, op0);
24195 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
24196 op1 = copy_to_mode_reg (mode1, op1);
24197 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
24198 op2 = copy_to_mode_reg (mode2, op2);
24199 pat = GEN_FCN (icode) (op0, op1, op2);
24205 case IX86_BUILTIN_LDMXCSR:
24206 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
24207 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24208 emit_move_insn (target, op0);
24209 emit_insn (gen_sse_ldmxcsr (target));
24212 case IX86_BUILTIN_STMXCSR:
24213 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
24214 emit_insn (gen_sse_stmxcsr (target));
24215 return copy_to_mode_reg (SImode, target);
24217 case IX86_BUILTIN_CLFLUSH:
24218 arg0 = CALL_EXPR_ARG (exp, 0);
24219 op0 = expand_normal (arg0);
24220 icode = CODE_FOR_sse2_clflush;
24221 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24222 op0 = copy_to_mode_reg (Pmode, op0);
24224 emit_insn (gen_sse2_clflush (op0));
24227 case IX86_BUILTIN_MONITOR:
24228 arg0 = CALL_EXPR_ARG (exp, 0);
24229 arg1 = CALL_EXPR_ARG (exp, 1);
24230 arg2 = CALL_EXPR_ARG (exp, 2);
24231 op0 = expand_normal (arg0);
24232 op1 = expand_normal (arg1);
24233 op2 = expand_normal (arg2);
24235 op0 = copy_to_mode_reg (Pmode, op0);
24237 op1 = copy_to_mode_reg (SImode, op1);
24239 op2 = copy_to_mode_reg (SImode, op2);
24240 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
24243 case IX86_BUILTIN_MWAIT:
24244 arg0 = CALL_EXPR_ARG (exp, 0);
24245 arg1 = CALL_EXPR_ARG (exp, 1);
24246 op0 = expand_normal (arg0);
24247 op1 = expand_normal (arg1);
24249 op0 = copy_to_mode_reg (SImode, op0);
24251 op1 = copy_to_mode_reg (SImode, op1);
24252 emit_insn (gen_sse3_mwait (op0, op1));
24255 case IX86_BUILTIN_VEC_INIT_V2SI:
24256 case IX86_BUILTIN_VEC_INIT_V4HI:
24257 case IX86_BUILTIN_VEC_INIT_V8QI:
24258 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
24260 case IX86_BUILTIN_VEC_EXT_V2DF:
24261 case IX86_BUILTIN_VEC_EXT_V2DI:
24262 case IX86_BUILTIN_VEC_EXT_V4SF:
24263 case IX86_BUILTIN_VEC_EXT_V4SI:
24264 case IX86_BUILTIN_VEC_EXT_V8HI:
24265 case IX86_BUILTIN_VEC_EXT_V2SI:
24266 case IX86_BUILTIN_VEC_EXT_V4HI:
24267 case IX86_BUILTIN_VEC_EXT_V16QI:
24268 return ix86_expand_vec_ext_builtin (exp, target);
24270 case IX86_BUILTIN_VEC_SET_V2DI:
24271 case IX86_BUILTIN_VEC_SET_V4SF:
24272 case IX86_BUILTIN_VEC_SET_V4SI:
24273 case IX86_BUILTIN_VEC_SET_V8HI:
24274 case IX86_BUILTIN_VEC_SET_V4HI:
24275 case IX86_BUILTIN_VEC_SET_V16QI:
24276 return ix86_expand_vec_set_builtin (exp);
24278 case IX86_BUILTIN_VEC_PERM_V2DF:
24279 case IX86_BUILTIN_VEC_PERM_V4SF:
24280 case IX86_BUILTIN_VEC_PERM_V2DI:
24281 case IX86_BUILTIN_VEC_PERM_V4SI:
24282 case IX86_BUILTIN_VEC_PERM_V8HI:
24283 case IX86_BUILTIN_VEC_PERM_V16QI:
24284 case IX86_BUILTIN_VEC_PERM_V2DI_U:
24285 case IX86_BUILTIN_VEC_PERM_V4SI_U:
24286 case IX86_BUILTIN_VEC_PERM_V8HI_U:
24287 case IX86_BUILTIN_VEC_PERM_V16QI_U:
24288 case IX86_BUILTIN_VEC_PERM_V4DF:
24289 case IX86_BUILTIN_VEC_PERM_V8SF:
24290 return ix86_expand_vec_perm_builtin (exp);
24292 case IX86_BUILTIN_INFQ:
24293 case IX86_BUILTIN_HUGE_VALQ:
24295 REAL_VALUE_TYPE inf;
24299 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
24301 tmp = validize_mem (force_const_mem (mode, tmp));
24304 target = gen_reg_rtx (mode);
24306 emit_move_insn (target, tmp);
24310 case IX86_BUILTIN_LLWPCB:
24311 arg0 = CALL_EXPR_ARG (exp, 0);
24312 op0 = expand_normal (arg0);
24313 icode = CODE_FOR_lwp_llwpcb;
24314 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
24315 op0 = copy_to_mode_reg (Pmode, op0);
24316 emit_insn (gen_lwp_llwpcb (op0));
24319 case IX86_BUILTIN_SLWPCB:
24320 icode = CODE_FOR_lwp_slwpcb;
24322 || ! (*insn_data[icode].operand[0].predicate) (target, Pmode))
24323 target = gen_reg_rtx (Pmode);
24324 emit_insn (gen_lwp_slwpcb (target));
24331 for (i = 0, d = bdesc_special_args;
24332 i < ARRAY_SIZE (bdesc_special_args);
24334 if (d->code == fcode)
24335 return ix86_expand_special_args_builtin (d, exp, target);
24337 for (i = 0, d = bdesc_args;
24338 i < ARRAY_SIZE (bdesc_args);
24340 if (d->code == fcode)
24343 case IX86_BUILTIN_FABSQ:
24344 case IX86_BUILTIN_COPYSIGNQ:
24346 /* Emit a normal call if SSE2 isn't available. */
24347 return expand_call (exp, target, ignore);
24349 return ix86_expand_args_builtin (d, exp, target);
24352 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
24353 if (d->code == fcode)
24354 return ix86_expand_sse_comi (d, exp, target);
24356 for (i = 0, d = bdesc_pcmpestr;
24357 i < ARRAY_SIZE (bdesc_pcmpestr);
24359 if (d->code == fcode)
24360 return ix86_expand_sse_pcmpestr (d, exp, target);
24362 for (i = 0, d = bdesc_pcmpistr;
24363 i < ARRAY_SIZE (bdesc_pcmpistr);
24365 if (d->code == fcode)
24366 return ix86_expand_sse_pcmpistr (d, exp, target);
24368 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
24369 if (d->code == fcode)
24370 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
24371 (enum ix86_builtin_func_type)
24372 d->flag, d->comparison);
24374 gcc_unreachable ();
24377 /* Returns a function decl for a vectorized version of the builtin function
24378 with builtin function code FN and the result vector type TYPE, or NULL_TREE
24379 if it is not available. */
24382 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
24385 enum machine_mode in_mode, out_mode;
24388 if (TREE_CODE (type_out) != VECTOR_TYPE
24389 || TREE_CODE (type_in) != VECTOR_TYPE)
24392 out_mode = TYPE_MODE (TREE_TYPE (type_out));
24393 out_n = TYPE_VECTOR_SUBPARTS (type_out);
24394 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24395 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24399 case BUILT_IN_SQRT:
24400 if (out_mode == DFmode && out_n == 2
24401 && in_mode == DFmode && in_n == 2)
24402 return ix86_builtins[IX86_BUILTIN_SQRTPD];
24405 case BUILT_IN_SQRTF:
24406 if (out_mode == SFmode && out_n == 4
24407 && in_mode == SFmode && in_n == 4)
24408 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
24411 case BUILT_IN_LRINT:
24412 if (out_mode == SImode && out_n == 4
24413 && in_mode == DFmode && in_n == 2)
24414 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
24417 case BUILT_IN_LRINTF:
24418 if (out_mode == SImode && out_n == 4
24419 && in_mode == SFmode && in_n == 4)
24420 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
24423 case BUILT_IN_COPYSIGN:
24424 if (out_mode == DFmode && out_n == 2
24425 && in_mode == DFmode && in_n == 2)
24426 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
24429 case BUILT_IN_COPYSIGNF:
24430 if (out_mode == SFmode && out_n == 4
24431 && in_mode == SFmode && in_n == 4)
24432 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
24439 /* Dispatch to a handler for a vectorization library. */
24440 if (ix86_veclib_handler)
24441 return (*ix86_veclib_handler) ((enum built_in_function) fn, type_out,
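/* Illustrative sketch, not part of the original source: with -O3 -msse2,
   the vectorizer queries this hook for BUILT_IN_SQRT with V2DF as both
   TYPE_OUT and TYPE_IN for a loop such as the one below, and receives
   the decl for IX86_BUILTIN_SQRTPD (sqrtpd, two DFmode lanes):  */
#if 0
void
vsqrt (double *a, const double *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = __builtin_sqrt (b[i]);
}
#endif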
24447 /* Handler for an SVML-style interface to
24448 a library with vectorized intrinsics. */
24451 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
24454 tree fntype, new_fndecl, args;
24457 enum machine_mode el_mode, in_mode;
24460 /* The SVML is suitable for unsafe math only. */
24461 if (!flag_unsafe_math_optimizations)
24464 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24465 n = TYPE_VECTOR_SUBPARTS (type_out);
24466 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24467 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24468 if (el_mode != in_mode
24476 case BUILT_IN_LOG10:
24478 case BUILT_IN_TANH:
24480 case BUILT_IN_ATAN:
24481 case BUILT_IN_ATAN2:
24482 case BUILT_IN_ATANH:
24483 case BUILT_IN_CBRT:
24484 case BUILT_IN_SINH:
24486 case BUILT_IN_ASINH:
24487 case BUILT_IN_ASIN:
24488 case BUILT_IN_COSH:
24490 case BUILT_IN_ACOSH:
24491 case BUILT_IN_ACOS:
24492 if (el_mode != DFmode || n != 2)
24496 case BUILT_IN_EXPF:
24497 case BUILT_IN_LOGF:
24498 case BUILT_IN_LOG10F:
24499 case BUILT_IN_POWF:
24500 case BUILT_IN_TANHF:
24501 case BUILT_IN_TANF:
24502 case BUILT_IN_ATANF:
24503 case BUILT_IN_ATAN2F:
24504 case BUILT_IN_ATANHF:
24505 case BUILT_IN_CBRTF:
24506 case BUILT_IN_SINHF:
24507 case BUILT_IN_SINF:
24508 case BUILT_IN_ASINHF:
24509 case BUILT_IN_ASINF:
24510 case BUILT_IN_COSHF:
24511 case BUILT_IN_COSF:
24512 case BUILT_IN_ACOSHF:
24513 case BUILT_IN_ACOSF:
24514 if (el_mode != SFmode || n != 4)
24522 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24524 if (fn == BUILT_IN_LOGF)
24525 strcpy (name, "vmlsLn4");
24526 else if (fn == BUILT_IN_LOG)
24527 strcpy (name, "vmldLn2");
24530 sprintf (name, "vmls%s", bname+10);
24531 name[strlen (name)-1] = '4';
24534 sprintf (name, "vmld%s2", bname+10);
24536 /* Convert to uppercase. */
24540 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24541 args = TREE_CHAIN (args))
24545 fntype = build_function_type_list (type_out, type_in, NULL);
24547 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24549 /* Build a function declaration for the vectorized function. */
24550 new_fndecl = build_decl (BUILTINS_LOCATION,
24551 FUNCTION_DECL, get_identifier (name), fntype);
24552 TREE_PUBLIC (new_fndecl) = 1;
24553 DECL_EXTERNAL (new_fndecl) = 1;
24554 DECL_IS_NOVOPS (new_fndecl) = 1;
24555 TREE_READONLY (new_fndecl) = 1;
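/* Worked example (illustrative, not original source): for BUILT_IN_SINF,
   BNAME is "__builtin_sinf", so BNAME+10 is "sinf"; the SFmode branch
   builds "vmlssinf", rewrites the final character to '4' giving
   "vmlssin4", and the uppercase fixup on the fifth character yields
   "vmlsSin4", the SVML entry point for four packed floats.  */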
24560 /* Handler for an ACML-style interface to
24561 a library with vectorized intrinsics. */
24564 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
24566 char name[20] = "__vr.._";
24567 tree fntype, new_fndecl, args;
24570 enum machine_mode el_mode, in_mode;
24573 /* The ACML is 64-bit only and suitable for unsafe math only, as
24574 it does not correctly support parts of IEEE with the required
24575 precision, such as denormals.  */
24577 || !flag_unsafe_math_optimizations)
24580 el_mode = TYPE_MODE (TREE_TYPE (type_out));
24581 n = TYPE_VECTOR_SUBPARTS (type_out);
24582 in_mode = TYPE_MODE (TREE_TYPE (type_in));
24583 in_n = TYPE_VECTOR_SUBPARTS (type_in);
24584 if (el_mode != in_mode
24594 case BUILT_IN_LOG2:
24595 case BUILT_IN_LOG10:
24598 if (el_mode != DFmode
24603 case BUILT_IN_SINF:
24604 case BUILT_IN_COSF:
24605 case BUILT_IN_EXPF:
24606 case BUILT_IN_POWF:
24607 case BUILT_IN_LOGF:
24608 case BUILT_IN_LOG2F:
24609 case BUILT_IN_LOG10F:
24612 if (el_mode != SFmode
24621 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
24622 sprintf (name + 7, "%s", bname+10);
24625 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
24626 args = TREE_CHAIN (args))
24630 fntype = build_function_type_list (type_out, type_in, NULL);
24632 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
24634 /* Build a function declaration for the vectorized function. */
24635 new_fndecl = build_decl (BUILTINS_LOCATION,
24636 FUNCTION_DECL, get_identifier (name), fntype);
24637 TREE_PUBLIC (new_fndecl) = 1;
24638 DECL_EXTERNAL (new_fndecl) = 1;
24639 DECL_IS_NOVOPS (new_fndecl) = 1;
24640 TREE_READONLY (new_fndecl) = 1;
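/* Worked example (illustrative, not original source): for BUILT_IN_EXPF,
   the "__vr.._" template has its two dots filled in as 's' and '4'
   (SFmode, four lanes) and BNAME+10 ("expf") appended at offset 7,
   producing "__vrs4_expf", the corresponding ACML entry point; the
   DFmode variants come out as "__vrd2_..." instead.  */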
24646 /* Returns a decl of a function that implements conversion of an integer vector
24647 into a floating-point vector, or vice-versa. TYPE is the type of the integer
24648 side of the conversion.
24649 Return NULL_TREE if it is not available. */
24652 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
24654 if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE))
24660 switch (TYPE_MODE (type))
24663 return TYPE_UNSIGNED (type)
24664 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
24665 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
24670 case FIX_TRUNC_EXPR:
24671 switch (TYPE_MODE (type))
24674 return TYPE_UNSIGNED (type)
24676 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
24686 /* Returns a code for a target-specific builtin that implements
24687 the reciprocal of the function, or NULL_TREE if not available.  */
24690 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
24691 bool sqrt ATTRIBUTE_UNUSED)
24693 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
24694 && flag_finite_math_only && !flag_trapping_math
24695 && flag_unsafe_math_optimizations))
24699 /* Machine dependent builtins. */
24702 /* Vectorized version of sqrt to rsqrt conversion. */
24703 case IX86_BUILTIN_SQRTPS_NR:
24704 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
24710 /* Normal builtins. */
24713 /* Sqrt to rsqrt conversion. */
24714 case BUILT_IN_SQRTF:
24715 return ix86_builtins[IX86_BUILTIN_RSQRTF];
24722 /* Helper for avx_vpermilps256_operand et al. This is also used by
24723 the expansion functions to turn the parallel back into a mask.
24724 The return value is 0 for no match and the imm8+1 for a match. */
24727 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
24729 unsigned i, nelt = GET_MODE_NUNITS (mode);
24731 unsigned char ipar[8];
24733 if (XVECLEN (par, 0) != (int) nelt)
24736 /* Validate that all of the elements are constants, and not totally
24737 out of range. Copy the data into an integral array to make the
24738 subsequent checks easier. */
24739 for (i = 0; i < nelt; ++i)
24741 rtx er = XVECEXP (par, 0, i);
24742 unsigned HOST_WIDE_INT ei;
24744 if (!CONST_INT_P (er))
24755 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane.  */
24757 for (i = 0; i < 2; ++i)
24761 mask |= ipar[i] << i;
24763 for (i = 2; i < 4; ++i)
24767 mask |= (ipar[i] - 2) << i;
24772 /* In the 256-bit SFmode case, we have full freedom of movement
24773 within the low 128-bit lane, but the high 128-bit lane must
24774 mirror the exact same pattern. */
24775 for (i = 0; i < 4; ++i)
24776 if (ipar[i] + 4 != ipar[i + 4])
24783 /* In the 128-bit case, we've full freedom in the placement of
24784 the elements from the source operand. */
24785 for (i = 0; i < nelt; ++i)
24786 mask |= ipar[i] << (i * (nelt / 2));
24790 gcc_unreachable ();
24793 /* Make sure success has a non-zero value by adding one. */
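/* Worked example (illustrative, not original source): for a V4SF
   parallel (1 0 3 2), the 128-bit branch computes
     mask = 1<<0 | 0<<2 | 3<<4 | 2<<6 = 0xb1
   and the function returns 0xb2, i.e. imm8 + 1.  */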
24797 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
24798 the expansion functions to turn the parallel back into a mask.
24799 The return value is 0 for no match and the imm8+1 for a match. */
24802 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
24804 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
24806 unsigned char ipar[8];
24808 if (XVECLEN (par, 0) != (int) nelt)
24811 /* Validate that all of the elements are constants, and not totally
24812 out of range. Copy the data into an integral array to make the
24813 subsequent checks easier. */
24814 for (i = 0; i < nelt; ++i)
24816 rtx er = XVECEXP (par, 0, i);
24817 unsigned HOST_WIDE_INT ei;
24819 if (!CONST_INT_P (er))
24822 if (ei >= 2 * nelt)
24827 /* Validate that each half of the permute selects a whole contiguous half.  */
24828 for (i = 0; i < nelt2 - 1; ++i)
24829 if (ipar[i] + 1 != ipar[i + 1])
24831 for (i = nelt2; i < nelt - 1; ++i)
24832 if (ipar[i] + 1 != ipar[i + 1])
24835 /* Reconstruct the mask. */
24836 for (i = 0; i < 2; ++i)
24838 unsigned e = ipar[i * nelt2];
24842 mask |= e << (i * 4);
24845 /* Make sure success has a non-zero value by adding one. */
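/* Worked example (illustrative, not original source): a V4DF parallel
   (0 1 4 5) takes the low half of each source; each half is a
   consecutive run, the per-half selectors reduce to 0 and 2, and the
   reconstructed mask is 0x20, so the function returns 0x21.  */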
24850 /* Store OPERAND to memory after reload has completed.  This means
24851 that we can't easily use assign_stack_local.  */
24853 ix86_force_to_memory (enum machine_mode mode, rtx operand)
24857 gcc_assert (reload_completed);
24858 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
24860 result = gen_rtx_MEM (mode,
24861 gen_rtx_PLUS (Pmode,
24863 GEN_INT (-RED_ZONE_SIZE)));
24864 emit_move_insn (result, operand);
24866 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
24872 operand = gen_lowpart (DImode, operand);
24876 gen_rtx_SET (VOIDmode,
24877 gen_rtx_MEM (DImode,
24878 gen_rtx_PRE_DEC (DImode,
24879 stack_pointer_rtx)),
24883 gcc_unreachable ();
24885 result = gen_rtx_MEM (mode, stack_pointer_rtx);
24894 split_di (&operand, 1, operands, operands + 1);
24896 gen_rtx_SET (VOIDmode,
24897 gen_rtx_MEM (SImode,
24898 gen_rtx_PRE_DEC (Pmode,
24899 stack_pointer_rtx)),
24902 gen_rtx_SET (VOIDmode,
24903 gen_rtx_MEM (SImode,
24904 gen_rtx_PRE_DEC (Pmode,
24905 stack_pointer_rtx)),
24910 /* Store HImodes as SImodes. */
24911 operand = gen_lowpart (SImode, operand);
24915 gen_rtx_SET (VOIDmode,
24916 gen_rtx_MEM (GET_MODE (operand),
24917 gen_rtx_PRE_DEC (SImode,
24918 stack_pointer_rtx)),
24922 gcc_unreachable ();
24924 result = gen_rtx_MEM (mode, stack_pointer_rtx);
24929 /* Free the operand from memory.  */
24931 ix86_free_from_memory (enum machine_mode mode)
24933 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
24937 if (mode == DImode || TARGET_64BIT)
24941 /* Use LEA to deallocate stack space. In peephole2 it will be converted
24942 to a pop or add instruction if registers are available.  */
24943 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
24944 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
24949 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
24950 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same.  */
24952 static const enum reg_class *
24953 i386_ira_cover_classes (void)
24955 static const enum reg_class sse_fpmath_classes[] = {
24956 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
24958 static const enum reg_class no_sse_fpmath_classes[] = {
24959 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
24962 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
24965 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
24966 QImode must go into class Q_REGS.
24967 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
24968 movdf to do mem-to-mem moves through integer regs. */
24970 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
24972 enum machine_mode mode = GET_MODE (x);
24974 /* We're only allowed to return a subclass of CLASS. Many of the
24975 following checks fail for NO_REGS, so eliminate that early. */
24976 if (regclass == NO_REGS)
24979 /* All classes can load zeros. */
24980 if (x == CONST0_RTX (mode))
24983 /* Force constants into memory if we are loading a (nonzero) constant into
24984 an MMX or SSE register. This is because there are no MMX/SSE instructions
24985 to load from a constant. */
24987 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
24990 /* Prefer SSE regs only, if we can use them for math. */
24991 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
24992 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
24994 /* Floating-point constants need more complex checks. */
24995 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
24997 /* General regs can load everything. */
24998 if (reg_class_subset_p (regclass, GENERAL_REGS))
25001 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25002 zero above. We only want to wind up preferring 80387 registers if
25003 we plan on doing computation with them. */
25005 && standard_80387_constant_p (x))
25007 /* Limit class to non-sse. */
25008 if (regclass == FLOAT_SSE_REGS)
25010 if (regclass == FP_TOP_SSE_REGS)
25012 if (regclass == FP_SECOND_SSE_REGS)
25013 return FP_SECOND_REG;
25014 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25021 /* Generally when we see PLUS here, it's the function invariant
25022 (plus soft-fp const_int), which can only be computed into general regs.  */
25024 if (GET_CODE (x) == PLUS)
25025 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25027 /* QImode constants are easy to load, but non-constant QImode data
25028 must go into Q_REGS. */
25029 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25031 if (reg_class_subset_p (regclass, Q_REGS))
25033 if (reg_class_subset_p (Q_REGS, regclass))
25041 /* Discourage putting floating-point values in SSE registers unless
25042 SSE math is being used, and likewise for the 387 registers. */
25044 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25046 enum machine_mode mode = GET_MODE (x);
25048 /* Restrict the output reload class to the register bank that we are doing
25049 math on. If we would like not to return a subset of CLASS, reject this
25050 alternative: if reload cannot do this, it will still use its choice. */
25052 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25053 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25055 if (X87_FLOAT_MODE_P (mode))
25057 if (regclass == FP_TOP_SSE_REGS)
25059 else if (regclass == FP_SECOND_SSE_REGS)
25060 return FP_SECOND_REG;
25062 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25068 static enum reg_class
25069 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25070 enum machine_mode mode,
25071 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25073 /* QImode spills from non-QI registers require an
25074 intermediate register on 32-bit targets.  */
25075 if (!in_p && mode == QImode && !TARGET_64BIT
25076 && (rclass == GENERAL_REGS
25077 || rclass == LEGACY_REGS
25078 || rclass == INDEX_REGS))
25087 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25088 regno = true_regnum (x);
25090 /* Return Q_REGS if the operand is in memory. */
25098 /* If we are copying between general and FP registers, we need a memory
25099 location. The same is true for SSE and MMX registers.
25101 To optimize register_move_cost performance, allow the inline variant.
25103 The macro can't work reliably when one of the CLASSES is a class containing
25104 registers from multiple units (SSE, MMX, integer).  We avoid this by never
25105 combining those units in a single alternative in the machine description.
25106 Ensure that this constraint holds to avoid unexpected surprises.
25108 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25109 enforce these sanity checks. */
25112 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25113 enum machine_mode mode, int strict)
25115 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25116 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25117 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25118 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25119 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25120 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25122 gcc_assert (!strict);
25126 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25129 /* ??? This is a lie. We do have moves between mmx/general, and for
25130 mmx/sse2. But by saying we need secondary memory we discourage the
25131 register allocator from using the mmx registers unless needed. */
25132 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25135 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25137 /* SSE1 doesn't have any direct moves from other classes. */
25141 /* If the target says that inter-unit moves are more expensive
25142 than moving through memory, then don't generate them. */
25143 if (!TARGET_INTER_UNIT_MOVES)
25146 /* Between SSE and general, we have moves no larger than word size. */
25147 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25155 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25156 enum machine_mode mode, int strict)
25158 return inline_secondary_memory_needed (class1, class2, mode, strict);
25161 /* Return true if the registers in CLASS cannot represent the change from
25162 modes FROM to TO. */
25165 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25166 enum reg_class regclass)
25171 /* x87 registers can't do subreg at all, as all values are reformatted
25172 to extended precision. */
25173 if (MAYBE_FLOAT_CLASS_P (regclass))
25176 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25178 /* Vector registers do not support QI or HImode loads. If we don't
25179 disallow a change to these modes, reload will assume it's ok to
25180 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25181 the vec_dupv4hi pattern. */
25182 if (GET_MODE_SIZE (from) < 4)
25185 /* Vector registers do not support subreg with nonzero offsets, which
25186 are otherwise valid for integer registers. Since we can't see
25187 whether we have a nonzero offset from here, prohibit all
25188 nonparadoxical subregs changing size. */
25189 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25196 /* Return the cost of moving data of mode M between a
25197 register and memory. A value of 2 is the default; this cost is
25198 relative to those in `REGISTER_MOVE_COST'.
25200 This function is used extensively by register_move_cost, which is used to
25201 build tables at startup, so make it inline in this case.
25202 When IN is 2, return the maximum of the in and out move costs.
25204 If moving between registers and memory is more expensive than
25205 between two registers, you should define this macro to express the relative cost.
25208 Also model the increased moving costs of QImode registers in non Q_REGS classes.  */
25212 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25216 if (FLOAT_CLASS_P (regclass))
25234 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25235 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25237 if (SSE_CLASS_P (regclass))
25240 switch (GET_MODE_SIZE (mode))
25255 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25256 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25258 if (MMX_CLASS_P (regclass))
25261 switch (GET_MODE_SIZE (mode))
25273 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25274 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25276 switch (GET_MODE_SIZE (mode))
25279 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25282 return ix86_cost->int_store[0];
25283 if (TARGET_PARTIAL_REG_DEPENDENCY
25284 && optimize_function_for_speed_p (cfun))
25285 cost = ix86_cost->movzbl_load;
25287 cost = ix86_cost->int_load[0];
25289 return MAX (cost, ix86_cost->int_store[0]);
25295 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25297 return ix86_cost->movzbl_load;
25299 return ix86_cost->int_store[0] + 4;
25304 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25305 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25307 /* Compute the number of 32-bit moves needed.  TFmode is moved as XFmode.  */
25308 if (mode == TFmode)
25311 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25313 cost = ix86_cost->int_load[2];
25315 cost = ix86_cost->int_store[2];
25316 return (cost * (((int) GET_MODE_SIZE (mode)
25317 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
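/* Worked example (illustrative, not original source): a DImode spill on
   ia32 is GET_MODE_SIZE (DImode) / UNITS_PER_WORD = 2 word-sized moves,
   so the fall-through case above returns twice the int_load[2] /
   int_store[2] figure chosen for SImode-sized pieces.  */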
25322 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25324 return inline_memory_move_cost (mode, regclass, in);
25328 /* Return the cost of moving data from a register in class CLASS1 to
25329 one in class CLASS2.
25331 It is not required that the cost always equal 2 when FROM is the same as TO;
25332 on some machines it is expensive to move between registers if they are not
25333 general registers. */
25336 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25337 enum reg_class class2)
25339 /* If we require secondary memory, compute the cost of the store followed
25340 by the load.  To avoid bad register allocation choices, we need this
25341 to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
25343 if (inline_secondary_memory_needed (class1, class2, mode, 0))
25347 cost += inline_memory_move_cost (mode, class1, 2);
25348 cost += inline_memory_move_cost (mode, class2, 2);
25350 /* When copying from a general purpose register we may emit multiple
25351 stores followed by a single load, causing a memory size mismatch stall.
25352 Count this as an arbitrarily high cost of 20.  */
25353 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25356 /* In the case of FP/MMX moves, the registers actually overlap, and we
25357 have to switch modes in order to treat them differently. */
25358 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25359 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25365 /* Moves between SSE/MMX and integer unit are expensive. */
25366 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25367 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25369 /* ??? By keeping the returned value relatively high, we limit the number
25370 of moves between integer and MMX/SSE registers for all targets.
25371 Additionally, a high value prevents problems with x86_modes_tieable_p(),
25372 where integer modes in MMX/SSE registers are not tieable
25373 because of missing QImode and HImode moves to, from or between
25374 MMX/SSE registers.  */
25375 return MAX (8, ix86_cost->mmxsse_to_integer);
25377 if (MAYBE_FLOAT_CLASS_P (class1))
25378 return ix86_cost->fp_move;
25379 if (MAYBE_SSE_CLASS_P (class1))
25380 return ix86_cost->sse_move;
25381 if (MAYBE_MMX_CLASS_P (class1))
25382 return ix86_cost->mmx_move;
25386 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
25389 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
25391 /* Flags, and only flags, can hold CCmode values.  */
25392 if (CC_REGNO_P (regno))
25393 return GET_MODE_CLASS (mode) == MODE_CC;
25394 if (GET_MODE_CLASS (mode) == MODE_CC
25395 || GET_MODE_CLASS (mode) == MODE_RANDOM
25396 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
25398 if (FP_REGNO_P (regno))
25399 return VALID_FP_MODE_P (mode);
25400 if (SSE_REGNO_P (regno))
25402 /* We implement the move patterns for all vector modes into and
25403 out of SSE registers, even when no operation instructions
25404 are available.  OImode move is available only when AVX is enabled.  */
25406 return ((TARGET_AVX && mode == OImode)
25407 || VALID_AVX256_REG_MODE (mode)
25408 || VALID_SSE_REG_MODE (mode)
25409 || VALID_SSE2_REG_MODE (mode)
25410 || VALID_MMX_REG_MODE (mode)
25411 || VALID_MMX_REG_MODE_3DNOW (mode));
25413 if (MMX_REGNO_P (regno))
25415 /* We implement the move patterns for 3DNOW modes even in MMX mode,
25416 so if the register is available at all, then we can move data of
25417 the given mode into or out of it. */
25418 return (VALID_MMX_REG_MODE (mode)
25419 || VALID_MMX_REG_MODE_3DNOW (mode));
25422 if (mode == QImode)
25424 /* Take care with QImode values - they can be in non-QI regs,
25425 but then they do cause partial register stalls.  */
25426 if (regno <= BX_REG || TARGET_64BIT)
25428 if (!TARGET_PARTIAL_REG_STALL)
25430 return reload_in_progress || reload_completed;
25432 /* We handle both integers and floats in the general purpose registers. */
25433 else if (VALID_INT_MODE_P (mode))
25435 else if (VALID_FP_MODE_P (mode))
25437 else if (VALID_DFP_MODE_P (mode))
25439 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
25440 on to use that value in smaller contexts, this can easily force a
25441 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
25442 supporting DImode, allow it. */
25443 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
25449 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
25450 tieable integer mode. */
25453 ix86_tieable_integer_mode_p (enum machine_mode mode)
25462 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
25465 return TARGET_64BIT;
25472 /* Return true if MODE1 is accessible in a register that can hold MODE2
25473 without copying. That is, all register classes that can hold MODE2
25474 can also hold MODE1. */
25477 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
25479 if (mode1 == mode2)
25482 if (ix86_tieable_integer_mode_p (mode1)
25483 && ix86_tieable_integer_mode_p (mode2))
25486 /* MODE2 being XFmode implies fp stack or general regs, which means we
25487 can tie any smaller floating point modes to it. Note that we do not
25488 tie this with TFmode. */
25489 if (mode2 == XFmode)
25490 return mode1 == SFmode || mode1 == DFmode;
25492 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
25493 that we can tie it with SFmode. */
25494 if (mode2 == DFmode)
25495 return mode1 == SFmode;
25497 /* If MODE2 is only appropriate for an SSE register, then tie with
25498 any other mode acceptable to SSE registers. */
25499 if (GET_MODE_SIZE (mode2) == 16
25500 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
25501 return (GET_MODE_SIZE (mode1) == 16
25502 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
25504 /* If MODE2 is appropriate for an MMX register, then tie
25505 with any other mode acceptable to MMX registers. */
25506 if (GET_MODE_SIZE (mode2) == 8
25507 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
25508 return (GET_MODE_SIZE (mode1) == 8
25509 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
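/* Some concrete consequences of the rules above: SFmode ties with both
   DFmode and XFmode; DImode and V2SImode tie when MMX registers are
   enabled (both are 8 bytes and MMX-capable); two 16-byte SSE modes
   such as V4SFmode and V2DFmode tie with each other; but TFmode never
   ties with XFmode.  */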
25514 /* Compute a (partial) cost for rtx X. Return true if the complete
25515 cost has been computed, and false if subexpressions should be
25516 scanned. In either case, *TOTAL contains the cost result. */
25519 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
25521 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
25522 enum machine_mode mode = GET_MODE (x);
25523 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
25531 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
25533 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
25535 else if (flag_pic && SYMBOLIC_CONST (x)
25537 || (GET_CODE (x) != LABEL_REF
25538 && (GET_CODE (x) != SYMBOL_REF
25539 || !SYMBOL_REF_LOCAL_P (x)))))
25546 if (mode == VOIDmode)
25549 switch (standard_80387_constant_p (x))
25554 default: /* Other constants */
25559 /* Start with (MEM (SYMBOL_REF)), since that's where
25560 it'll probably end up. Add a penalty for size. */
25561 *total = (COSTS_N_INSNS (1)
25562 + (flag_pic != 0 && !TARGET_64BIT)
25563 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
25569 /* The zero extension is often completely free on x86_64, so make
25570 it as cheap as possible. */
25571 if (TARGET_64BIT && mode == DImode
25572 && GET_MODE (XEXP (x, 0)) == SImode)
25574 else if (TARGET_ZERO_EXTEND_WITH_AND)
25575 *total = cost->add;
25577 *total = cost->movzx;
25581 *total = cost->movsx;
25585 if (CONST_INT_P (XEXP (x, 1))
25586 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
25588 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25591 *total = cost->add;
25594 if ((value == 2 || value == 3)
25595 && cost->lea <= cost->shift_const)
25597 *total = cost->lea;
25607 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
25609 if (CONST_INT_P (XEXP (x, 1)))
25611 if (INTVAL (XEXP (x, 1)) > 32)
25612 *total = cost->shift_const + COSTS_N_INSNS (2);
25614 *total = cost->shift_const * 2;
25618 if (GET_CODE (XEXP (x, 1)) == AND)
25619 *total = cost->shift_var * 2;
25621 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
25626 if (CONST_INT_P (XEXP (x, 1)))
25627 *total = cost->shift_const;
25629 *total = cost->shift_var;
25634 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25636 /* ??? SSE scalar cost should be used here. */
25637 *total = cost->fmul;
25640 else if (X87_FLOAT_MODE_P (mode))
25642 *total = cost->fmul;
25645 else if (FLOAT_MODE_P (mode))
25647 /* ??? SSE vector cost should be used here. */
25648 *total = cost->fmul;
25653 rtx op0 = XEXP (x, 0);
25654 rtx op1 = XEXP (x, 1);
25656 if (CONST_INT_P (XEXP (x, 1)))
25658 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
25659 for (nbits = 0; value != 0; value &= value - 1)
25663 /* This is arbitrary. */
25666 /* Compute costs correctly for widening multiplication. */
25667 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
25668 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
25669 == GET_MODE_SIZE (mode))
25671 int is_mulwiden = 0;
25672 enum machine_mode inner_mode = GET_MODE (op0);
25674 if (GET_CODE (op0) == GET_CODE (op1))
25675 is_mulwiden = 1, op1 = XEXP (op1, 0);
25676 else if (CONST_INT_P (op1))
25678 if (GET_CODE (op0) == SIGN_EXTEND)
25679 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
25682 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
25686 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
25689 *total = (cost->mult_init[MODE_INDEX (mode)]
25690 + nbits * cost->mult_bit
25691 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
25700 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25701 /* ??? SSE cost should be used here. */
25702 *total = cost->fdiv;
25703 else if (X87_FLOAT_MODE_P (mode))
25704 *total = cost->fdiv;
25705 else if (FLOAT_MODE_P (mode))
25706 /* ??? SSE vector cost should be used here. */
25707 *total = cost->fdiv;
25709 *total = cost->divide[MODE_INDEX (mode)];
25713 if (GET_MODE_CLASS (mode) == MODE_INT
25714 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
25716 if (GET_CODE (XEXP (x, 0)) == PLUS
25717 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
25718 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
25719 && CONSTANT_P (XEXP (x, 1)))
25721 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
25722 if (val == 2 || val == 4 || val == 8)
25724 *total = cost->lea;
25725 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25726 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
25727 outer_code, speed);
25728 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25732 else if (GET_CODE (XEXP (x, 0)) == MULT
25733 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
25735 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
25736 if (val == 2 || val == 4 || val == 8)
25738 *total = cost->lea;
25739 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25740 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25744 else if (GET_CODE (XEXP (x, 0)) == PLUS)
25746 *total = cost->lea;
25747 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
25748 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
25749 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
25756 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25758 /* ??? SSE cost should be used here. */
25759 *total = cost->fadd;
25762 else if (X87_FLOAT_MODE_P (mode))
25764 *total = cost->fadd;
25767 else if (FLOAT_MODE_P (mode))
25769 /* ??? SSE vector cost should be used here. */
25770 *total = cost->fadd;
25778 if (!TARGET_64BIT && mode == DImode)
25780 *total = (cost->add * 2
25781 + (rtx_cost (XEXP (x, 0), outer_code, speed)
25782 << (GET_MODE (XEXP (x, 0)) != DImode))
25783 + (rtx_cost (XEXP (x, 1), outer_code, speed)
25784 << (GET_MODE (XEXP (x, 1)) != DImode)));
25790 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25792 /* ??? SSE cost should be used here. */
25793 *total = cost->fchs;
25796 else if (X87_FLOAT_MODE_P (mode))
25798 *total = cost->fchs;
25801 else if (FLOAT_MODE_P (mode))
25803 /* ??? SSE vector cost should be used here. */
25804 *total = cost->fchs;
25810 if (!TARGET_64BIT && mode == DImode)
25811 *total = cost->add * 2;
25813 *total = cost->add;
25817 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
25818 && XEXP (XEXP (x, 0), 1) == const1_rtx
25819 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
25820 && XEXP (x, 1) == const0_rtx)
25822 /* This kind of construct is implemented using test[bwl].
25823 Treat it as if we had an AND. */
25824 *total = (cost->add
25825 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
25826 + rtx_cost (const1_rtx, outer_code, speed));
25832 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
25837 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25838 /* ??? SSE cost should be used here. */
25839 *total = cost->fabs;
25840 else if (X87_FLOAT_MODE_P (mode))
25841 *total = cost->fabs;
25842 else if (FLOAT_MODE_P (mode))
25843 /* ??? SSE vector cost should be used here. */
25844 *total = cost->fabs;
25848 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
25849 /* ??? SSE cost should be used here. */
25850 *total = cost->fsqrt;
25851 else if (X87_FLOAT_MODE_P (mode))
25852 *total = cost->fsqrt;
25853 else if (FLOAT_MODE_P (mode))
25854 /* ??? SSE vector cost should be used here. */
25855 *total = cost->fsqrt;
25859 if (XINT (x, 1) == UNSPEC_TP)
25866 case VEC_DUPLICATE:
25867 /* ??? Assume all of these vector manipulation patterns are
25868 recognizable, in which case they all pretty much have the same cost. */
25870 *total = COSTS_N_INSNS (1);
25880 static int current_machopic_label_num;
25882 /* Given a symbol name and its associated stub, write out the
25883 definition of the stub. */
25886 machopic_output_stub (FILE *file, const char *symb, const char *stub)
25888 unsigned int length;
25889 char *binder_name, *symbol_name, lazy_ptr_name[32];
25890 int label = ++current_machopic_label_num;
25892 /* For 64-bit we shouldn't get here. */
25893 gcc_assert (!TARGET_64BIT);
25895 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
25896 symb = (*targetm.strip_name_encoding) (symb);
25898 length = strlen (stub);
25899 binder_name = XALLOCAVEC (char, length + 32);
25900 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
25902 length = strlen (symb);
25903 symbol_name = XALLOCAVEC (char, length + 32);
25904 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
25906 sprintf (lazy_ptr_name, "L%d$lz", label);
25909 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
25911 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
25913 fprintf (file, "%s:\n", stub);
25914 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25918 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
25919 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
25920 fprintf (file, "\tjmp\t*%%edx\n");
25923 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
25925 fprintf (file, "%s:\n", binder_name);
25929 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
25930 fputs ("\tpushl\t%eax\n", file);
25933 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
25935 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
25937 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
25938 fprintf (file, "%s:\n", lazy_ptr_name);
25939 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
25940 fprintf (file, ASM_LONG "%s\n", binder_name);
25944 darwin_x86_file_end (void)
25946 darwin_file_end ();
25949 #endif /* TARGET_MACHO */
25951 /* Order the registers for the register allocator. */
25954 x86_order_regs_for_local_alloc (void)
25959 /* First allocate the local general purpose registers. */
25960 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25961 if (GENERAL_REGNO_P (i) && call_used_regs[i])
25962 reg_alloc_order [pos++] = i;
25964 /* Global general purpose registers. */
25965 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
25966 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
25967 reg_alloc_order [pos++] = i;
25969 /* x87 registers come first in case we are doing FP math using them. */
25971 if (!TARGET_SSE_MATH)
25972 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25973 reg_alloc_order [pos++] = i;
25975 /* SSE registers. */
25976 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
25977 reg_alloc_order [pos++] = i;
25978 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
25979 reg_alloc_order [pos++] = i;
25981 /* x87 registers. */
25982 if (TARGET_SSE_MATH)
25983 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
25984 reg_alloc_order [pos++] = i;
25986 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
25987 reg_alloc_order [pos++] = i;
25989 /* Initialize the rest of the array as we do not allocate some registers at all. */
25991 while (pos < FIRST_PSEUDO_REGISTER)
25992 reg_alloc_order [pos++] = 0;
25995 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
25996 struct attribute_spec.handler. */
25998 ix86_handle_abi_attribute (tree *node, tree name,
25999 tree args ATTRIBUTE_UNUSED,
26000 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26002 if (TREE_CODE (*node) != FUNCTION_TYPE
26003 && TREE_CODE (*node) != METHOD_TYPE
26004 && TREE_CODE (*node) != FIELD_DECL
26005 && TREE_CODE (*node) != TYPE_DECL)
26007 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26009 *no_add_attrs = true;
26014 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26016 *no_add_attrs = true;
26020 /* Can combine regparm with all attributes but fastcall. */
26021 if (is_attribute_p ("ms_abi", name))
26023 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26025 error ("ms_abi and sysv_abi attributes are not compatible");
26030 else if (is_attribute_p ("sysv_abi", name))
26032 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26034 error ("ms_abi and sysv_abi attributes are not compatible");
26043 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26044 struct attribute_spec.handler. */
26046 ix86_handle_struct_attribute (tree *node, tree name,
26047 tree args ATTRIBUTE_UNUSED,
26048 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26051 if (DECL_P (*node))
26053 if (TREE_CODE (*node) == TYPE_DECL)
26054 type = &TREE_TYPE (*node);
26059 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26060 || TREE_CODE (*type) == UNION_TYPE)))
26062 warning (OPT_Wattributes, "%qE attribute ignored",
26064 *no_add_attrs = true;
26067 else if ((is_attribute_p ("ms_struct", name)
26068 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26069 || ((is_attribute_p ("gcc_struct", name)
26070 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26072 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26074 *no_add_attrs = true;
26081 ix86_handle_fndecl_attribute (tree *node, tree name,
26082 tree args ATTRIBUTE_UNUSED,
26083 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26085 if (TREE_CODE (*node) != FUNCTION_DECL)
26087 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26089 *no_add_attrs = true;
26095 warning (OPT_Wattributes, "%qE attribute only available for 32-bit",
26100 #ifndef HAVE_AS_IX86_SWAP
26101 sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
26108 ix86_ms_bitfield_layout_p (const_tree record_type)
26110 return (TARGET_MS_BITFIELD_LAYOUT &&
26111 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26112 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26115 /* Returns an expression indicating where the this parameter is
26116 located on entry to the FUNCTION. */
26119 x86_this_parameter (tree function)
26121 tree type = TREE_TYPE (function);
26122 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26127 const int *parm_regs;
26129 if (ix86_function_type_abi (type) == MS_ABI)
26130 parm_regs = x86_64_ms_abi_int_parameter_registers;
26132 parm_regs = x86_64_int_parameter_registers;
26133 return gen_rtx_REG (DImode, parm_regs[aggr]);
26136 nregs = ix86_function_regparm (type, function);
26138 if (nregs > 0 && !stdarg_p (type))
26142 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26143 regno = aggr ? DX_REG : CX_REG;
26151 return gen_rtx_MEM (SImode,
26152 plus_constant (stack_pointer_rtx, 4));
26155 return gen_rtx_REG (SImode, regno);
26158 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
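/* For example (an assumed C++ method, for illustration): for a method
   of struct S compiled for 64-bit SysV, `this' arrives in %rdi and the
   function above returns (reg:DI di); under the 64-bit MS ABI it is
   %rcx; for a 32-bit fastcall method it is %ecx, or %edx when an
   aggregate return pointer already occupies %ecx; otherwise it is the
   first stack word, (mem:SI (plus sp 4)), or sp+8 past an aggregate
   return pointer.  */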
26161 /* Determine whether x86_output_mi_thunk can succeed. */
26164 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26165 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26166 HOST_WIDE_INT vcall_offset, const_tree function)
26168 /* 64-bit can handle anything. */
26172 /* For 32-bit, everything's fine if we have one free register. */
26173 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26176 /* Need a free register for vcall_offset. */
26180 /* Need a free register for GOT references. */
26181 if (flag_pic && !(*targetm.binds_local_p) (function))
26184 /* Otherwise ok. */
26188 /* Output the assembler code for a thunk function. THUNK_DECL is the
26189 declaration for the thunk function itself, FUNCTION is the decl for
26190 the target function. DELTA is an immediate constant offset to be
26191 added to THIS. If VCALL_OFFSET is nonzero, the word at
26192 *(*this + vcall_offset) should be added to THIS. */
26195 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26196 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26197 HOST_WIDE_INT vcall_offset, tree function)
26200 rtx this_param = x86_this_parameter (function);
26203 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26204 pull it in now and let DELTA benefit. */
26205 if (REG_P (this_param))
26206 this_reg = this_param;
26207 else if (vcall_offset)
26209 /* Put the this parameter into %eax. */
26210 xops[0] = this_param;
26211 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26212 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26215 this_reg = NULL_RTX;
26217 /* Adjust the this parameter by a fixed constant. */
26220 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
26221 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
26222 bool sub = delta < 0 || delta == 128;
26223 xops[0] = GEN_INT (sub ? -delta : delta);
26224 xops[1] = this_reg ? this_reg : this_param;
26227 if (!x86_64_general_operand (xops[0], DImode))
26229 tmp = gen_rtx_REG (DImode, R10_REG);
26231 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26233 xops[1] = this_param;
26236 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
26238 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26241 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
26243 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26246 /* Adjust the this parameter by a value stored in the vtable. */
26250 tmp = gen_rtx_REG (DImode, R10_REG);
26253 int tmp_regno = CX_REG;
26254 if (lookup_attribute ("fastcall",
26255 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26256 tmp_regno = AX_REG;
26257 tmp = gen_rtx_REG (SImode, tmp_regno);
26260 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26262 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26264 /* Adjust the this parameter. */
26265 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26266 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26268 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26269 xops[0] = GEN_INT (vcall_offset);
26271 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26272 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26274 xops[1] = this_reg;
26275 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26278 /* If necessary, drop THIS back to its stack slot. */
26279 if (this_reg && this_reg != this_param)
26281 xops[0] = this_reg;
26282 xops[1] = this_param;
26283 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26286 xops[0] = XEXP (DECL_RTL (function), 0);
26289 if (!flag_pic || (*targetm.binds_local_p) (function))
26290 output_asm_insn ("jmp\t%P0", xops);
26291 /* All thunks should be in the same object as their target,
26292 and thus binds_local_p should be true. */
26293 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26294 gcc_unreachable ();
26297 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26298 tmp = gen_rtx_CONST (Pmode, tmp);
26299 tmp = gen_rtx_MEM (QImode, tmp);
26301 output_asm_insn ("jmp\t%A0", xops);
26306 if (!flag_pic || (*targetm.binds_local_p) (function))
26307 output_asm_insn ("jmp\t%P0", xops);
26312 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26313 tmp = (gen_rtx_SYMBOL_REF
26315 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26316 tmp = gen_rtx_MEM (QImode, tmp);
26318 output_asm_insn ("jmp\t%0", xops);
26321 #endif /* TARGET_MACHO */
26323 tmp = gen_rtx_REG (SImode, CX_REG);
26324 output_set_got (tmp, NULL_RTX);
26327 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26328 output_asm_insn ("jmp\t{*}%1", xops);
26334 x86_file_start (void)
26336 default_file_start ();
26338 darwin_file_start ();
26340 if (X86_FILE_START_VERSION_DIRECTIVE)
26341 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26342 if (X86_FILE_START_FLTUSED)
26343 fputs ("\t.global\t__fltused\n", asm_out_file);
26344 if (ix86_asm_dialect == ASM_INTEL)
26345 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26349 x86_field_alignment (tree field, int computed)
26351 enum machine_mode mode;
26352 tree type = TREE_TYPE (field);
26354 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26356 mode = TYPE_MODE (strip_array_types (type));
26357 if (mode == DFmode || mode == DCmode
26358 || GET_MODE_CLASS (mode) == MODE_INT
26359 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26360 return MIN (32, computed);
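/* For example, in 32-bit code without -malign-double, the double in
     struct { char c; double d; };
   is capped at 32-bit alignment by the MIN above, so d lands at offset
   4; with TARGET_64BIT or TARGET_ALIGN_DOUBLE the cap does not apply
   and d gets its natural 64-bit alignment at offset 8.  */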
26364 /* Output assembler code to FILE to increment profiler label # LABELNO
26365 for profiling a function entry. */
26367 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26371 #ifndef NO_PROFILE_COUNTERS
26372 fprintf (file, "\tleaq\t" LPREFIX "P%d(%%rip),%%r11\n", labelno);
26375 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26376 fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file);
26378 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
26382 #ifndef NO_PROFILE_COUNTERS
26383 fprintf (file, "\tleal\t" LPREFIX "P%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
26386 fputs ("\tcall\t*" MCOUNT_NAME "@GOT(%ebx)\n", file);
26390 #ifndef NO_PROFILE_COUNTERS
26391 fprintf (file, "\tmovl\t$" LPREFIX "P%d,%%" PROFILE_COUNT_REGISTER "\n",
26394 fputs ("\tcall\t" MCOUNT_NAME "\n", file);
26398 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
26399 /* We don't have exact information about the insn sizes, but we may assume
26400 quite safely that we are informed about all 1 byte insns and memory
26401 address sizes. This is enough to eliminate unnecessary padding in the vast majority of cases. */
26405 min_insn_size (rtx insn)
26409 if (!INSN_P (insn) || !active_insn_p (insn))
26412 /* Discard alignments we've emitted, and jump instructions. */
26413 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26414 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26416 if (JUMP_TABLE_DATA_P (insn))
26419 /* Important case - calls are always 5 bytes.
26420 It is common to have many calls in a row. */
26422 && symbolic_reference_mentioned_p (PATTERN (insn))
26423 && !SIBLING_CALL_P (insn))
26425 len = get_attr_length (insn);
26429 /* For normal instructions we rely on get_attr_length being exact,
26430 with a few exceptions. */
26431 if (!JUMP_P (insn))
26433 enum attr_type type = get_attr_type (insn);
26438 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
26439 || asm_noperands (PATTERN (insn)) >= 0)
26446 /* Otherwise trust get_attr_length. */
26450 l = get_attr_length_address (insn);
26451 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
26460 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
26464 ix86_avoid_jump_mispredicts (void)
26466 rtx insn, start = get_insns ();
26467 int nbytes = 0, njumps = 0;
26470 /* Look for all minimal intervals of instructions containing 4 jumps.
26471 The intervals are bounded by START and INSN. NBYTES is the total
26472 size of the instructions in the interval, including INSN and not including
26473 START. When NBYTES is smaller than 16, it is possible
26474 that the end of START and the end of INSN land in the same 16-byte page.
26476 The smallest offset in the page at which INSN can start is the case where
26477 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
26478 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN). */
26480 for (insn = start; insn; insn = NEXT_INSN (insn))
26484 if (LABEL_P (insn))
26486 int align = label_to_alignment (insn);
26487 int max_skip = label_to_max_skip (insn);
26491 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
26492 already in the current 16 byte page, because otherwise
26493 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
26494 bytes to reach a 16-byte boundary. */
26496 || (align <= 3 && max_skip != (1 << align) - 1))
26499 fprintf (dump_file, "Label %i with max_skip %i\n",
26500 INSN_UID (insn), max_skip);
26503 while (nbytes + max_skip >= 16)
26505 start = NEXT_INSN (start);
26506 if ((JUMP_P (start)
26507 && GET_CODE (PATTERN (start)) != ADDR_VEC
26508 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26510 njumps--, isjump = 1;
26513 nbytes -= min_insn_size (start);
26519 min_size = min_insn_size (insn);
26520 nbytes += min_size;
26522 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
26523 INSN_UID (insn), min_size);
26525 && GET_CODE (PATTERN (insn)) != ADDR_VEC
26526 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
26534 start = NEXT_INSN (start);
26535 if ((JUMP_P (start)
26536 && GET_CODE (PATTERN (start)) != ADDR_VEC
26537 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
26539 njumps--, isjump = 1;
26542 nbytes -= min_insn_size (start);
26544 gcc_assert (njumps >= 0);
26546 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
26547 INSN_UID (start), INSN_UID (insn), nbytes);
26549 if (njumps == 3 && isjump && nbytes < 16)
26551 int padsize = 15 - nbytes + min_insn_size (insn);
26554 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
26555 INSN_UID (insn), padsize);
26556 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
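/* A worked example of the padding computation: if the window holds
   four jumps in nbytes = 12 and INSN itself is 2 bytes, padsize is
   15 - 12 + 2 = 5; padding by 5 bytes before INSN guarantees the four
   jumps cannot all land in one 16-byte line, wherever the window is
   placed.  */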
26562 /* AMD Athlon works faster
26563 when RET is not the destination of a conditional jump and is not directly
26564 preceded by another jump instruction. We avoid the penalty by inserting a
26565 NOP just before the RET instructions in such cases. */
26567 ix86_pad_returns (void)
26572 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
26574 basic_block bb = e->src;
26575 rtx ret = BB_END (bb);
26577 bool replace = false;
26579 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
26580 || optimize_bb_for_size_p (bb))
26582 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
26583 if (active_insn_p (prev) || LABEL_P (prev))
26585 if (prev && LABEL_P (prev))
26590 FOR_EACH_EDGE (e, ei, bb->preds)
26591 if (EDGE_FREQUENCY (e) && e->src->index >= 0
26592 && !(e->flags & EDGE_FALLTHRU))
26597 prev = prev_active_insn (ret);
26599 && ((JUMP_P (prev) && any_condjump_p (prev))
26602 /* Empty functions get a branch mispredict even when the jump destination
26603 is not visible to us. */
26604 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
26609 emit_jump_insn_before (gen_return_internal_long (), ret);
26615 /* Implement machine specific optimizations. We implement padding of returns
26616 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
26620 if (optimize && optimize_function_for_speed_p (cfun))
26622 if (TARGET_PAD_RETURNS)
26623 ix86_pad_returns ();
26624 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
26625 if (TARGET_FOUR_JUMP_LIMIT)
26626 ix86_avoid_jump_mispredicts ();
26631 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
26634 x86_extended_QIreg_mentioned_p (rtx insn)
26637 extract_insn_cached (insn);
26638 for (i = 0; i < recog_data.n_operands; i++)
26639 if (REG_P (recog_data.operand[i])
26640 && REGNO (recog_data.operand[i]) > BX_REG)
26645 /* Return nonzero when P points to a register encoded via a REX prefix.
26646 Called via for_each_rtx. */
26648 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
26650 unsigned int regno;
26653 regno = REGNO (*p);
26654 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
26657 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
26660 x86_extended_reg_mentioned_p (rtx insn)
26662 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
26663 extended_reg_mentioned_1, NULL);
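/* For example, "addl %r8d, %eax" mentions %r8d and therefore requires
   a REX prefix, so the walk above returns nonzero for it, while
   "addl %ecx, %eax" uses only legacy registers and returns zero.  */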
26666 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
26667 optabs would emit if we didn't have TFmode patterns. */
26670 x86_emit_floatuns (rtx operands[2])
26672 rtx neglab, donelab, i0, i1, f0, in, out;
26673 enum machine_mode mode, inmode;
26675 inmode = GET_MODE (operands[1]);
26676 gcc_assert (inmode == SImode || inmode == DImode);
26679 in = force_reg (inmode, operands[1]);
26680 mode = GET_MODE (out);
26681 neglab = gen_label_rtx ();
26682 donelab = gen_label_rtx ();
26683 f0 = gen_reg_rtx (mode);
26685 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
26687 expand_float (out, in, 0);
26689 emit_jump_insn (gen_jump (donelab));
26692 emit_label (neglab);
26694 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
26696 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
26698 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
26700 expand_float (f0, i0, 0);
26702 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
26704 emit_label (donelab);
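/* For illustration, the same halve-and-double trick in scalar C (a
   hypothetical standalone helper, not code this function emits; it
   assumes DImode input and that only signed conversion is primitive):

     double
     u64_to_f64 (unsigned long long u)
     {
       if ((long long) u >= 0)
         return (double) (long long) u;
       u = (u >> 1) | (u & 1);
       {
         double f = (double) (long long) u;
         return f + f;
       }
     }

   The (u & 1) term mirrors the AND/IOR above: it keeps the discarded
   low bit so the final doubling rounds as if by a single conversion.  */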
26707 /* AVX does not support 32-byte integer vector operations,
26708 thus the longest vector we are faced with is V16QImode. */
26709 #define MAX_VECT_LEN 16
26711 struct expand_vec_perm_d
26713 rtx target, op0, op1;
26714 unsigned char perm[MAX_VECT_LEN];
26715 enum machine_mode vmode;
26716 unsigned char nelt;
26720 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
26721 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
26723 /* Get a vector mode of the same size as the original but with elements
26724 twice as wide. This is only guaranteed to apply to integral vectors. */
26726 static inline enum machine_mode
26727 get_mode_wider_vector (enum machine_mode o)
26729 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
26730 enum machine_mode n = GET_MODE_WIDER_MODE (o);
26731 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
26732 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
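/* E.g. V16QImode -> V8HImode, V8HImode -> V4SImode: the same vector
   size with half as many elements, each twice as wide.  */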
26736 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26737 with all elements equal to VAR. Return true if successful. */
26740 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
26741 rtx target, rtx val)
26764 /* First attempt to recognize VAL as-is. */
26765 dup = gen_rtx_VEC_DUPLICATE (mode, val);
26766 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
26767 if (recog_memoized (insn) < 0)
26770 /* If that fails, force VAL into a register. */
26773 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
26774 seq = get_insns ();
26777 emit_insn_before (seq, insn);
26779 ok = recog_memoized (insn) >= 0;
26788 if (TARGET_SSE || TARGET_3DNOW_A)
26792 val = gen_lowpart (SImode, val);
26793 x = gen_rtx_TRUNCATE (HImode, val);
26794 x = gen_rtx_VEC_DUPLICATE (mode, x);
26795 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26808 struct expand_vec_perm_d dperm;
26812 memset (&dperm, 0, sizeof (dperm));
26813 dperm.target = target;
26814 dperm.vmode = mode;
26815 dperm.nelt = GET_MODE_NUNITS (mode);
26816 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
26818 /* Extend to SImode using a paradoxical SUBREG. */
26819 tmp1 = gen_reg_rtx (SImode);
26820 emit_move_insn (tmp1, gen_lowpart (SImode, val));
26822 /* Insert the SImode value as low element of a V4SImode vector. */
26823 tmp2 = gen_lowpart (V4SImode, dperm.op0);
26824 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
26826 ok = (expand_vec_perm_1 (&dperm)
26827 || expand_vec_perm_broadcast_1 (&dperm));
26839 /* Replicate the value once into the next wider mode and recurse. */
26841 enum machine_mode smode, wsmode, wvmode;
26844 smode = GET_MODE_INNER (mode);
26845 wvmode = get_mode_wider_vector (mode);
26846 wsmode = GET_MODE_INNER (wvmode);
26848 val = convert_modes (wsmode, smode, val, true);
26849 x = expand_simple_binop (wsmode, ASHIFT, val,
26850 GEN_INT (GET_MODE_BITSIZE (smode)),
26851 NULL_RTX, 1, OPTAB_LIB_WIDEN);
26852 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
26854 x = gen_lowpart (wvmode, target);
26855 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
26863 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
26864 rtx x = gen_reg_rtx (hvmode);
26866 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
26869 x = gen_rtx_VEC_CONCAT (mode, x, x);
26870 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26879 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
26880 whose ONE_VAR element is VAR, and the other elements are zero. Return true if successful. */
26884 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
26885 rtx target, rtx var, int one_var)
26887 enum machine_mode vsimode;
26890 bool use_vector_set = false;
26895 /* For SSE4.1, we normally use vector set. But if the second
26896 element is zero and inter-unit moves are OK, we use movq instead. */
26898 use_vector_set = (TARGET_64BIT
26900 && !(TARGET_INTER_UNIT_MOVES
26906 use_vector_set = TARGET_SSE4_1;
26909 use_vector_set = TARGET_SSE2;
26912 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
26919 use_vector_set = TARGET_AVX;
26922 /* Use ix86_expand_vector_set in 64bit mode only. */
26923 use_vector_set = TARGET_AVX && TARGET_64BIT;
26929 if (use_vector_set)
26931 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
26932 var = force_reg (GET_MODE_INNER (mode), var);
26933 ix86_expand_vector_set (mmx_ok, target, var, one_var);
26949 var = force_reg (GET_MODE_INNER (mode), var);
26950 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
26951 emit_insn (gen_rtx_SET (VOIDmode, target, x));
26956 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
26957 new_target = gen_reg_rtx (mode);
26959 new_target = target;
26960 var = force_reg (GET_MODE_INNER (mode), var);
26961 x = gen_rtx_VEC_DUPLICATE (mode, var);
26962 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
26963 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
26966 /* We need to shuffle the value to the correct position, so
26967 create a new pseudo to store the intermediate result. */
26969 /* With SSE2, we can use the integer shuffle insns. */
26970 if (mode != V4SFmode && TARGET_SSE2)
26972 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
26974 GEN_INT (one_var == 1 ? 0 : 1),
26975 GEN_INT (one_var == 2 ? 0 : 1),
26976 GEN_INT (one_var == 3 ? 0 : 1)));
26977 if (target != new_target)
26978 emit_move_insn (target, new_target);
26982 /* Otherwise convert the intermediate result to V4SFmode and
26983 use the SSE1 shuffle instructions. */
26984 if (mode != V4SFmode)
26986 tmp = gen_reg_rtx (V4SFmode);
26987 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
26992 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
26994 GEN_INT (one_var == 1 ? 0 : 1),
26995 GEN_INT (one_var == 2 ? 0+4 : 1+4),
26996 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
26998 if (mode != V4SFmode)
26999 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27000 else if (tmp != target)
27001 emit_move_insn (target, tmp);
27003 else if (target != new_target)
27004 emit_move_insn (target, new_target);
27009 vsimode = V4SImode;
27015 vsimode = V2SImode;
27021 /* Zero extend the variable element to SImode and recurse. */
27022 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27024 x = gen_reg_rtx (vsimode);
27025 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27027 gcc_unreachable ();
27029 emit_move_insn (target, gen_lowpart (mode, x));
27037 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27038 consisting of the values in VALS. It is known that all elements
27039 except ONE_VAR are constants. Return true if successful. */
27042 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27043 rtx target, rtx vals, int one_var)
27045 rtx var = XVECEXP (vals, 0, one_var);
27046 enum machine_mode wmode;
27049 const_vec = copy_rtx (vals);
27050 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27051 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27059 /* For the two element vectors, it's just as easy to use
27060 the general case. */
27064 /* Use ix86_expand_vector_set in 64bit mode only. */
27087 /* There's no way to set one QImode entry easily. Combine
27088 the variable value with its adjacent constant value, and
27089 promote to an HImode set. */
27090 x = XVECEXP (vals, 0, one_var ^ 1);
27093 var = convert_modes (HImode, QImode, var, true);
27094 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27095 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27096 x = GEN_INT (INTVAL (x) & 0xff);
27100 var = convert_modes (HImode, QImode, var, true);
27101 x = gen_int_mode (INTVAL (x) << 8, HImode);
27103 if (x != const0_rtx)
27104 var = expand_simple_binop (HImode, IOR, var, x, var,
27105 1, OPTAB_LIB_WIDEN);
27107 x = gen_reg_rtx (wmode);
27108 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27109 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27111 emit_move_insn (target, gen_lowpart (mode, x));
27118 emit_move_insn (target, const_vec);
27119 ix86_expand_vector_set (mmx_ok, target, var, one_var);
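/* A worked example of the QImode pairing above: for a V8QImode vector
   with one_var == 3, the variable is paired with the constant at index
   2 (one_var ^ 1), the combined HImode value is (var << 8) | const on
   this little-endian layout, and a single V4HImode element set at
   index one_var >> 1 == 1 rewrites both bytes on top of the
   constant-pool copy.  */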
27123 /* A subroutine of ix86_expand_vector_init_general. Use vector
27124 concatenate to handle the most general case: all values variable,
27125 and none identical. */
27128 ix86_expand_vector_init_concat (enum machine_mode mode,
27129 rtx target, rtx *ops, int n)
27131 enum machine_mode cmode, hmode = VOIDmode;
27132 rtx first[8], second[4];
27172 gcc_unreachable ();
27175 if (!register_operand (ops[1], cmode))
27176 ops[1] = force_reg (cmode, ops[1]);
27177 if (!register_operand (ops[0], cmode))
27178 ops[0] = force_reg (cmode, ops[0]);
27179 emit_insn (gen_rtx_SET (VOIDmode, target,
27180 gen_rtx_VEC_CONCAT (mode, ops[0],
27200 gcc_unreachable ();
27216 gcc_unreachable ();
27221 /* FIXME: We process inputs backward to help RA. PR 36222. */
27224 for (; i > 0; i -= 2, j--)
27226 first[j] = gen_reg_rtx (cmode);
27227 v = gen_rtvec (2, ops[i - 1], ops[i]);
27228 ix86_expand_vector_init (false, first[j],
27229 gen_rtx_PARALLEL (cmode, v));
27235 gcc_assert (hmode != VOIDmode);
27236 for (i = j = 0; i < n; i += 2, j++)
27238 second[j] = gen_reg_rtx (hmode);
27239 ix86_expand_vector_init_concat (hmode, second [j],
27243 ix86_expand_vector_init_concat (mode, target, second, n);
27246 ix86_expand_vector_init_concat (mode, target, first, n);
27250 gcc_unreachable ();
27254 /* A subroutine of ix86_expand_vector_init_general. Use vector
27255 interleave to handle the most general case: all values variable,
27256 and none identical. */
27259 ix86_expand_vector_init_interleave (enum machine_mode mode,
27260 rtx target, rtx *ops, int n)
27262 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27265 rtx (*gen_load_even) (rtx, rtx, rtx);
27266 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27267 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27272 gen_load_even = gen_vec_setv8hi;
27273 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27274 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27275 inner_mode = HImode;
27276 first_imode = V4SImode;
27277 second_imode = V2DImode;
27278 third_imode = VOIDmode;
27281 gen_load_even = gen_vec_setv16qi;
27282 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27283 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27284 inner_mode = QImode;
27285 first_imode = V8HImode;
27286 second_imode = V4SImode;
27287 third_imode = V2DImode;
27290 gcc_unreachable ();
27293 for (i = 0; i < n; i++)
27295 /* Extend the odd element to SImode using a paradoxical SUBREG. */
27296 op0 = gen_reg_rtx (SImode);
27297 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27299 /* Insert the SImode value as low element of V4SImode vector. */
27300 op1 = gen_reg_rtx (V4SImode);
27301 op0 = gen_rtx_VEC_MERGE (V4SImode,
27302 gen_rtx_VEC_DUPLICATE (V4SImode,
27304 CONST0_RTX (V4SImode),
27306 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27308 /* Cast the V4SImode vector back to a vector in the original mode. */
27309 op0 = gen_reg_rtx (mode);
27310 emit_move_insn (op0, gen_lowpart (mode, op1));
27312 /* Load even elements into the second position. */
27313 emit_insn ((*gen_load_even) (op0,
27314 force_reg (inner_mode,
27318 /* Cast vector to FIRST_IMODE vector. */
27319 ops[i] = gen_reg_rtx (first_imode);
27320 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27323 /* Interleave low FIRST_IMODE vectors. */
27324 for (i = j = 0; i < n; i += 2, j++)
27326 op0 = gen_reg_rtx (first_imode);
27327 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27329 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27330 ops[j] = gen_reg_rtx (second_imode);
27331 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27334 /* Interleave low SECOND_IMODE vectors. */
27335 switch (second_imode)
27338 for (i = j = 0; i < n / 2; i += 2, j++)
27340 op0 = gen_reg_rtx (second_imode);
27341 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27344 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
27346 ops[j] = gen_reg_rtx (third_imode);
27347 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27349 second_imode = V2DImode;
27350 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27354 op0 = gen_reg_rtx (second_imode);
27355 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27358 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
27360 emit_insn (gen_rtx_SET (VOIDmode, target,
27361 gen_lowpart (mode, op0)));
27365 gcc_unreachable ();
27369 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27370 all values variable, and none identical. */
27373 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27374 rtx target, rtx vals)
27376 rtx ops[32], op0, op1;
27377 enum machine_mode half_mode = VOIDmode;
27384 if (!mmx_ok && !TARGET_SSE)
27396 n = GET_MODE_NUNITS (mode);
27397 for (i = 0; i < n; i++)
27398 ops[i] = XVECEXP (vals, 0, i);
27399 ix86_expand_vector_init_concat (mode, target, ops, n);
27403 half_mode = V16QImode;
27407 half_mode = V8HImode;
27411 n = GET_MODE_NUNITS (mode);
27412 for (i = 0; i < n; i++)
27413 ops[i] = XVECEXP (vals, 0, i);
27414 op0 = gen_reg_rtx (half_mode);
27415 op1 = gen_reg_rtx (half_mode);
27416 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27418 ix86_expand_vector_init_interleave (half_mode, op1,
27419 &ops [n >> 1], n >> 2);
27420 emit_insn (gen_rtx_SET (VOIDmode, target,
27421 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27425 if (!TARGET_SSE4_1)
27433 /* Don't use ix86_expand_vector_init_interleave if we can't
27434 move from GPR to SSE register directly. */
27435 if (!TARGET_INTER_UNIT_MOVES)
27438 n = GET_MODE_NUNITS (mode);
27439 for (i = 0; i < n; i++)
27440 ops[i] = XVECEXP (vals, 0, i);
27441 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27449 gcc_unreachable ();
27453 int i, j, n_elts, n_words, n_elt_per_word;
27454 enum machine_mode inner_mode;
27455 rtx words[4], shift;
27457 inner_mode = GET_MODE_INNER (mode);
27458 n_elts = GET_MODE_NUNITS (mode);
27459 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27460 n_elt_per_word = n_elts / n_words;
27461 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27463 for (i = 0; i < n_words; ++i)
27465 rtx word = NULL_RTX;
27467 for (j = 0; j < n_elt_per_word; ++j)
27469 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27470 elt = convert_modes (word_mode, inner_mode, elt, true);
27476 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27477 word, 1, OPTAB_LIB_WIDEN);
27478 word = expand_simple_binop (word_mode, IOR, word, elt,
27479 word, 1, OPTAB_LIB_WIDEN);
27487 emit_move_insn (target, gen_lowpart (mode, words[0]));
27488 else if (n_words == 2)
27490 rtx tmp = gen_reg_rtx (mode);
27491 emit_clobber (tmp);
27492 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27493 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27494 emit_move_insn (target, tmp);
27496 else if (n_words == 4)
27498 rtx tmp = gen_reg_rtx (V4SImode);
27499 gcc_assert (word_mode == SImode);
27500 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
27501 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
27502 emit_move_insn (target, gen_lowpart (mode, tmp));
27505 gcc_unreachable ();
27509 /* Initialize vector TARGET via VALS. Suppress the use of MMX
27510 instructions unless MMX_OK is true. */
27513 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
27515 enum machine_mode mode = GET_MODE (target);
27516 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27517 int n_elts = GET_MODE_NUNITS (mode);
27518 int n_var = 0, one_var = -1;
27519 bool all_same = true, all_const_zero = true;
27523 for (i = 0; i < n_elts; ++i)
27525 x = XVECEXP (vals, 0, i);
27526 if (!(CONST_INT_P (x)
27527 || GET_CODE (x) == CONST_DOUBLE
27528 || GET_CODE (x) == CONST_FIXED))
27529 n_var++, one_var = i;
27530 else if (x != CONST0_RTX (inner_mode))
27531 all_const_zero = false;
27532 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
27536 /* Constants are best loaded from the constant pool. */
27539 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
27543 /* If all values are identical, broadcast the value. */
27545 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
27546 XVECEXP (vals, 0, 0)))
27549 /* Values where only one field is non-constant are best loaded from
27550 the pool and overwritten via move later. */
27554 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
27555 XVECEXP (vals, 0, one_var),
27559 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
27563 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
27567 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
27569 enum machine_mode mode = GET_MODE (target);
27570 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27571 enum machine_mode half_mode;
27572 bool use_vec_merge = false;
27574 static rtx (*gen_extract[6][2]) (rtx, rtx)
27576 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
27577 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
27578 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
27579 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
27580 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
27581 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
27583 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
27585 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
27586 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
27587 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
27588 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
27589 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
27590 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
27600 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
27601 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
27603 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
27605 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
27606 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27612 use_vec_merge = TARGET_SSE4_1;
27620 /* For the two element vectors, we implement a VEC_CONCAT with
27621 the extraction of the other element. */
27623 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
27624 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
27627 op0 = val, op1 = tmp;
27629 op0 = tmp, op1 = val;
27631 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
27632 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27637 use_vec_merge = TARGET_SSE4_1;
27644 use_vec_merge = true;
27648 /* tmp = target = A B C D */
27649 tmp = copy_to_reg (target);
27650 /* target = A A B B */
27651 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
27652 /* target = X A B B */
27653 ix86_expand_vector_set (false, target, val, 0);
27654 /* target = A X C D */
27655 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27656 const1_rtx, const0_rtx,
27657 GEN_INT (2+4), GEN_INT (3+4)));
27661 /* tmp = target = A B C D */
27662 tmp = copy_to_reg (target);
27663 /* tmp = X B C D */
27664 ix86_expand_vector_set (false, tmp, val, 0);
27665 /* target = A B X D */
27666 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27667 const0_rtx, const1_rtx,
27668 GEN_INT (0+4), GEN_INT (3+4)));
27672 /* tmp = target = A B C D */
27673 tmp = copy_to_reg (target);
27674 /* tmp = X B C D */
27675 ix86_expand_vector_set (false, tmp, val, 0);
27676 /* target = A B X D */
27677 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
27678 const0_rtx, const1_rtx,
27679 GEN_INT (2+4), GEN_INT (0+4)));
27683 gcc_unreachable ();
27688 use_vec_merge = TARGET_SSE4_1;
27692 /* Element 0 handled by vec_merge below. */
27695 use_vec_merge = true;
27701 /* With SSE2, use integer shuffles to swap element 0 and ELT,
27702 store into element 0, then shuffle them back. */
27706 order[0] = GEN_INT (elt);
27707 order[1] = const1_rtx;
27708 order[2] = const2_rtx;
27709 order[3] = GEN_INT (3);
27710 order[elt] = const0_rtx;
27712 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27713 order[1], order[2], order[3]));
27715 ix86_expand_vector_set (false, target, val, 0);
27717 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
27718 order[1], order[2], order[3]));
27722 /* For SSE1, we have to reuse the V4SF code. */
27723 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
27724 gen_lowpart (SFmode, val), elt);
27729 use_vec_merge = TARGET_SSE2;
27732 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27736 use_vec_merge = TARGET_SSE4_1;
27743 half_mode = V16QImode;
27749 half_mode = V8HImode;
27755 half_mode = V4SImode;
27761 half_mode = V2DImode;
27767 half_mode = V4SFmode;
27773 half_mode = V2DFmode;
27779 /* Compute offset. */
27783 gcc_assert (i <= 1);
27785 /* Extract the half. */
27786 tmp = gen_reg_rtx (half_mode);
27787 emit_insn ((*gen_extract[j][i]) (tmp, target));
27789 /* Put val in tmp at elt. */
27790 ix86_expand_vector_set (false, tmp, val, elt);
27793 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
27802 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
27803 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
27804 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27808 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27810 emit_move_insn (mem, target);
27812 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27813 emit_move_insn (tmp, val);
27815 emit_move_insn (target, mem);
27820 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
27822 enum machine_mode mode = GET_MODE (vec);
27823 enum machine_mode inner_mode = GET_MODE_INNER (mode);
27824 bool use_vec_extr = false;
27837 use_vec_extr = true;
27841 use_vec_extr = TARGET_SSE4_1;
27853 tmp = gen_reg_rtx (mode);
27854 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
27855 GEN_INT (elt), GEN_INT (elt),
27856 GEN_INT (elt+4), GEN_INT (elt+4)));
27860 tmp = gen_reg_rtx (mode);
27861 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
27865 gcc_unreachable ();
27868 use_vec_extr = true;
27873 use_vec_extr = TARGET_SSE4_1;
27887 tmp = gen_reg_rtx (mode);
27888 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
27889 GEN_INT (elt), GEN_INT (elt),
27890 GEN_INT (elt), GEN_INT (elt)));
27894 tmp = gen_reg_rtx (mode);
27895 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
27899 gcc_unreachable ();
27902 use_vec_extr = true;
27907 /* For SSE1, we have to reuse the V4SF code. */
27908 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
27909 gen_lowpart (V4SFmode, vec), elt);
27915 use_vec_extr = TARGET_SSE2;
27918 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
27922 use_vec_extr = TARGET_SSE4_1;
27926 /* ??? Could extract the appropriate HImode element and shift. */
27933 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
27934 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
27936 /* Let the rtl optimizers know about the zero extension performed. */
27937 if (inner_mode == QImode || inner_mode == HImode)
27939 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
27940 target = gen_lowpart (SImode, target);
27943 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
27947 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
27949 emit_move_insn (mem, vec);
27951 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
27952 emit_move_insn (target, tmp);
27956 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
27957 pattern to reduce; DEST is the destination; IN is the input vector. */
27960 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
27962 rtx tmp1, tmp2, tmp3;
27964 tmp1 = gen_reg_rtx (V4SFmode);
27965 tmp2 = gen_reg_rtx (V4SFmode);
27966 tmp3 = gen_reg_rtx (V4SFmode);
27968 emit_insn (gen_sse_movhlps (tmp1, in, in));
27969 emit_insn (fn (tmp2, tmp1, in));
27971 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
27972 const1_rtx, const1_rtx,
27973 GEN_INT (1+4), GEN_INT (1+4)));
27974 emit_insn (fn (dest, tmp2, tmp3));
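/* E.g. for IN = { a, b, c, d } and FN = addition: movhlps brings
   { c, d } down into the low half of tmp1, tmp2 then carries a+c and
   b+d in its two low elements, the shufps replicates b+d into element
   0 of tmp3, and the final FN leaves the full sum a+b+c+d in element 0
   of DEST.  */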
27977 /* Target hook for scalar_mode_supported_p. */
27979 ix86_scalar_mode_supported_p (enum machine_mode mode)
27981 if (DECIMAL_FLOAT_MODE_P (mode))
27982 return default_decimal_float_supported_p ();
27983 else if (mode == TFmode)
27986 return default_scalar_mode_supported_p (mode);
27989 /* Implements target hook vector_mode_supported_p. */
27991 ix86_vector_mode_supported_p (enum machine_mode mode)
27993 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
27995 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
27997 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
27999 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28001 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28006 /* Target hook for c_mode_for_suffix. */
28007 static enum machine_mode
28008 ix86_c_mode_for_suffix (char suffix)
28018 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28020 We do this in the new i386 backend to maintain source compatibility
28021 with the old cc0-based compiler. */
28024 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28025 tree inputs ATTRIBUTE_UNUSED,
28028 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28030 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28035 /* Implements target vector targetm.asm.encode_section_info. This
28036 is not used by netware. */
28038 static void ATTRIBUTE_UNUSED
28039 ix86_encode_section_info (tree decl, rtx rtl, int first)
28041 default_encode_section_info (decl, rtl, first);
28043 if (TREE_CODE (decl) == VAR_DECL
28044 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28045 && ix86_in_large_data_p (decl))
28046 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28049 /* Worker function for REVERSE_CONDITION. */
28052 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28054 return (mode != CCFPmode && mode != CCFPUmode
28055 ? reverse_condition (code)
28056 : reverse_condition_maybe_unordered (code));
28059 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
28063 output_387_reg_move (rtx insn, rtx *operands)
28065 if (REG_P (operands[0]))
28067 if (REG_P (operands[1])
28068 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28070 if (REGNO (operands[0]) == FIRST_STACK_REG)
28071 return output_387_ffreep (operands, 0);
28072 return "fstp\t%y0";
28074 if (STACK_TOP_P (operands[0]))
28075 return "fld%Z1\t%y1";
28078 else if (MEM_P (operands[0]))
28080 gcc_assert (REG_P (operands[1]));
28081 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28082 return "fstp%Z0\t%y0";
28085 /* There is no non-popping store to memory for XFmode.
28086 So if we need one, follow the store with a load. */
28087 if (GET_MODE (operands[0]) == XFmode)
28088 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
28090 return "fst%Z0\t%y0";
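/* For illustration, some outputs of the templates above (AT&T syntax,
   %Z expanding to the mode suffix): a dead source popped into %st(1)
   prints as "fstp %st(1)"; an XFmode load prints as "fldt"; and an
   XFmode store, having no non-popping form, prints as the "fstpt"
   then "fldt" pair from the last pattern.  */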
28097 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
28098 the FP status register is set. */
28101 ix86_emit_fp_unordered_jump (rtx label)
28103 rtx reg = gen_reg_rtx (HImode);
28106 emit_insn (gen_x86_fnstsw_1 (reg));
28108 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28110 emit_insn (gen_x86_sahf_1 (reg));
28112 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28113 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28117 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28119 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28120 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28123 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28124 gen_rtx_LABEL_REF (VOIDmode, label),
28126 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28128 emit_jump_insn (temp);
28129 predict_jump (REG_BR_PROB_BASE * 10 / 100);
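
/* Roughly, the two instruction sequences this emits (a sketch, not taken
   verbatim from assembler output):

     fnstsw  %ax                 fnstsw  %ax
     sahf                        testb   $4, %ah
     jp      label               jne     label

   C2 is bit 10 of the FP status word, i.e. bit 2 of %ah; after SAHF it
   lands in PF.  Instructions such as fptan and fprem set C2 when the
   operand was out of range.  */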
/* Output code to perform a log1p XFmode calculation.  */

void ix86_emit_i387_log1p (rtx op0, rtx op1)
{
  rtx label1 = gen_label_rtx ();
  rtx label2 = gen_label_rtx ();

  rtx tmp = gen_reg_rtx (XFmode);
  rtx tmp2 = gen_reg_rtx (XFmode);
  rtx test;

  emit_insn (gen_absxf2 (tmp, op1));
  test = gen_rtx_GE (VOIDmode, tmp,
    CONST_DOUBLE_FROM_REAL_VALUE (
       REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
       XFmode));
  emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));

  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
  emit_jump (label2);

  emit_label (label1);
  emit_move_insn (tmp, CONST1_RTX (XFmode));
  emit_insn (gen_addxf3 (tmp, op1, tmp));
  emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
  emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));

  emit_label (label2);
}
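
/* Why the 0.29289... threshold: fyl2xp1 computes y * log2 (x + 1) but is
   only specified for |x| < 1 - sqrt(2)/2 ~= 0.2928932; passing x directly
   avoids the cancellation that forming 1 + x first would cause near zero.
   Outside that range plain fyl2x on (1 + x) loses nothing.  Either way

     log1p (x) = ln (2) * log2 (1 + x)

   with the fldln2 constant supplying the ln(2) scale factor.  */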
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */

void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
{
  rtx x0, x1, e0, e1, two;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  x1 = gen_reg_rtx (mode);

  two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);

  if (VECTOR_MODE_P (mode))
    two = ix86_build_const_vector (SFmode, true, two);

  two = force_reg (mode, two);

  /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */

  /* x0 = rcp(b) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
                                          UNSPEC_RCP)));
  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = x0 * b */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, x0, b)));
  /* x1 = 2. - e1 */
  emit_insn (gen_rtx_SET (VOIDmode, x1,
                          gen_rtx_MINUS (mode, two, e1)));
  /* res = e0 * x1 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e0, x1)));
}
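
/* The identity behind the sequence above: with x0 an estimate of 1/b
   carrying relative error eps, i.e. x0 = (1 - eps) / b, one
   Newton-Raphson step for f(x) = 1/x - b gives

     x1 = x0 * (2 - b * x0) = (1 - eps^2) / b

   so the error is squared by each step.  rcpss/rcpps guarantee roughly
   12 bits, so the single step here already approaches full SFmode
   precision.  */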
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root.  */

void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
                         bool recip)
{
  rtx x0, e0, e1, e2, e3, mthree, mhalf;
  REAL_VALUE_TYPE r;

  x0 = gen_reg_rtx (mode);
  e0 = gen_reg_rtx (mode);
  e1 = gen_reg_rtx (mode);
  e2 = gen_reg_rtx (mode);
  e3 = gen_reg_rtx (mode);

  real_from_integer (&r, VOIDmode, -3, -1, 0);
  mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
  mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);

  if (VECTOR_MODE_P (mode))
    {
      mthree = ix86_build_const_vector (SFmode, true, mthree);
      mhalf = ix86_build_const_vector (SFmode, true, mhalf);
    }

  /* sqrt(a)  = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
     rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)  */

  /* x0 = rsqrt(a) estimate */
  emit_insn (gen_rtx_SET (VOIDmode, x0,
                          gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
                                          UNSPEC_RSQRT)));

  /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero, mask;

      zero = gen_reg_rtx (mode);
      mask = gen_reg_rtx (mode);

      zero = force_reg (mode, CONST0_RTX (mode));
      emit_insn (gen_rtx_SET (VOIDmode, mask,
                              gen_rtx_NE (mode, zero, a)));

      emit_insn (gen_rtx_SET (VOIDmode, x0,
                              gen_rtx_AND (mode, x0, mask)));
    }

  /* e0 = x0 * a */
  emit_insn (gen_rtx_SET (VOIDmode, e0,
                          gen_rtx_MULT (mode, x0, a)));
  /* e1 = e0 * x0 */
  emit_insn (gen_rtx_SET (VOIDmode, e1,
                          gen_rtx_MULT (mode, e0, x0)));

  /* e2 = e1 - 3. */
  mthree = force_reg (mode, mthree);
  emit_insn (gen_rtx_SET (VOIDmode, e2,
                          gen_rtx_PLUS (mode, e1, mthree)));

  mhalf = force_reg (mode, mhalf);
  if (recip)
    /* e3 = -.5 * x0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, x0, mhalf)));
  else
    /* e3 = -.5 * e0 */
    emit_insn (gen_rtx_SET (VOIDmode, e3,
                            gen_rtx_MULT (mode, e0, mhalf)));
  /* ret = e2 * e3 */
  emit_insn (gen_rtx_SET (VOIDmode, res,
                          gen_rtx_MULT (mode, e2, e3)));
}
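
/* Derivation of the formula used above: for f(x) = 1/x^2 - a the
   Newton-Raphson step is

     x1 = x0 * (3 - a * x0^2) / 2 = -0.5 * x0 * (a * x0 * x0 - 3.0)

   which is exactly the rsqrt line; multiplying once more by a gives
   sqrt(a) ~= a * rsqrt(a).  As in the division sequence, the ~12-bit
   rsqrtss estimate is refined to nearly full single precision by the
   one step.  */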
/* Solaris implementation of TARGET_ASM_NAMED_SECTION.  */

static void ATTRIBUTE_UNUSED
i386_solaris_elf_named_section (const char *name, unsigned int flags,
                                tree decl)
{
  /* With Binutils 2.15, the "@unwind" marker must be specified on
     every occurrence of the ".eh_frame" section, not just the first
     one.  */
  if (TARGET_64BIT
      && strcmp (name, ".eh_frame") == 0)
    {
      fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
               flags & SECTION_WRITE ? "aw" : "a");
      return;
    }
  default_elf_asm_named_section (name, flags, decl);
}
/* Return the mangling of TYPE if it is an extended fundamental type.  */

static const char *
ix86_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  switch (TYPE_MODE (type))
    {
    case TFmode:
      /* __float128 is "g".  */
      return "g";
    case XFmode:
      /* "long double" or __float80 is "e".  */
      return "e";
    default:
      return NULL;
    }
}
/* For 32-bit code we can save PIC register setup by using
   __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
   register, so it is better to call __stack_chk_fail directly.  */

static tree
ix86_stack_protect_fail (void)
{
  return TARGET_64BIT
         ? default_external_stack_protect_fail ()
         : default_hidden_stack_protect_fail ();
}
/* Select a format to encode pointers in exception handling data.  CODE
   is 0 for data, 1 for code labels, 2 for function pointers.  GLOBAL is
   true if the symbol may be affected by dynamic relocations.

   ??? All x86 object file formats are capable of representing this.
   After all, the relocation needed is the same as for the call insn.
   Whether or not a particular assembler allows us to enter such, I
   guess we'll have to see.  */

int
asm_preferred_eh_data_format (int code, int global)
{
  if (flag_pic)
    {
      int type = DW_EH_PE_sdata8;
      if (!TARGET_64BIT
          || ix86_cmodel == CM_SMALL_PIC
          || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
        type = DW_EH_PE_sdata4;
      return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
    }
  if (ix86_cmodel == CM_SMALL
      || (ix86_cmodel == CM_MEDIUM && code))
    return DW_EH_PE_udata4;
  return DW_EH_PE_absptr;
}
/* Expand copysign from SIGN to the positive value ABS_VALUE
   storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
   the sign bit.  */

static void
ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
{
  enum machine_mode mode = GET_MODE (sign);
  rtx sgn = gen_reg_rtx (mode);
  if (mask == NULL_RTX)
    {
      mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
      if (!VECTOR_MODE_P (mode))
        {
          /* We need to generate a scalar mode mask in this case.  */
          rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
          tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
          mask = gen_reg_rtx (mode);
          emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
        }
    }
  else
    mask = gen_rtx_NOT (mode, mask);

  emit_insn (gen_rtx_SET (VOIDmode, sgn,
                          gen_rtx_AND (mode, mask, sign)));
  emit_insn (gen_rtx_SET (VOIDmode, result,
                          gen_rtx_IOR (mode, abs_value, sgn)));
}
/* Expand fabs (OP0) and return a new rtx that holds the result.  The
   mask for masking out the sign-bit is stored in *SMASK, if that is
   non-null.  */

static rtx
ix86_expand_sse_fabs (rtx op0, rtx *smask)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx xa, mask;

  xa = gen_reg_rtx (mode);
  mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
  if (!VECTOR_MODE_P (mode))
    {
      /* We need to generate a scalar mode mask in this case.  */
      rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
      tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
      mask = gen_reg_rtx (mode);
      emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
    }
  emit_insn (gen_rtx_SET (VOIDmode, xa,
                          gen_rtx_AND (mode, op0, mask)));

  if (smask)
    *smask = mask;

  return xa;
}
/* Expands a comparison of OP0 with OP1 using comparison code CODE,
   swapping the operands if SWAP_OPERANDS is true.  The expanded
   code is a forward jump to a newly created label in case the
   comparison is true.  The generated label rtx is returned.  */
static rtx
ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
                                  bool swap_operands)
{
  rtx label, tmp;

  if (swap_operands)
    {
      tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  label = gen_label_rtx ();
  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_COMPARE (CCFPUmode, op0, op1)));
  tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
                              gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
  JUMP_LABEL (tmp) = label;

  return label;
}
/* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
   using comparison code CODE.  Operands are swapped for the comparison if
   SWAP_OPERANDS is true.  Returns a rtx for the generated mask.  */
static rtx
ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
                              bool swap_operands)
{
  enum machine_mode mode = GET_MODE (op0);
  rtx mask = gen_reg_rtx (mode);

  if (swap_operands)
    {
      rtx tmp = op0;
      op0 = op1;
      op1 = tmp;
    }

  if (mode == DFmode)
    emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
                                    gen_rtx_fmt_ee (code, mode, op0, op1)));
  else
    emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
                                   gen_rtx_fmt_ee (code, mode, op0, op1)));

  return mask;
}
/* Generate and return a rtx of mode MODE for 2**n where n is the number
   of bits of the mantissa of MODE, which must be one of DFmode or SFmode.  */
static rtx
ix86_gen_TWO52 (enum machine_mode mode)
{
  REAL_VALUE_TYPE TWO52r;
  rtx TWO52;

  real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
  TWO52 = const_double_from_real_value (TWO52r, mode);
  TWO52 = force_reg (mode, TWO52);

  return TWO52;
}
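
/* The constant feeds the classic "add and subtract 2^52" trick used by
   the rounding expanders below: once |x| < 2^52 (2^23 for SFmode),

     x + 2^52 - 2^52

   leaves no mantissa bits below the binary point, so the result is x
   rounded to an integer in the current round-to-nearest-even mode,
   e.g. 2.7 + 2^52 - 2^52 == 3.0 while 2.5 + 2^52 - 2^52 == 2.0.  */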
/* Expand SSE sequence for computing lround from OP1 storing
   into OP0.  */
void
ix86_expand_lround (rtx op0, rtx op1)
{
  /* C code for the stuff we're doing below:
       tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
       return (long)tmp;
   */
  enum machine_mode mode = GET_MODE (op1);
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;
  rtx adj;

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* adj = copysign (0.5, op1) */
  adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
  ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);

  /* adj = op1 + adj */
  adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);

  /* op0 = (imode)adj */
  expand_fix (op0, adj, 0);
}
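
/* Why nextafter (0.5, 0.0) rather than 0.5 itself: for the largest
   double below 0.5, x = 0.49999999999999994, the sum x + 0.5 rounds up
   to exactly 1.0, yet lround must yield 0.  Adding the slightly smaller
   constant keeps that case, and every other below-half-way case, from
   being pushed over the boundary, while exact half-way cases still
   round away from zero as lround requires.  */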
/* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
   into OP0.  */
void
ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
{
  /* C code for the stuff we're doing below (for do_floor):
       xi = (long)op1;
       xi -= (double)xi > op1 ? 1 : 0;
       return xi;
   */
  enum machine_mode fmode = GET_MODE (op1);
  enum machine_mode imode = GET_MODE (op0);
  rtx ireg, freg, label, tmp;

  /* reg = (long)op1 */
  ireg = gen_reg_rtx (imode);
  expand_fix (ireg, op1, 0);

  /* freg = (double)reg */
  freg = gen_reg_rtx (fmode);
  expand_float (freg, ireg, 0);

  /* ireg = (freg > op1) ? ireg - 1 : ireg */
  label = ix86_expand_sse_compare_and_jump (UNLE,
                                            freg, op1, !do_floor);
  tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
                             ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (ireg, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (op0, ireg);
}
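
/* Worked floor example: op1 = -2.3 truncates to ireg = -2; re-floating
   gives freg = -2.0, and -2.0 > -2.3, so one is subtracted, yielding -3.
   For op1 = 2.7, freg = 2.0 is not greater than 2.7 and the truncated
   value 2 is already the floor.  */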
/* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
   result in OPERAND0.  */
void
ix86_expand_rint (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
       xa = fabs (operand1);
       if (!isless (xa, 2**52))
         return operand1;
       xa = xa + 2**52 - 2**52;
       return copysign (xa, operand1);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, xa, label, TWO52, mask;

  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  TWO52 = ix86_gen_TWO52 (mode);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  ix86_sse_copysign_to_positive (res, xa, res, mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       xa = xa + TWO52 - TWO52;
       x2 = copysign (xa, x);
     then compensate: floor subtracts 1 when x2 > x, ceil adds 1 when
     x2 < x, and the result keeps the sign of x.  */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = xa + TWO52 - TWO52; */
  xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);

  /* xa = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (xa, xa, res, mask);

  /* generate 1.0 or -1.0 */
  one = force_reg (mode,
                   const_double_from_real_value (do_floor
                                                 ? dconst1 : dconstm1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  /* We always need to subtract here to preserve signed zero.  */
  tmp = expand_simple_binop (mode, MINUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
{
  /* C code for the stuff we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       x2 = (double)(long)x;
     then compensate: floor subtracts 1 when x2 > x, ceil adds 1 when
     x2 < x;
       if (HONOR_SIGNED_ZEROS (mode))
         return copysign (x2, x);
       return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, tmp, label, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (xa, xi, 0);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
                             xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
{
  /* C code for the stuff we expand below.
       double xa = fabs (x), xa2, x2;
       if (!isless (xa, TWO52))
         return x;
     Using the absolute value and copying back sign makes
     -0.0 -> -0.0 correct.
       xa2 = xa + TWO52 - TWO52;
     Compensate.
       dxa = xa2 - xa;
       if (dxa <= -0.5)
         xa2 += 1;
       else if (dxa > 0.5)
         xa2 -= 1;
       x2 = copysign (xa2, x);
       return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* xa2 = xa + TWO52 - TWO52; */
  xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);

  /* dxa = xa2 - xa; */
  dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);

  /* generate 0.5, 1.0 and -0.5 */
  half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
  one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
  mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
                               0, OPTAB_DIRECT);

  /* Compensate.  */
  tmp = gen_reg_rtx (mode);
  /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
  /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
  tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
  emit_insn (gen_rtx_SET (VOIDmode, tmp,
                          gen_rtx_AND (mode, one, tmp)));
  xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);

  /* res = copysign (xa2, operand1) */
  ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
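
/* Worked example for the compensation: x = 2.5 gives xa2 = 2.0, since
   the TWO52 trick rounds half-way cases to even, so dxa = -0.5; the
   dxa <= -0.5 test fires and xa2 becomes 3.0, restoring the
   round-half-away-from-zero semantics that round () requires.  */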
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       x2 = (double)(long)x;
       if (HONOR_SIGNED_ZEROS (mode))
         return copysign (x2, x);
       return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode
   truncation via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
       double xa = fabs (x), x2;
       if (!isless (xa, TWO52))
         return x;
       xa2 = xa + TWO52 - TWO52;
     Compensate:
       if (xa2 > xa)
         xa2 -= 1.0;
       x2 = copysign (xa2, x);
       return x2;
   */
  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
       double xa = fabs (x);
       if (!isless (xa, TWO52))
         return x;
       xa = (double)(long)(xa + nextafter (0.5, 0.0));
       return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL }
};
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.

     TODO: The values in individual target tables have to be tuned or new
     fields may be needed.  For example, on K8 the default branch path is the
     not-taken path.  If the taken path is predicted correctly, the minimum
     penalty of going down the taken-path is 1 cycle.  If the taken-path is
     not predicted correctly, then the minimum penalty is 10 cycles.  */

  if (runtime_test)
    return (-(ix86_cost->cond_taken_branch_cost));
  else
    return 0;
}
/* Implement targetm.vectorize.builtin_vec_perm.  */

static tree
ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
{
  tree itype = TREE_TYPE (vec_type);
  bool u = TYPE_UNSIGNED (itype);
  enum machine_mode vmode = TYPE_MODE (vec_type);
  enum ix86_builtins fcode = fcode; /* Silence bogus warning.  */
  bool ok = TARGET_SSE2;

  switch (vmode)
    {
    case V4DFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V4DF;
      goto do_di;
    case V2DFmode:
      fcode = IX86_BUILTIN_VEC_PERM_V2DF;
    do_di:
      itype = ix86_get_builtin_type (IX86_BT_DI);
      break;

    case V8SFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V8SF;
      goto do_si;
    case V4SFmode:
      fcode = IX86_BUILTIN_VEC_PERM_V4SF;
    do_si:
      itype = ix86_get_builtin_type (IX86_BT_SI);
      break;

    case V2DImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
      break;
    case V4SImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
      break;
    case V8HImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
      break;
    case V16QImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
      break;

    default:
      return NULL_TREE;
    }

  if (!ok)
    return NULL_TREE;

  *mask_type = itype;
  return ix86_builtins[(int) fcode];
}
/* Return a vector mode with twice as many elements as VMODE.  */
/* ??? Consider moving this to a table generated by genmodes.c.  */

static enum machine_mode
doublesize_vector_mode (enum machine_mode vmode)
{
  switch (vmode)
    {
    case V2SFmode:	return V4SFmode;
    case V1DImode:	return V2DImode;
    case V2SImode:	return V4SImode;
    case V4HImode:	return V8HImode;
    case V8QImode:	return V16QImode;

    case V2DFmode:	return V4DFmode;
    case V4SFmode:	return V8SFmode;
    case V2DImode:	return V4DImode;
    case V4SImode:	return V8SImode;
    case V8HImode:	return V16HImode;
    case V16QImode:	return V32QImode;

    case V4DFmode:	return V8DFmode;
    case V8SFmode:	return V16SFmode;
    case V4DImode:	return V8DImode;
    case V8SImode:	return V16SImode;
    case V16HImode:	return V32HImode;
    case V32QImode:	return V64QImode;

    default:
      gcc_unreachable ();
    }
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = doublesize_vector_mode (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
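
/* Shape of the RTL these two helpers produce, sketched for a V4SF
   interleave-low of two operands (perm = { 0, 4, 1, 5 }):

     (set (reg:V4SF target)
          (vec_select:V4SF
            (vec_concat:V8SF (reg:V4SF op0) (reg:V4SF op1))
            (parallel [(const_int 0) (const_int 4)
                       (const_int 1) (const_int 5)])))

   which recog will match whenever the active ISA provides an
   unpcklps-style pattern for it in sse.md.  */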
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;

  if (!TARGET_SSE4_1 || d->op0 == d->op1)
    return false;
  if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */

  /* For bytes, see if bytes move in pairs so we can use pblendw with
     an immediate argument, rather than pblendvb with a vector argument.  */
  if (vmode == V16QImode)
    {
      bool pblendw_ok = true;
      for (i = 0; i < 16 && pblendw_ok; i += 2)
        pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);

      if (!pblendw_ok)
        {
          rtx rperm[16], vperm;

          for (i = 0; i < nelt; ++i)
            rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
          vperm = force_reg (V16QImode, vperm);

          emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
          return true;
        }
    }

  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      goto do_subreg;

    case V16QImode:
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;

    do_subreg:
      vmode = V8HImode;
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
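
/* Example of the immediate construction: a V8HImode blend taking
   elements { 0, 9, 10, 3, 4, 13, 6, 7 } draws from the second operand
   at positions 1, 2 and 5, so mask = 0b00100110 = 0x26 and a single
   pblendw $0x26 implements the permutation.  */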
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb or vpperm.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz;
  rtx rperm[16], vperm, target, op0, op1;

  if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
    return false;
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      for (j = 0; j < eltsz; ++j)
        rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
  vperm = force_reg (V16QImode, vperm);

  target = gen_lowpart (V16QImode, d->target);
  op0 = gen_lowpart (V16QImode, d->op0);
  if (d->op0 == d->op1)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
  else
    {
      op1 = gen_lowpart (V16QImode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
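
/* Byte-mask construction example: a V4SI permutation { 2, 1, 0, 3 }
   has eltsz == 4, so the V16QI control vector becomes

     { 8,9,10,11,  4,5,6,7,  0,1,2,3,  12,13,14,15 }

   i.e. each element index is scaled to its first byte and the
   remaining bytes of the element follow consecutively.  */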
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;

      for (i = 0; i < nelt; i++)
        perm2[i] = d->perm[i] & mask;

      if (expand_vselect (d->target, d->op0, perm2, nelt))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }
          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }
      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
                                  gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
        in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
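
/* Example: for a V4SI permutation { 2, 3, 4, 5 }, min == 2, so the
   palignr shifts the concatenated pair down by 64 bits; the residual
   permutation becomes { 0, 1, 2, 3 }, the in-order case, and no further
   shuffle is needed.  */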
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned contents, h1, h2, h3, h4;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok;

  if (d->op0 == d->op1)
    return false;

  /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
     lanes.  We can use similar techniques with the vperm2f128 instruction,
     but it requires slightly different logic.  */
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  /* Split the two input vectors into 4 halves.  */
  h1 = (1u << nelt2) - 1;
  h2 = h1 << nelt2;
  h3 = h2 << nelt2;
  h4 = h3 << nelt2;

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  /* If the elements from the low halves use interleave low, and similarly
     for interleave high.  If the elements are from mis-matched halves, we
     can use shufps for V4SF/V4SI or do a DImode shuffle.  */
  if ((contents & (h1 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i] = i * 2;
          remap[i + nelt] = i * 2 + 1;
          dremap.perm[i * 2] = i;
          dremap.perm[i * 2 + 1] = i + nelt;
        }
    }
  else if ((contents & (h2 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i + nelt2] = i * 2;
          remap[i + nelt + nelt2] = i * 2 + 1;
          dremap.perm[i * 2] = i + nelt2;
          dremap.perm[i * 2 + 1] = i + nelt + nelt2;
        }
    }
  else if ((contents & (h1 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i] = i;
          remap[i + nelt + nelt2] = i + nelt2;
          dremap.perm[i] = i;
          dremap.perm[i + nelt2] = i + nelt + nelt2;
        }
      if (nelt != 4)
        {
          dremap.vmode = V2DImode;
          dremap.nelt = 2;
          dremap.perm[0] = 0;
          dremap.perm[1] = 3;
        }
    }
  else if ((contents & (h2 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i + nelt2] = i;
          remap[i + nelt] = i + nelt2;
          dremap.perm[i] = i + nelt2;
          dremap.perm[i + nelt2] = i + nelt;
        }
      if (nelt != 4)
        {
          dremap.vmode = V2DImode;
          dremap.nelt = 2;
          dremap.perm[0] = 1;
          dremap.perm[1] = 2;
        }
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (which)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
        static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
        static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);
        t4 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }.  */
        expand_vselect (t1, d->op0, perm1, 8);
        expand_vselect (t2, d->op1, perm1, 8);

        /* Shuffle the lanes around to produce:
           { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
        emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));

        /* Now a vpermil2p will produce the result required.  */
        /* ??? The vpermil2p requires a vector constant.  Another option
           is a unpck[lh]ps to merge the two vectors to produce
           { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }.  Then use another
           vpermilps to get the elements into the final order.  */
        d->op0 = t3;
        d->op1 = t4;
        memcpy (d->perm, odd ? permo : perme, 8);
        expand_vec_perm_vpermil (d);
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave.  */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          optab otab = vec_interleave_low_optab;
          if (elt >= nelt2)
            {
              otab = vec_interleave_high_optab;
              elt -= nelt2;
            }
          nelt2 /= 2;

          op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, op0);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;

    default:
      gcc_unreachable ();
    }
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly...  */

  if (expand_vec_perm_even_odd (d))
    return true;

  return false;
}
/* Extract the values from the vector CST into the permutation array in D.
   Return 0 on error, 1 if all values from the permutation come from the
   first vector, 2 if all values from the second vector, and 3 otherwise.  */

static int
extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
{
  tree list = TREE_VECTOR_CST_ELTS (cst);
  unsigned i, nelt = d->nelt;
  int ret = 0;

  for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
    {
      unsigned HOST_WIDE_INT e;

      if (!host_integerp (TREE_VALUE (list), 1))
        return 0;
      e = tree_low_cst (TREE_VALUE (list), 1);
      if (e >= 2 * nelt)
        return 0;

      d->perm[i] = e;
      ret |= (e < nelt ? 1 : 2);
    }
  gcc_assert (list == NULL);

  /* For all elements from second vector, fold the elements to first.  */
  if (ret == 2)
    for (i = 0; i < nelt; ++i)
      d->perm[i] -= nelt;

  return ret;
}
rtx
ix86_expand_vec_perm_builtin (tree exp)
{
  struct expand_vec_perm_d d;
  tree arg0, arg1, arg2;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  d.vmode = TYPE_MODE (TREE_TYPE (arg0));
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;
  gcc_assert (VECTOR_MODE_P (d.vmode));

  if (TREE_CODE (arg2) != VECTOR_CST)
    {
      error_at (EXPR_LOCATION (exp),
                "vector permutation requires vector constant");
      goto exit_error;
    }

  switch (extract_vec_perm_cst (&d, arg2))
    {
    default:
      gcc_unreachable ();

    case 0:
      error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
      goto exit_error;

    case 3:
      if (!operand_equal_p (arg0, arg1, 0))
        {
          d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op0 = force_reg (d.vmode, d.op0);
          d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op1 = force_reg (d.vmode, d.op1);
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      {
        unsigned i, nelt = d.nelt;
        for (i = 0; i < nelt; ++i)
          if (d.perm[i] >= nelt)
            d.perm[i] -= nelt;
      }
      /* FALLTHRU */

    case 1:
      d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;

    case 2:
      d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;
    }

  d.target = gen_reg_rtx (d.vmode);
  if (ix86_expand_vec_perm_builtin_1 (&d))
    return d.target;

  /* For compiler generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin the user has access to, don't abort.  */
  switch (d.nelt)
    {
    case 2:
      sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
      break;
    case 4:
      sorry ("vector permutation (%d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
      break;
    case 8:
      sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
      break;
    case 16:
      sorry ("vector permutation "
             "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7],
             d.perm[8], d.perm[9], d.perm[10], d.perm[11],
             d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
      break;
    default:
      gcc_unreachable ();
    }
 exit_error:
  return CONST0_RTX (d.vmode);
}
/* Implement targetm.vectorize.builtin_vec_perm_ok.  */

static bool
ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
{
  struct expand_vec_perm_d d;
  int vec_mask;
  bool ret, one_vec;

  d.vmode = TYPE_MODE (vec_type);
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  vec_mask = extract_vec_perm_cst (&d, mask);

  /* This hook cannot be called in response to something that the
     user does (unlike the builtin expander), so we shouldn't ever see
     an error generated from the extract.  */
  gcc_assert (vec_mask > 0 && vec_mask <= 3);
  one_vec = (vec_mask != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_builtin_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* This function returns the calling abi specific va_list type node.
   It returns the FNDECL specific va_list type.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;

  return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (INDIRECT_REF_P (type))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
    type = TREE_TYPE (type);

  if (TARGET_64BIT)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
    }

  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its name.  Returns zero if
   there is no element for this index, otherwise IDX should be
   increased upon the next call.  Note that the base builtin
   __builtin_va_list is not enumerated here.
   Used from c_common_nodes_and_builtins.  */

int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
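/* A minimal sketch of the consumer's loop (the caller is
   c_common_nodes_and_builtins; the local names here are only
   illustrative):

	const char *pname;
	tree ptree;
	int i;

	for (i = 0; ix86_enum_va_list (i, &pname, &ptree); ++i)
	  ...push a TYPE_DECL named PNAME with type PTREE...

   so that on 64-bit targets both __builtin_ms_va_list and
   __builtin_sysv_va_list become visible to the front end.  */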
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
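/* Each override in this table follows the same pattern: #undef the
   default supplied by target-def.h, then #define the macro to the
   i386 implementation, so that the TARGET_INITIALIZER expansion at
   the bottom of the file picks up the replacement.  The pair above,
   for instance, routes aggregate-return decisions to
   ix86_return_in_memory.  */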
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate
struct gcc_target targetm = TARGET_INITIALIZER;
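/* All of the overrides above are folded into `targetm' here; the
   target-independent parts of the compiler reach this back end only
   through that structure.  An illustrative call site (not from this
   file):

	if (targetm.calls.return_in_memory (type, fntype))
	  ...

   resolves to ix86_return_in_memory via the TARGET_RETURN_IN_MEMORY
   definition above.  */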
#include "gt-i386.h"