/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
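/* Illustrative lookup (a sketch; mult_init and divide are the
   corresponding array fields of struct processor_costs):

     ix86_cost->mult_init[MODE_INDEX (SImode)]
     ix86_cost->divide[MODE_INDEX (DImode)]

   QImode..DImode select slots 0..3; any other mode falls into the
   trailing "other" slot 4.  */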
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
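/* Worked example of how the two scales line up under that assumption:
   COSTS_N_INSNS (1) == 4 and COSTS_N_BYTES (2) == 4, so in the size
   table below a 2-byte add gets the same weight that a single add insn
   gets in the speed-tuned tables.  */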
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
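/* How to read the stringop tables below (a sketch of the layout from
   struct stringop_algs): the first member is the algorithm to use when
   the block size is not known at compile time, followed by a list of
   {max_size, algorithm} pairs terminated by a -1 entry.  For example

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   reads "use a libcall for unknown sizes, rep movsl up to 256 bytes,
   and a libcall for anything larger".  DUMMY_STRINGOP_ALGS is a
   placeholder for a table slot a given CPU never uses.  */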
const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers
              in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),  /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  1,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  486 has 8kB cache
         shared for code and data, so 4kB is
         not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb has apparently more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */

  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers
              in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers
              in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  128,  /* size of l2 cache.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  32,  /* size of l2 cache.  Some models
          have integrated l2 cache, but
          optimizing for k6 is not important
          enough to worry about that.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
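  /* Note on the two-element tables above (a sketch of how they are
     consulted; the index expression follows the decide_alg logic):
     the first stringop_algs entry is used for 32-bit code and the
     second for 64-bit code, roughly

       algs = &ix86_cost->memcpy[TARGET_64BIT != 0];

     which is why size-tuned or 32-bit-only CPUs leave the second slot
     as DUMMY_STRINGOP_ALGS.  */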
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  5,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  3,  /* vec_unalign_load_cost.  */
  3,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  2,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
      /* On K8:
           MOVD reg64, xmmreg Double FSTORE 4
           MOVD reg32, xmmreg Double FSTORE 4
         On AMDFAM10:
           MOVD reg64, xmmreg Double FADD 3
           MOVD reg32, xmmreg Double FADD 3 */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
      /* On K8:
           MOVD reg64, xmmreg Double FSTORE 4
           MOVD reg32, xmmreg Double FSTORE 4
         On AMDFAM10:
           MOVD reg64, xmmreg Double FADD 3
           MOVD reg32, xmmreg Double FADD 3 */
  64,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,  /* number of parallel prefetches */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporal accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (3),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (4),  /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  2,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  12,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  10,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  COSTS_N_INSNS (5),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  3,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  6,  /* cost of moving MMX register */
  {12, 12},  /* cost of loading MMX registers
                in SImode and DImode */
  {12, 12},  /* cost of storing MMX registers
                in SImode and DImode */
  6,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {12, 12, 12},  /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  8,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  16,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {6, 6, 6},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {6, 6},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {6, 6, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  2048,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */

static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */

static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
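/* The active table is consulted through this pointer; a sketch of the
   kind of use made in ix86_rtx_costs:

     *total = ix86_cost->add;

   so retuning only requires pointing ix86_cost at a different table.  */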
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
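/* How the masks below are consumed (a sketch; ix86_tune_mask stands for
   the 1u << ix86_tune value computed during option processing):

     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   i.e. each entry of initial_ix86_tune_features is just the set of
   processors for which the corresponding tuning flag is enabled.  */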
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  It also increases the code size.
     As a result, icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
/* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
   register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
   might be considered for Generic32 if our scheme for avoiding partial
   stalls were more effective.  */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
/* X86_TUNE_SINGLE_POP: Enable if a single pop insn is preferred
   over esp addition.  */
m_386 | m_486 | m_PENT | m_PPRO,

/* X86_TUNE_DOUBLE_POP: Enable if a double pop insn is preferred
   over esp addition.  */

/* X86_TUNE_SINGLE_PUSH: Enable if a single push insn is preferred
   over esp subtraction.  */
m_386 | m_486 | m_PENT | m_K6_GEODE,

/* X86_TUNE_DOUBLE_PUSH: Enable if a double push insn is preferred
   over esp subtraction.  */
m_PENT | m_K6_GEODE,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
   conflict between PPro/Pentium4-based chips, which treat 128-bit SSE
   registers as single units, and K8-based chips, which split SSE
   registers into two 64-bit halves.  This knob promotes all store
   destinations to 128 bits to allow register renaming on 128-bit SSE
   units, but it usually costs one extra micro-op on 64-bit SSE units.
   Experimental results show that disabling this option on P4 brings
   over a 20% SPECfp regression, while enabling it on K8 brings a
   roughly 2.4% regression that can be partly masked by careful
   scheduling of moves.  */
m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
| m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where types and
   dependencies are resolved on SSE register parts instead of whole
   registers, so we may keep just the lower part of scalar values in
   the proper format, leaving the upper part undefined.  */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict
   more than 4 branch instructions in a 16-byte window.  */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
1583 /* X86_TUNE_SHORTEN_X87_SSE */
1586 /* X86_TUNE_AVOID_VECTOR_DECODE */
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
   HImode and SImode multiply, but the 386 and 486 do HImode multiply
   faster.  */
/* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory
   takes the vector path on AMD machines.  */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
/* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant takes the vector
   path on AMD machines.  */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
/* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via
   OR than via a mov.  */
1605 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1606 but one byte longer. */
/* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a
   memory operand that cannot be represented using a modRM byte.  The
   XOR replacement is long decoded, so this split helps here as well.  */
/* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer packed vector SSE conversion
   from FP to FP.  */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1619 from integer to FP. */
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
1624 compare-and-branch uop. */
1627 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
   will impact LEA instruction selection.  */
m_ATOM,
};
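/* Sketch (illustrative, assuming this port's usual i386.h convention):
   once ix86_tune is known, the masks above are reduced to booleans in
   ix86_tune_features (see the X86_TUNE_LAST loop in override_options
   below), and i386.h exposes them through convenience macros along the
   lines of

     #define TARGET_USE_LEAVE  ix86_tune_features[X86_TUNE_USE_LEAVE]

   so the rest of the backend never consults the processor masks
   directly.  */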
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
1641 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1644 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1647 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1650 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1654 static const unsigned int x86_accumulate_outgoing_args
1655 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
1662 static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit a fast (but longer) prologue and
   epilogue.  */
#define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1677 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1679 /* ax, dx, cx, bx */
1680 AREG, DREG, CREG, BREG,
1681 /* si, di, bp, sp */
1682 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1684 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1685 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1688 /* flags, fpsr, fpcr, frame */
1689 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1691 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1694 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1699 /* SSE REX registers */
1700 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1704 /* The "default" register map used in 32bit mode. */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
1708 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1709 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1710 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1711 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1712 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
1717 /* The "default" register map used in 64bit mode. */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
1721 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1722 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1723 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1724 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1725 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
};
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
1747 It is not at all clear how we should number the FP stack registers
1748 for the x86 architecture. If the version of SDB on x86/svr4 were
1749 a bit less brain dead with respect to floating-point then we would
1750 have a precedent to follow with respect to DWARF register numbers
1751 for x86 FP registers, but the SDB on x86/svr4 is so completely
1752 broken with respect to FP registers that it is hardly worth thinking
1753 of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
1758 particular, and even for DWARF regno 11, SDB only seems to under-
1759 stand that it should say that a variable lives in %st(0) (when
1760 asked via an `=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
register numbers.  Note that these are all stack-top-relative
numbers:
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
1795 /* Define parameter passing and return registers. */
static int const x86_64_int_parameter_registers[6] =
{
DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
CX_REG, DX_REG, R8_REG, R9_REG
};
static int const x86_64_int_return_registers[4] =
{
AX_REG, DX_REG, DI_REG, SI_REG
};
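/* Worked example (standard SysV AMD64 calling convention; added here
   for illustration): for a call to

     long f (long a, long b, long c, long d, long e, long g);

   the six arguments arrive in %rdi, %rsi, %rdx, %rcx, %r8 and %r9 in
   that order, matching x86_64_int_parameter_registers above, while a
   64-bit integer result comes back in %rax (with %rdx holding the
   high half of a 128-bit result).  */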
1812 /* Define the structure for the machine field in struct function. */
1814 struct GTY(()) stack_local_entry {
1815 unsigned short mode;
struct stack_local_entry *next;
};
1821 /* Structure describing stack frame layout.
1822 Stack grows downward:
1828 saved static chain if ix86_static_chain_on_stack
1830 saved frame pointer if frame_pointer_needed
1831 <- HARD_FRAME_POINTER
1837 <- sse_regs_save_offset
1840 [va_arg registers] |
1844 [padding2] | = to_allocate
1853 int outgoing_arguments_size;
1854 HOST_WIDE_INT frame;
1856 /* The offsets relative to ARG_POINTER. */
1857 HOST_WIDE_INT frame_pointer_offset;
1858 HOST_WIDE_INT hard_frame_pointer_offset;
1859 HOST_WIDE_INT stack_pointer_offset;
1860 HOST_WIDE_INT reg_save_offset;
1861 HOST_WIDE_INT sse_reg_save_offset;
1863 /* When save_regs_using_mov is set, emit prologue using
1864 move instead of push instructions. */
1865 bool save_regs_using_mov;
1868 /* Code model option. */
1869 enum cmodel ix86_cmodel;
1871 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1873 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1875 /* Which unit we are generating floating point math for. */
1876 enum fpmath_unit ix86_fpmath;
/* Which CPU we are scheduling for.  */
1879 enum attr_cpu ix86_schedule;
/* Which CPU we are optimizing for.  */
1882 enum processor_type ix86_tune;
1884 /* Which instruction set architecture to use. */
1885 enum processor_type ix86_arch;
/* True if the SSE prefetch instruction is not a NOP.  */
int x86_prefetch_sse;
1890 /* ix86_regparm_string as a number */
1891 static int ix86_regparm;
1893 /* -mstackrealign option */
1894 extern int ix86_force_align_arg_pointer;
1895 static const char ix86_force_align_arg_pointer_string[]
1896 = "force_align_arg_pointer";
1898 static rtx (*ix86_gen_leave) (void);
1899 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1900 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1901 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1902 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1903 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1904 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1905 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1906 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1909 /* Preferred alignment for stack boundary in bits. */
1910 unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits, specified at
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;
1916 /* Default alignment for incoming stack boundary in bits. */
1917 static unsigned int ix86_default_incoming_stack_boundary;
1919 /* Alignment for incoming stack boundary in bits. */
1920 unsigned int ix86_incoming_stack_boundary;
/* The ABI used by the target.  */
1923 enum calling_abi ix86_abi;
1925 /* Values 1-5: see jump.c */
1926 int ix86_branch_cost;
1928 /* Calling abi specific va_list type nodes. */
1929 static GTY(()) tree sysv_va_list_type_node;
1930 static GTY(()) tree ms_va_list_type_node;
1932 /* Variables which are this size or smaller are put in the data/bss
1933 or ldata/lbss sections. */
1935 int ix86_section_threshold = 65536;
1937 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1938 char internal_label_prefix[16];
1939 int internal_label_prefix_len;
/* Fence to use after a loop using movnt.  */
/* Register class used for passing a given 64-bit part of an argument.
   These represent classes as documented by the psABI, with the
   exception of the SSESF and SSEDF classes, which are basically the
   SSE class; gcc just uses SFmode or DFmode moves instead of DImode
   ones to avoid reformatting penalties.

   Similarly, we play games with the INTEGERSI class to use cheaper
   SImode moves whenever possible (the upper half contains padding).  */
1951 enum x86_64_reg_class
1954 X86_64_INTEGER_CLASS,
1955 X86_64_INTEGERSI_CLASS,
1962 X86_64_COMPLEX_X87_CLASS,
1966 #define MAX_CLASSES 4
/* Table of constants used by fldpi, fldln2, etc.  */
1969 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1970 static bool ext_80387_constants_init = 0;
1973 static struct machine_function * ix86_init_machine_status (void);
1974 static rtx ix86_function_value (const_tree, const_tree, bool);
1975 static bool ix86_function_value_regno_p (const unsigned int);
1976 static rtx ix86_static_chain (const_tree, bool);
1977 static int ix86_function_regparm (const_tree, const_tree);
1978 static void ix86_compute_frame_layout (struct ix86_frame *);
1979 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1981 static void ix86_add_new_builtins (int);
1982 static rtx ix86_expand_vec_perm_builtin (tree);
1983 static tree ix86_canonical_va_list_type (tree);
1985 enum ix86_function_specific_strings
1987 IX86_FUNCTION_SPECIFIC_ARCH,
1988 IX86_FUNCTION_SPECIFIC_TUNE,
1989 IX86_FUNCTION_SPECIFIC_FPMATH,
1990 IX86_FUNCTION_SPECIFIC_MAX
1993 static char *ix86_target_string (int, int, const char *, const char *,
1994 const char *, bool);
1995 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1996 static void ix86_function_specific_save (struct cl_target_option *);
1997 static void ix86_function_specific_restore (struct cl_target_option *);
1998 static void ix86_function_specific_print (FILE *, int,
1999 struct cl_target_option *);
2000 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2001 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2002 static bool ix86_can_inline_p (tree, tree);
2003 static void ix86_set_current_function (tree);
2004 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2006 static enum calling_abi ix86_function_abi (const_tree);
2009 #ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
2015 #ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
2019 /* Whether -mtune= or -march= were specified */
2020 static int ix86_tune_defaulted;
2021 static int ix86_arch_specified;
2023 /* Bit flags that specify the ISA we are compiling for. */
2024 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2026 /* A mask of ix86_isa_flags that includes bit X if X
2027 was set or cleared on the command line. */
2028 static int ix86_isa_flags_explicit;
2030 /* Define a set of ISAs which are available when a given ISA is
2031 enabled. MMX and SSE ISAs are handled separately. */
2033 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2034 #define OPTION_MASK_ISA_3DNOW_SET \
2035 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2037 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2038 #define OPTION_MASK_ISA_SSE2_SET \
2039 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2040 #define OPTION_MASK_ISA_SSE3_SET \
2041 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2042 #define OPTION_MASK_ISA_SSSE3_SET \
2043 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2044 #define OPTION_MASK_ISA_SSE4_1_SET \
2045 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2046 #define OPTION_MASK_ISA_SSE4_2_SET \
2047 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2048 #define OPTION_MASK_ISA_AVX_SET \
2049 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2050 #define OPTION_MASK_ISA_FMA_SET \
2051 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
2055 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2057 #define OPTION_MASK_ISA_SSE4A_SET \
2058 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2059 #define OPTION_MASK_ISA_FMA4_SET \
2060 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2061 | OPTION_MASK_ISA_AVX_SET)
2062 #define OPTION_MASK_ISA_XOP_SET \
2063 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2064 #define OPTION_MASK_ISA_LWP_SET \
/* AES and PCLMUL need SSE2 because they use xmm registers.  */
2068 #define OPTION_MASK_ISA_AES_SET \
2069 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2070 #define OPTION_MASK_ISA_PCLMUL_SET \
2071 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2073 #define OPTION_MASK_ISA_ABM_SET \
2074 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2076 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2077 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2078 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2079 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2080 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2082 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2083 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2084 #define OPTION_MASK_ISA_F16C_SET \
2085 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
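/* A worked expansion of the *_SET chain above (each macro ORs in the
   *_SET macro of the ISA it depends on), so for example enabling
   -msse4.2 transitively enables every preceding SSE level:

     OPTION_MASK_ISA_SSE4_2_SET
       == OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
        | OPTION_MASK_ISA_SSSE3  | OPTION_MASK_ISA_SSE3
        | OPTION_MASK_ISA_SSE2   | OPTION_MASK_ISA_SSE  */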
2087 /* Define a set of ISAs which aren't available when a given ISA is
2088 disabled. MMX and SSE ISAs are handled separately. */
2090 #define OPTION_MASK_ISA_MMX_UNSET \
2091 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2092 #define OPTION_MASK_ISA_3DNOW_UNSET \
2093 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2094 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2096 #define OPTION_MASK_ISA_SSE_UNSET \
2097 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2098 #define OPTION_MASK_ISA_SSE2_UNSET \
2099 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2100 #define OPTION_MASK_ISA_SSE3_UNSET \
2101 (OPTION_MASK_ISA_SSE3 \
2102 | OPTION_MASK_ISA_SSSE3_UNSET \
2103 | OPTION_MASK_ISA_SSE4A_UNSET )
2104 #define OPTION_MASK_ISA_SSSE3_UNSET \
2105 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2106 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2107 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2108 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2109 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2110 #define OPTION_MASK_ISA_AVX_UNSET \
2111 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2112 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2113 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
2117 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2119 #define OPTION_MASK_ISA_SSE4A_UNSET \
2120 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2122 #define OPTION_MASK_ISA_FMA4_UNSET \
2123 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2124 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2125 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2127 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2128 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2129 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2130 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2131 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2132 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2133 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2134 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2136 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2137 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2138 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
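/* The mirror image for the *_UNSET chain: disabling an ISA also
   disables everything built on top of it.  For example, -mno-sse3
   expands (via the macros above) to clearing

     SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A, AVX, FMA, FMA4, XOP and F16C

   since each of those depends, directly or transitively, on SSE3.  */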
2140 /* Vectorization library interface and handlers. */
2141 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2143 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2144 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number.  */
struct ptt
{
2149 const struct processor_costs *cost; /* Processor costs */
2150 const int align_loop; /* Default alignments. */
2151 const int align_loop_max_skip;
2152 const int align_jump;
2153 const int align_jump_max_skip;
const int align_func;
};
static const struct ptt processor_target_table[PROCESSOR_max] =
{
2159 {&i386_cost, 4, 3, 4, 3, 4},
2160 {&i486_cost, 16, 15, 16, 15, 16},
2161 {&pentium_cost, 16, 7, 16, 7, 16},
2162 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2163 {&geode_cost, 0, 0, 0, 0, 0},
2164 {&k6_cost, 32, 7, 32, 7, 32},
2165 {&athlon_cost, 16, 7, 16, 7, 16},
2166 {&pentium4_cost, 0, 0, 0, 0, 0},
2167 {&k8_cost, 16, 7, 16, 7, 16},
2168 {&nocona_cost, 0, 0, 0, 0, 0},
2169 {&core2_cost, 16, 10, 16, 10, 16},
2170 {&generic32_cost, 16, 7, 16, 7, 16},
2171 {&generic64_cost, 16, 10, 16, 10, 16},
2172 {&amdfam10_cost, 32, 24, 32, 7, 32},
2173 {&bdver1_cost, 32, 24, 32, 7, 32},
{&atom_cost, 16, 7, 16, 7, 16}
};
2177 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
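/* (Background note, added for clarity: the SysV AMD64 red zone is the
   128 bytes below %rsp that a leaf function may use without adjusting
   the stack pointer; the Microsoft x64 ABI defines no such zone, hence
   the TARGET_64BIT_MS_ABI exclusion above.)  */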
2212 /* Implement TARGET_HANDLE_OPTION. */
static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2222 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2223 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2227 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2235 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2236 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2240 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2241 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2251 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2252 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2256 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2264 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2265 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2269 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2277 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2278 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2282 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2290 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2291 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2295 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2303 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2304 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2308 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2316 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2317 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2321 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2329 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2330 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2334 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2342 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2343 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2347 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2348 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2353 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2358 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2359 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2366 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2370 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2371 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2378 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2379 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2383 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2384 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2391 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2392 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2396 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2397 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2404 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2405 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2409 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2410 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2417 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2418 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2422 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2423 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2430 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2431 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2435 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2436 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2443 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2444 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2448 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2449 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2456 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2457 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2461 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2462 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2469 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2470 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2474 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2475 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2482 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2483 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2487 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2488 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2495 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2496 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2500 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2501 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2508 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2509 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2513 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2514 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2521 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2522 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2526 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2527 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2534 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2535 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2539 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2540 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2547 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2548 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2552 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2553 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
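/* A worked example of the SET/UNSET protocol above (illustrative):
   "gcc -mavx -mno-sse4.1" first ORs OPTION_MASK_ISA_AVX_SET into
   ix86_isa_flags (AVX plus the whole SSE chain), then ANDs out
   OPTION_MASK_ISA_SSE4_1_UNSET, which removes SSE4.1 and everything
   built on it (SSE4.2, AVX, FMA, FMA4, XOP, F16C).  Both masks are
   also recorded in ix86_isa_flags_explicit, so a later -march=
   default cannot silently re-enable what the user switched off.  */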
2562 /* Return a string that documents the current -m options. The caller is
2563 responsible for freeing the string. */
2566 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2567 const char *fpmath, bool add_nl_p)
struct ix86_target_opts
{
  const char *option;	/* option string */
  int mask;		/* isa mask options */
};
/* This table is ordered so that options like -msse4.2, which imply
   preceding options, are matched first.  */
static struct ix86_target_opts isa_opts[] =
{
2579 { "-m64", OPTION_MASK_ISA_64BIT },
2580 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2581 { "-mfma", OPTION_MASK_ISA_FMA },
2582 { "-mxop", OPTION_MASK_ISA_XOP },
2583 { "-mlwp", OPTION_MASK_ISA_LWP },
2584 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2585 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2586 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2587 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2588 { "-msse3", OPTION_MASK_ISA_SSE3 },
2589 { "-msse2", OPTION_MASK_ISA_SSE2 },
2590 { "-msse", OPTION_MASK_ISA_SSE },
2591 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2592 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2593 { "-mmmx", OPTION_MASK_ISA_MMX },
2594 { "-mabm", OPTION_MASK_ISA_ABM },
2595 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2596 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2597 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2598 { "-maes", OPTION_MASK_ISA_AES },
2599 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2600 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2601 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2602 { "-mf16c", OPTION_MASK_ISA_F16C },
};

static struct ix86_target_opts flag_opts[] =
{
2608 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2609 { "-m80387", MASK_80387 },
2610 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2611 { "-malign-double", MASK_ALIGN_DOUBLE },
2612 { "-mcld", MASK_CLD },
2613 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2614 { "-mieee-fp", MASK_IEEE_FP },
2615 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2616 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2617 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2618 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2619 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2620 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2621 { "-mno-red-zone", MASK_NO_RED_ZONE },
2622 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2623 { "-mrecip", MASK_RECIP },
2624 { "-mrtd", MASK_RTD },
2625 { "-msseregparm", MASK_SSEREGPARM },
2626 { "-mstack-arg-probe", MASK_STACK_PROBE },
2627 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2630 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2633 char target_other[40];
2642 memset (opts, '\0', sizeof (opts));
2644 /* Add -march= option. */
2647 opts[num][0] = "-march=";
2648 opts[num++][1] = arch;
2651 /* Add -mtune= option. */
2654 opts[num][0] = "-mtune=";
2655 opts[num++][1] = tune;
2658 /* Pick out the options in isa options. */
2659 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2661 if ((isa & isa_opts[i].mask) != 0)
2663 opts[num++][0] = isa_opts[i].option;
2664 isa &= ~ isa_opts[i].mask;
2668 if (isa && add_nl_p)
2670 opts[num++][0] = isa_other;
2671 sprintf (isa_other, "(other isa: %#x)", isa);
2674 /* Add flag options. */
2675 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2677 if ((flags & flag_opts[i].mask) != 0)
2679 opts[num++][0] = flag_opts[i].option;
2680 flags &= ~ flag_opts[i].mask;
2684 if (flags && add_nl_p)
2686 opts[num++][0] = target_other;
2687 sprintf (target_other, "(other flags: %#x)", flags);
/* Add -mfpmath= option.  */
2693 opts[num][0] = "-mfpmath=";
2694 opts[num++][1] = fpmath;
2701 gcc_assert (num < ARRAY_SIZE (opts));
2703 /* Size the string. */
2705 sep_len = (add_nl_p) ? 3 : 1;
2706 for (i = 0; i < num; i++)
2709 for (j = 0; j < 2; j++)
2711 len += strlen (opts[i][j]);
2714 /* Build the string. */
2715 ret = ptr = (char *) xmalloc (len);
2718 for (i = 0; i < num; i++)
2722 for (j = 0; j < 2; j++)
2723 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2730 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2738 for (j = 0; j < 2; j++)
2741 memcpy (ptr, opts[i][j], len2[j]);
2743 line_len += len2[j];
2748 gcc_assert (ret + len >= ptr);
/* Return TRUE if software prefetching is beneficial for the
   target.  */

static bool
software_prefetching_beneficial_p (void)
2761 case PROCESSOR_GEODE:
2763 case PROCESSOR_ATHLON:
2765 case PROCESSOR_AMDFAM10:
/* Return true if profiling code should be emitted before the
   prologue; otherwise return false.
   Note: for the x86 "hotfix" case, a sorry () diagnostic is issued
   instead.  */

static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */

static void
ix86_debug_options (void)
2787 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2788 ix86_arch_string, ix86_tune_string,
2789 ix86_fpmath_string, true);
2793 fprintf (stderr, "%s\n\n", opts);
2797 fputs ("<no options>\n\n", stderr);
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
static void
override_options (bool main_args_p)
{
2815 unsigned int ix86_arch_mask, ix86_tune_mask;
2816 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2821 /* Comes from final.c -- no real reason to change it. */
2822 #define MAX_CODE_ALIGN 16
2830 PTA_PREFETCH_SSE = 1 << 4,
2832 PTA_3DNOW_A = 1 << 6,
2836 PTA_POPCNT = 1 << 10,
2838 PTA_SSE4A = 1 << 12,
2839 PTA_NO_SAHF = 1 << 13,
2840 PTA_SSE4_1 = 1 << 14,
2841 PTA_SSE4_2 = 1 << 15,
2843 PTA_PCLMUL = 1 << 17,
2846 PTA_MOVBE = 1 << 20,
2850 PTA_FSGSBASE = 1 << 24,
2851 PTA_RDRND = 1 << 25,
2857 const char *const name; /* processor name or nickname. */
2858 const enum processor_type processor;
2859 const enum attr_cpu schedule;
2860 const unsigned /*enum pta_flags*/ flags;
2862 const processor_alias_table[] =
2864 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2865 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2866 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2867 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2868 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2869 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2870 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2871 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2872 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2873 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2874 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2875 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2876 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2878 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2880 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 PTA_MMX | PTA_SSE | PTA_SSE2},
2882 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
PTA_MMX | PTA_SSE | PTA_SSE2},
2884 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2885 PTA_MMX | PTA_SSE | PTA_SSE2},
2886 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2887 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2888 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2889 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2890 | PTA_CX16 | PTA_NO_SAHF},
2891 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_SSSE3 | PTA_CX16},
2894 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2897 {"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2899 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2900 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2901 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2902 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2903 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2904 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2905 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2906 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2907 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2908 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2909 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2910 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2911 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2912 {"x86-64", PROCESSOR_K8, CPU_K8,
2913 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2914 {"k8", PROCESSOR_K8, CPU_K8,
2915 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2916 | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2920 {"opteron", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_NO_SAHF},
2923 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2926 {"athlon64", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_NO_SAHF},
2929 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2932 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_NO_SAHF},
2935 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2938 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2944 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2945 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2946 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2947 0 /* flags are only used for -march switch. */ },
2948 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2949 PTA_64BIT /* flags are only used for -march switch. */ },
2952 int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Set up prefix/suffix so the error messages refer either to the
   command-line argument or to the attribute(target).  */
2964 prefix = "option(\"";
2969 #ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
2973 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
2977 /* -fPIC is the default for x86_64. */
2978 if (TARGET_MACHO && TARGET_64BIT)
2981 /* Need to check -mtune=generic first. */
2982 if (ix86_tune_string)
2984 if (!strcmp (ix86_tune_string, "generic")
2985 || !strcmp (ix86_tune_string, "i686")
/* As special support for cross compilers we read -mtune=native
   as -mtune=generic.  With native compilers we won't see
   -mtune=native, as it will have been changed by the driver.  */
2989 || !strcmp (ix86_tune_string, "native"))
2992 ix86_tune_string = "generic64";
2994 ix86_tune_string = "generic32";
2996 /* If this call is for setting the option attribute, allow the
2997 generic32/generic64 that was previously set. */
2998 else if (!main_args_p
2999 && (!strcmp (ix86_tune_string, "generic32")
3000 || !strcmp (ix86_tune_string, "generic64")))
3002 else if (!strncmp (ix86_tune_string, "generic", 7))
3003 error ("bad value (%s) for %stune=%s %s",
3004 ix86_tune_string, prefix, suffix, sw);
3005 else if (!strcmp (ix86_tune_string, "x86-64"))
3006 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3007 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3008 prefix, suffix, prefix, suffix, prefix, suffix);
3012 if (ix86_arch_string)
3013 ix86_tune_string = ix86_arch_string;
3014 if (!ix86_tune_string)
3016 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3017 ix86_tune_defaulted = 1;
3020 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3021 need to use a sensible tune option. */
3022 if (!strcmp (ix86_tune_string, "generic")
3023 || !strcmp (ix86_tune_string, "x86-64")
3024 || !strcmp (ix86_tune_string, "i686"))
3027 ix86_tune_string = "generic64";
3029 ix86_tune_string = "generic32";
3033 if (ix86_stringop_string)
3035 if (!strcmp (ix86_stringop_string, "rep_byte"))
3036 stringop_alg = rep_prefix_1_byte;
3037 else if (!strcmp (ix86_stringop_string, "libcall"))
3038 stringop_alg = libcall;
3039 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3040 stringop_alg = rep_prefix_4_byte;
3041 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3043 /* rep; movq isn't available in 32-bit code. */
3044 stringop_alg = rep_prefix_8_byte;
3045 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3046 stringop_alg = loop_1_byte;
3047 else if (!strcmp (ix86_stringop_string, "loop"))
3048 stringop_alg = loop;
3049 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3050 stringop_alg = unrolled_loop;
3052 error ("bad value (%s) for %sstringop-strategy=%s %s",
3053 ix86_stringop_string, prefix, suffix, sw);
3056 if (!ix86_arch_string)
3057 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3059 ix86_arch_specified = 1;
3061 /* Validate -mabi= value. */
3062 if (ix86_abi_string)
3064 if (strcmp (ix86_abi_string, "sysv") == 0)
3065 ix86_abi = SYSV_ABI;
3066 else if (strcmp (ix86_abi_string, "ms") == 0)
3069 error ("unknown ABI (%s) for %sabi=%s %s",
3070 ix86_abi_string, prefix, suffix, sw);
3073 ix86_abi = DEFAULT_ABI;
3075 if (ix86_cmodel_string != 0)
3077 if (!strcmp (ix86_cmodel_string, "small"))
3078 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3079 else if (!strcmp (ix86_cmodel_string, "medium"))
3080 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3081 else if (!strcmp (ix86_cmodel_string, "large"))
3082 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3084 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3085 else if (!strcmp (ix86_cmodel_string, "32"))
3086 ix86_cmodel = CM_32;
3087 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3088 ix86_cmodel = CM_KERNEL;
3090 error ("bad value (%s) for %scmodel=%s %s",
3091 ix86_cmodel_string, prefix, suffix, sw);
3095 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3096 use of rip-relative addressing. This eliminates fixups that
3097 would otherwise be needed if this object is to be placed in a
3098 DLL, and is essentially just as efficient as direct addressing. */
3099 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3100 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3101 else if (TARGET_64BIT)
3102 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3104 ix86_cmodel = CM_32;
3106 if (ix86_asm_string != 0)
3109 && !strcmp (ix86_asm_string, "intel"))
3110 ix86_asm_dialect = ASM_INTEL;
3111 else if (!strcmp (ix86_asm_string, "att"))
3112 ix86_asm_dialect = ASM_ATT;
3114 error ("bad value (%s) for %sasm=%s %s",
3115 ix86_asm_string, prefix, suffix, sw);
3117 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3118 error ("code model %qs not supported in the %s bit mode",
3119 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3120 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3121 sorry ("%i-bit mode not compiled in",
3122 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3124 for (i = 0; i < pta_size; i++)
3125 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3127 ix86_schedule = processor_alias_table[i].schedule;
3128 ix86_arch = processor_alias_table[i].processor;
3129 /* Default cpu tuning to the architecture. */
3130 ix86_tune = ix86_arch;
3132 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3133 error ("CPU you selected does not support x86-64 "
3136 if (processor_alias_table[i].flags & PTA_MMX
3137 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3138 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3139 if (processor_alias_table[i].flags & PTA_3DNOW
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3141 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3142 if (processor_alias_table[i].flags & PTA_3DNOW_A
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3145 if (processor_alias_table[i].flags & PTA_SSE
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3147 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3148 if (processor_alias_table[i].flags & PTA_SSE2
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3151 if (processor_alias_table[i].flags & PTA_SSE3
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3154 if (processor_alias_table[i].flags & PTA_SSSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3157 if (processor_alias_table[i].flags & PTA_SSE4_1
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3160 if (processor_alias_table[i].flags & PTA_SSE4_2
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3163 if (processor_alias_table[i].flags & PTA_AVX
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3165 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3166 if (processor_alias_table[i].flags & PTA_FMA
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3168 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3169 if (processor_alias_table[i].flags & PTA_SSE4A
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3171 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3172 if (processor_alias_table[i].flags & PTA_FMA4
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3174 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3175 if (processor_alias_table[i].flags & PTA_XOP
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3177 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3178 if (processor_alias_table[i].flags & PTA_LWP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3181 if (processor_alias_table[i].flags & PTA_ABM
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3183 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3184 if (processor_alias_table[i].flags & PTA_CX16
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3186 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3187 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3189 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3190 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3192 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3193 if (processor_alias_table[i].flags & PTA_MOVBE
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3195 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3196 if (processor_alias_table[i].flags & PTA_AES
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3198 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3199 if (processor_alias_table[i].flags & PTA_PCLMUL
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3201 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3202 if (processor_alias_table[i].flags & PTA_FSGSBASE
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3204 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3205 if (processor_alias_table[i].flags & PTA_RDRND
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3207 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3208 if (processor_alias_table[i].flags & PTA_F16C
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3210 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3211 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3212 x86_prefetch_sse = true;
3217 if (!strcmp (ix86_arch_string, "generic"))
3218 error ("generic CPU can be used only for %stune=%s %s",
3219 prefix, suffix, sw);
3220 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3221 error ("bad value (%s) for %sarch=%s %s",
3222 ix86_arch_string, prefix, suffix, sw);
3224 ix86_arch_mask = 1u << ix86_arch;
3225 for (i = 0; i < X86_ARCH_LAST; ++i)
3226 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
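/* Worked example of the -march handling above (illustrative):
   -march=core2 matches the "core2" alias entry, so ix86_arch becomes
   PROCESSOR_CORE2 and, absent explicit -mno-* options recorded in
   ix86_isa_flags_explicit, ix86_isa_flags gains MMX, SSE, SSE2, SSE3,
   SSSE3 and CX16; PTA_64BIT only gates the 64-bit sanity check.  */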
3228 for (i = 0; i < pta_size; i++)
3229 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3231 ix86_schedule = processor_alias_table[i].schedule;
3232 ix86_tune = processor_alias_table[i].processor;
3233 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3235 if (ix86_tune_defaulted)
3237 ix86_tune_string = "x86-64";
3238 for (i = 0; i < pta_size; i++)
3239 if (! strcmp (ix86_tune_string,
3240 processor_alias_table[i].name))
3242 ix86_schedule = processor_alias_table[i].schedule;
3243 ix86_tune = processor_alias_table[i].processor;
3246 error ("CPU you selected does not support x86-64 "
3249 /* Intel CPUs have always interpreted SSE prefetch instructions as
3250 NOPs; so, we can enable SSE prefetch instructions even when
3251 -mtune (rather than -march) points us to a processor that has them.
3252 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3253 higher processors. */
3255 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3256 x86_prefetch_sse = true;
3260 if (ix86_tune_specified && i == pta_size)
3261 error ("bad value (%s) for %stune=%s %s",
3262 ix86_tune_string, prefix, suffix, sw);
3264 ix86_tune_mask = 1u << ix86_tune;
3265 for (i = 0; i < X86_TUNE_LAST; ++i)
3266 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
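/* Worked example of the tuning expansion above (illustrative): with
   -mtune=generic32, ix86_tune_mask is 1u << PROCESSOR_GENERIC32.  The
   X86_TUNE_USE_INCDEC entry is ~(m_PENT4 | m_NOCONA | m_GENERIC |
   m_ATOM), which excludes the generic32 bit, so
   ix86_tune_features[X86_TUNE_USE_INCDEC] ends up 0 and the backend
   avoids inc/dec in favor of add/sub $1.  */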
3268 #ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif
/* Set the default values for switches whose default depends on
   TARGET_64BIT, in case they weren't overridden by command line
   options.  */
3278 if (flag_omit_frame_pointer == 2)
3279 flag_omit_frame_pointer = 1;
3280 if (flag_asynchronous_unwind_tables == 2)
3281 flag_asynchronous_unwind_tables = 1;
3282 if (flag_pcc_struct_return == 2)
3283 flag_pcc_struct_return = 0;
3289 if (flag_omit_frame_pointer == 2)
3290 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3291 if (flag_asynchronous_unwind_tables == 2)
3292 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3293 if (flag_pcc_struct_return == 2)
3294 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3298 ix86_cost = &ix86_size_cost;
3300 ix86_cost = processor_target_table[ix86_tune].cost;
3302 /* Arrange to set up i386_stack_locals for all functions. */
3303 init_machine_status = ix86_init_machine_status;
3305 /* Validate -mregparm= value. */
3306 if (ix86_regparm_string)
3309 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3310 i = atoi (ix86_regparm_string);
3311 if (i < 0 || i > REGPARM_MAX)
3312 error ("%sregparm=%d%s is not between 0 and %d",
3313 prefix, i, suffix, REGPARM_MAX);
3318 ix86_regparm = REGPARM_MAX;
3320 /* If the user has provided any of the -malign-* options,
3321 warn and use that value only if -falign-* is not set.
3322 Remove this code in GCC 3.2 or later. */
3323 if (ix86_align_loops_string)
3325 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3326 prefix, suffix, suffix);
3327 if (align_loops == 0)
3329 i = atoi (ix86_align_loops_string);
3330 if (i < 0 || i > MAX_CODE_ALIGN)
3331 error ("%salign-loops=%d%s is not between 0 and %d",
3332 prefix, i, suffix, MAX_CODE_ALIGN);
3334 align_loops = 1 << i;
3338 if (ix86_align_jumps_string)
3340 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3341 prefix, suffix, suffix);
3342 if (align_jumps == 0)
3344 i = atoi (ix86_align_jumps_string);
3345 if (i < 0 || i > MAX_CODE_ALIGN)
3346 error ("%salign-loops=%d%s is not between 0 and %d",
3347 prefix, i, suffix, MAX_CODE_ALIGN);
3349 align_jumps = 1 << i;
3353 if (ix86_align_funcs_string)
3355 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3356 prefix, suffix, suffix);
3357 if (align_functions == 0)
3359 i = atoi (ix86_align_funcs_string);
3360 if (i < 0 || i > MAX_CODE_ALIGN)
3361 error ("%salign-loops=%d%s is not between 0 and %d",
3362 prefix, i, suffix, MAX_CODE_ALIGN);
3364 align_functions = 1 << i;
3368 /* Default align_* from the processor table. */
3369 if (align_loops == 0)
3371 align_loops = processor_target_table[ix86_tune].align_loop;
3372 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3374 if (align_jumps == 0)
3376 align_jumps = processor_target_table[ix86_tune].align_jump;
3377 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3379 if (align_functions == 0)
3381 align_functions = processor_target_table[ix86_tune].align_func;
3384 /* Validate -mbranch-cost= value, or provide default. */
3385 ix86_branch_cost = ix86_cost->branch_cost;
3386 if (ix86_branch_cost_string)
3388 i = atoi (ix86_branch_cost_string);
3390 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3392 ix86_branch_cost = i;
3394 if (ix86_section_threshold_string)
3396 i = atoi (ix86_section_threshold_string);
3398 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3400 ix86_section_threshold = i;
3403 if (ix86_tls_dialect_string)
3405 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3406 ix86_tls_dialect = TLS_DIALECT_GNU;
3407 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3408 ix86_tls_dialect = TLS_DIALECT_GNU2;
3410 error ("bad value (%s) for %stls-dialect=%s %s",
3411 ix86_tls_dialect_string, prefix, suffix, sw);
3414 if (ix87_precision_string)
3416 i = atoi (ix87_precision_string);
3417 if (i != 32 && i != 64 && i != 80)
3418 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3423 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3425 /* Enable by default the SSE and MMX builtins. Do allow the user to
3426 explicitly disable any of these. In particular, disabling SSE and
3427 MMX for kernel code is extremely useful. */
3428 if (!ix86_arch_specified)
3430 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3431 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3434 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3438 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3440 if (!ix86_arch_specified)
3442 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3444 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3445 when the programmer takes care to keep the stack from being destroyed. */
3446 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3447 target_flags |= MASK_NO_RED_ZONE;
3450 /* Keep nonleaf frame pointers. */
3451 if (flag_omit_frame_pointer)
3452 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3453 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3454 flag_omit_frame_pointer = 1;
3456 /* If we're doing fast math, we don't care about comparison order
3457 wrt NaNs. This lets us use a shorter comparison sequence. */
3458 if (flag_finite_math_only)
3459 target_flags &= ~MASK_IEEE_FP;
3461 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3462 since the insns won't need emulation. */
3463 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3464 target_flags &= ~MASK_NO_FANCY_MATH_387;
3466 /* Likewise, if the target doesn't have a 387, or we've specified
3467 software floating point, don't use 387 inline intrinsics. */
3469 target_flags |= MASK_NO_FANCY_MATH_387;
3471 /* Turn on MMX builtins for -msse. */
3474 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3475 x86_prefetch_sse = true;
3478 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3479 if (TARGET_SSE4_2 || TARGET_ABM)
3480 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3482 /* Validate -mpreferred-stack-boundary= value or default it to
3483 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3484 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3485 if (ix86_preferred_stack_boundary_string)
3487 i = atoi (ix86_preferred_stack_boundary_string);
3488 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3489 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3490 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3492 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
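/* Worked example (illustrative): -mpreferred-stack-boundary=4 sets i = 4,
   which passes the range check (4..12 in 64-bit mode), so

       ix86_preferred_stack_boundary = (1 << 4) * BITS_PER_UNIT
                                     = 16 * 8 = 128 bits,

   i.e. the 16-byte stack alignment required by the x86-64 psABI.  */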
3495 /* Set the default value for -mstackrealign. */
3496 if (ix86_force_align_arg_pointer == -1)
3497 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3499 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3501 /* Validate -mincoming-stack-boundary= value or default it to
3502 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3503 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3504 if (ix86_incoming_stack_boundary_string)
3506 i = atoi (ix86_incoming_stack_boundary_string);
3507 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3508 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3509 i, TARGET_64BIT ? 4 : 2);
3512 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3513 ix86_incoming_stack_boundary
3514 = ix86_user_incoming_stack_boundary;
3518 /* Accept -msseregparm only if at least SSE support is enabled. */
3519 if (TARGET_SSEREGPARM
3521 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3523 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3524 if (ix86_fpmath_string != 0)
3526 if (! strcmp (ix86_fpmath_string, "387"))
3527 ix86_fpmath = FPMATH_387;
3528 else if (! strcmp (ix86_fpmath_string, "sse"))
3532 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3533 ix86_fpmath = FPMATH_387;
3536 ix86_fpmath = FPMATH_SSE;
3538 else if (! strcmp (ix86_fpmath_string, "387,sse")
3539 || ! strcmp (ix86_fpmath_string, "387+sse")
3540 || ! strcmp (ix86_fpmath_string, "sse,387")
3541 || ! strcmp (ix86_fpmath_string, "sse+387")
3542 || ! strcmp (ix86_fpmath_string, "both"))
3546 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3547 ix86_fpmath = FPMATH_387;
3549 else if (!TARGET_80387)
3551 warning (0, "387 instruction set disabled, using SSE arithmetic");
3552 ix86_fpmath = FPMATH_SSE;
3555 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3558 error ("bad value (%s) for %sfpmath=%s %s",
3559 ix86_fpmath_string, prefix, suffix, sw);
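/* Usage sketch (illustrative command lines, covering the spellings handled
   above):

       gcc -m32 -msse2 -mfpmath=sse file.c       -> FPMATH_SSE
       gcc -m32 -mfpmath=387 file.c              -> FPMATH_387
       gcc -m32 -msse2 -mfpmath=sse,387 file.c   -> FPMATH_SSE | FPMATH_387

   "387+sse", "sse+387", "387,sse" and "both" are accepted as synonyms for
   the combined mode; any other string falls through to the "bad value"
   error above.  */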
3562 /* If the i387 is disabled, then do not return values in it. */
3564 target_flags &= ~MASK_FLOAT_RETURNS;
3566 /* Use external vectorized library in vectorizing intrinsics. */
3567 if (ix86_veclibabi_string)
3569 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3570 ix86_veclib_handler = ix86_veclibabi_svml;
3571 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3572 ix86_veclib_handler = ix86_veclibabi_acml;
3574 error ("unknown vectorization library ABI type (%s) for "
3575 "%sveclibabi=%s %s", ix86_veclibabi_string,
3576 prefix, suffix, sw);
3579 if ((!USE_IX86_FRAME_POINTER
3580 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3581 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3583 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3585 /* ??? Unwind info is not correct around the CFG unless either a frame
3586 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3587 unwind info generation to be aware of the CFG and propagating states around edges. */
3589 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3590 || flag_exceptions || flag_non_call_exceptions)
3591 && flag_omit_frame_pointer
3592 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3594 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3595 warning (0, "unwind tables currently require either a frame pointer "
3596 "or %saccumulate-outgoing-args%s for correctness",
3598 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3601 /* If stack probes are required, the space used for large function
3602 arguments on the stack must also be probed, so enable
3603 -maccumulate-outgoing-args so this happens in the prologue. */
3604 if (TARGET_STACK_PROBE
3605 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3607 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3608 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3609 "for correctness", prefix, suffix);
3610 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3613 /* For sane SSE instruction set generation we need fcomi instruction.
3614 It is safe to enable all CMOVE instructions. */
3618 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3621 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3622 p = strchr (internal_label_prefix, 'X');
3623 internal_label_prefix_len = p - internal_label_prefix;
3627 /* When scheduling description is not available, disable scheduler pass
3628 so it won't slow down the compilation and make x87 code slower. */
3629 if (!TARGET_SCHEDULE)
3630 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3632 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3633 set_param_value ("simultaneous-prefetches",
3634 ix86_cost->simultaneous_prefetches);
3635 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3636 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3637 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3638 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3639 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3640 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3642 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3643 if (flag_prefetch_loop_arrays < 0
3646 && software_prefetching_beneficial_p ())
3647 flag_prefetch_loop_arrays = 1;
3649 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3650 can be optimized to ap = __builtin_next_arg (0). */
3652 targetm.expand_builtin_va_start = NULL;
3656 ix86_gen_leave = gen_leave_rex64;
3657 ix86_gen_add3 = gen_adddi3;
3658 ix86_gen_sub3 = gen_subdi3;
3659 ix86_gen_sub3_carry = gen_subdi3_carry;
3660 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3661 ix86_gen_monitor = gen_sse3_monitor64;
3662 ix86_gen_andsp = gen_anddi3;
3663 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3664 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3665 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3669 ix86_gen_leave = gen_leave;
3670 ix86_gen_add3 = gen_addsi3;
3671 ix86_gen_sub3 = gen_subsi3;
3672 ix86_gen_sub3_carry = gen_subsi3_carry;
3673 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3674 ix86_gen_monitor = gen_sse3_monitor;
3675 ix86_gen_andsp = gen_andsi3;
3676 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3677 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3678 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
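/* A minimal sketch of how these mode-dispatched generators are meant to be
   used at an emit site (hypothetical variables dst/src1/src2, not a quote
   of any particular caller):

       rtx dst = gen_reg_rtx (Pmode);
       emit_insn (ix86_gen_add3 (dst, src1, src2));

   The caller stays word-size agnostic; the function pointer selects
   gen_adddi3 on 64-bit targets and gen_addsi3 on 32-bit ones.  */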
3682 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3684 target_flags |= MASK_CLD & ~target_flags_explicit;
3687 if (!TARGET_64BIT && flag_pic)
3689 if (flag_fentry > 0)
3690 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3693 if (flag_fentry < 0)
3695 #if defined(PROFILE_BEFORE_PROLOGUE)
3702 /* Save the initial options in case the user uses function-specific options. */
3704 target_option_default_node = target_option_current_node
3705 = build_target_option_node ();
3708 /* Update register usage after having seen the compiler flags. */
3711 ix86_conditional_register_usage (void)
3716 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3718 if (fixed_regs[i] > 1)
3719 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3720 if (call_used_regs[i] > 1)
3721 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3724 /* The PIC register, if it exists, is fixed. */
3725 j = PIC_OFFSET_TABLE_REGNUM;
3726 if (j != INVALID_REGNUM)
3727 fixed_regs[j] = call_used_regs[j] = 1;
3729 /* The MS_ABI changes the set of call-used registers. */
3730 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3732 call_used_regs[SI_REG] = 0;
3733 call_used_regs[DI_REG] = 0;
3734 call_used_regs[XMM6_REG] = 0;
3735 call_used_regs[XMM7_REG] = 0;
3736 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3737 call_used_regs[i] = 0;
3740 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3741 other call-clobbered regs for 64-bit. */
3744 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3746 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3747 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3748 && call_used_regs[i])
3749 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3752 /* If MMX is disabled, squash the registers. */
3754 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3755 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3756 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3758 /* If SSE is disabled, squash the registers. */
3760 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3761 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3762 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3764 /* If the FPU is disabled, squash the registers. */
3765 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3766 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3767 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3768 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3770 /* If 32-bit, squash the 64-bit registers. */
3773 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3775 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
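/* Illustrative reading of the encoding handled above (an assumption spelled
   out from the code, not an official table): entries in fixed_regs[] and
   call_used_regs[] greater than 1 are conditional.  A value of 2 means
   "fixed only for 32-bit", a value of 3 means "fixed only for 64-bit", so

       fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));

   collapses the tri-state back to the plain 0/1 the rest of the compiler
   expects.  */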
3781 /* Save the current options */
3784 ix86_function_specific_save (struct cl_target_option *ptr)
3786 ptr->arch = ix86_arch;
3787 ptr->schedule = ix86_schedule;
3788 ptr->tune = ix86_tune;
3789 ptr->fpmath = ix86_fpmath;
3790 ptr->branch_cost = ix86_branch_cost;
3791 ptr->tune_defaulted = ix86_tune_defaulted;
3792 ptr->arch_specified = ix86_arch_specified;
3793 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3794 ptr->target_flags_explicit = target_flags_explicit;
3796 /* The fields are char but the variables are not; make sure the
3797 values fit in the fields. */
3798 gcc_assert (ptr->arch == ix86_arch);
3799 gcc_assert (ptr->schedule == ix86_schedule);
3800 gcc_assert (ptr->tune == ix86_tune);
3801 gcc_assert (ptr->fpmath == ix86_fpmath);
3802 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3805 /* Restore the current options */
3808 ix86_function_specific_restore (struct cl_target_option *ptr)
3810 enum processor_type old_tune = ix86_tune;
3811 enum processor_type old_arch = ix86_arch;
3812 unsigned int ix86_arch_mask, ix86_tune_mask;
3815 ix86_arch = (enum processor_type) ptr->arch;
3816 ix86_schedule = (enum attr_cpu) ptr->schedule;
3817 ix86_tune = (enum processor_type) ptr->tune;
3818 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3819 ix86_branch_cost = ptr->branch_cost;
3820 ix86_tune_defaulted = ptr->tune_defaulted;
3821 ix86_arch_specified = ptr->arch_specified;
3822 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3823 target_flags_explicit = ptr->target_flags_explicit;
3825 /* Recreate the arch feature tests if the arch changed */
3826 if (old_arch != ix86_arch)
3828 ix86_arch_mask = 1u << ix86_arch;
3829 for (i = 0; i < X86_ARCH_LAST; ++i)
3830 ix86_arch_features[i]
3831 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3834 /* Recreate the tune optimization tests */
3835 if (old_tune != ix86_tune)
3837 ix86_tune_mask = 1u << ix86_tune;
3838 for (i = 0; i < X86_TUNE_LAST; ++i)
3839 ix86_tune_features[i]
3840 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
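/* A minimal round-trip sketch of these two hooks (hypothetical caller; the
   real callers are the cl_target_option save/restore machinery):

       struct cl_target_option saved;
       ix86_function_specific_save (&saved);
       ... temporarily change ix86_arch / ix86_tune / ix86_fpmath ...
       ix86_function_specific_restore (&saved);

   After the restore, the arch and tune feature bitmaps are recomputed only
   if the arch or tune actually changed, as coded above.  */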
3844 /* Print the current options */
3847 ix86_function_specific_print (FILE *file, int indent,
3848 struct cl_target_option *ptr)
3851 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3852 NULL, NULL, NULL, false);
3854 fprintf (file, "%*sarch = %d (%s)\n",
3857 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3858 ? cpu_names[ptr->arch]
3861 fprintf (file, "%*stune = %d (%s)\n",
3864 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3865 ? cpu_names[ptr->tune]
3868 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3869 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3870 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3871 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3875 fprintf (file, "%*s%s\n", indent, "", target_string);
3876 free (target_string);
3881 /* Inner function to process the attribute((target(...))), take an argument and
3882 set the current options from the argument. If we have a list, recursively go over the list. */
3886 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3891 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3892 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3893 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3894 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3909 enum ix86_opt_type type;
3914 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3915 IX86_ATTR_ISA ("abm", OPT_mabm),
3916 IX86_ATTR_ISA ("aes", OPT_maes),
3917 IX86_ATTR_ISA ("avx", OPT_mavx),
3918 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3919 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3920 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3921 IX86_ATTR_ISA ("sse", OPT_msse),
3922 IX86_ATTR_ISA ("sse2", OPT_msse2),
3923 IX86_ATTR_ISA ("sse3", OPT_msse3),
3924 IX86_ATTR_ISA ("sse4", OPT_msse4),
3925 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3926 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3927 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3928 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3929 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3930 IX86_ATTR_ISA ("xop", OPT_mxop),
3931 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3932 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3933 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3934 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3936 /* string options */
3937 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3938 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3939 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3942 IX86_ATTR_YES ("cld",
3946 IX86_ATTR_NO ("fancy-math-387",
3947 OPT_mfancy_math_387,
3948 MASK_NO_FANCY_MATH_387),
3950 IX86_ATTR_YES ("ieee-fp",
3954 IX86_ATTR_YES ("inline-all-stringops",
3955 OPT_minline_all_stringops,
3956 MASK_INLINE_ALL_STRINGOPS),
3958 IX86_ATTR_YES ("inline-stringops-dynamically",
3959 OPT_minline_stringops_dynamically,
3960 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3962 IX86_ATTR_NO ("align-stringops",
3963 OPT_mno_align_stringops,
3964 MASK_NO_ALIGN_STRINGOPS),
3966 IX86_ATTR_YES ("recip",
3972 /* If this is a list, recurse to get the options. */
3973 if (TREE_CODE (args) == TREE_LIST)
3977 for (; args; args = TREE_CHAIN (args))
3978 if (TREE_VALUE (args)
3979 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3985 else if (TREE_CODE (args) != STRING_CST)
3988 /* Handle multiple arguments separated by commas. */
3989 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3991 while (next_optstr && *next_optstr != '\0')
3993 char *p = next_optstr;
3995 char *comma = strchr (next_optstr, ',');
3996 const char *opt_string;
3997 size_t len, opt_len;
4002 enum ix86_opt_type type = ix86_opt_unknown;
4008 len = comma - next_optstr;
4009 next_optstr = comma + 1;
4017 /* Recognize no-xxx. */
4018 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4027 /* Find the option. */
4030 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4032 type = attrs[i].type;
4033 opt_len = attrs[i].len;
4034 if (ch == attrs[i].string[0]
4035 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4036 && memcmp (p, attrs[i].string, opt_len) == 0)
4039 mask = attrs[i].mask;
4040 opt_string = attrs[i].string;
4045 /* Process the option. */
4048 error ("attribute(target(\"%s\")) is unknown", orig_p);
4052 else if (type == ix86_opt_isa)
4053 ix86_handle_option (opt, p, opt_set_p);
4055 else if (type == ix86_opt_yes || type == ix86_opt_no)
4057 if (type == ix86_opt_no)
4058 opt_set_p = !opt_set_p;
4061 target_flags |= mask;
4063 target_flags &= ~mask;
4066 else if (type == ix86_opt_str)
4070 error ("option(\"%s\") was already specified", opt_string);
4074 p_strings[opt] = xstrdup (p + opt_len);
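/* Usage sketch of the strings this loop accepts (illustrative user code,
   not from the original source):

       __attribute__((target("sse4.2,no-fancy-math-387")))
       int fast_path (int x);

       __attribute__((target("arch=core2,fpmath=sse")))
       int tuned_path (int x);

   Comma-separated entries are split above; a "no-" prefix flips opt_set_p,
   and "arch="/"tune="/"fpmath=" values land in p_strings[] for later
   handling.  */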
4084 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4087 ix86_valid_target_attribute_tree (tree args)
4089 const char *orig_arch_string = ix86_arch_string;
4090 const char *orig_tune_string = ix86_tune_string;
4091 const char *orig_fpmath_string = ix86_fpmath_string;
4092 int orig_tune_defaulted = ix86_tune_defaulted;
4093 int orig_arch_specified = ix86_arch_specified;
4094 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4097 struct cl_target_option *def
4098 = TREE_TARGET_OPTION (target_option_default_node);
4100 /* Process each of the options on the chain. */
4101 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4104 /* If the changed options are different from the default, rerun override_options,
4105 and then save the options away. The string options are attribute options,
4106 and will be undone when we copy the save structure. */
4107 if (ix86_isa_flags != def->ix86_isa_flags
4108 || target_flags != def->target_flags
4109 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4110 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4111 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4113 /* If we are using the default tune= or arch=, undo the string assigned,
4114 and use the default. */
4115 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4116 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4117 else if (!orig_arch_specified)
4118 ix86_arch_string = NULL;
4120 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4121 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4122 else if (orig_tune_defaulted)
4123 ix86_tune_string = NULL;
4125 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4126 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4127 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4128 else if (!TARGET_64BIT && TARGET_SSE)
4129 ix86_fpmath_string = "sse,387";
4131 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4132 override_options (false);
4134 /* Add any builtin functions with the new isa if any. */
4135 ix86_add_new_builtins (ix86_isa_flags);
4137 /* Save the current options unless we are validating options for
4139 t = build_target_option_node ();
4141 ix86_arch_string = orig_arch_string;
4142 ix86_tune_string = orig_tune_string;
4143 ix86_fpmath_string = orig_fpmath_string;
4145 /* Free up memory allocated to hold the strings */
4146 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4147 if (option_strings[i])
4148 free (option_strings[i]);
4154 /* Hook to validate attribute((target("string"))). */
4157 ix86_valid_target_attribute_p (tree fndecl,
4158 tree ARG_UNUSED (name),
4160 int ARG_UNUSED (flags))
4162 struct cl_target_option cur_target;
4164 tree old_optimize = build_optimization_node ();
4165 tree new_target, new_optimize;
4166 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4168 /* If the function changed the optimization levels as well as setting target
4169 options, start with the optimizations specified. */
4170 if (func_optimize && func_optimize != old_optimize)
4171 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4173 /* The target attributes may also change some optimization flags, so update
4174 the optimization options if necessary. */
4175 cl_target_option_save (&cur_target);
4176 new_target = ix86_valid_target_attribute_tree (args);
4177 new_optimize = build_optimization_node ();
4184 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4186 if (old_optimize != new_optimize)
4187 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4190 cl_target_option_restore (&cur_target);
4192 if (old_optimize != new_optimize)
4193 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4199 /* Hook to determine if one function can safely inline another. */
4202 ix86_can_inline_p (tree caller, tree callee)
4205 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4206 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4208 /* If callee has no option attributes, then it is ok to inline. */
4212 /* If caller has no option attributes, but callee does, then it is not ok to inline. */
4214 else if (!caller_tree)
4219 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4220 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4222 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4223 can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4225 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4226 != callee_opts->ix86_isa_flags)
4229 /* See if we have the same non-isa options. */
4230 else if (caller_opts->target_flags != callee_opts->target_flags)
4233 /* See if arch, tune, etc. are the same. */
4234 else if (caller_opts->arch != callee_opts->arch)
4237 else if (caller_opts->tune != callee_opts->tune)
4240 else if (caller_opts->fpmath != callee_opts->fpmath)
4243 else if (caller_opts->branch_cost != callee_opts->branch_cost)
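/* Illustrative consequence of the ISA-subset rule above (user-level sketch):

       __attribute__((target("sse2")))
       static int callee (int x) { return x + 1; }

       __attribute__((target("sse4.2")))
       int caller (int x) { return callee (x); }

   Here the callee's ISA bits are a subset of the caller's, so inlining is
   allowed; swapping the two attributes would make ix86_can_inline_p refuse
   the inline.  */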
4254 /* Remember the last target of ix86_set_current_function. */
4255 static GTY(()) tree ix86_previous_fndecl;
4257 /* Establish appropriate back-end context for processing the function
4258 FNDECL. The argument might be NULL to indicate processing at top
4259 level, outside of any function scope. */
4261 ix86_set_current_function (tree fndecl)
4263 /* Only change the context if the function changes. This hook is called
4264 several times in the course of compiling a function, and we don't want to
4265 slow things down too much or call target_reinit when it isn't safe. */
4266 if (fndecl && fndecl != ix86_previous_fndecl)
4268 tree old_tree = (ix86_previous_fndecl
4269 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4272 tree new_tree = (fndecl
4273 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4276 ix86_previous_fndecl = fndecl;
4277 if (old_tree == new_tree)
4282 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4288 struct cl_target_option *def
4289 = TREE_TARGET_OPTION (target_option_current_node);
4291 cl_target_option_restore (def);
4298 /* Return true if this goes in large data/bss. */
4301 ix86_in_large_data_p (tree exp)
4303 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4306 /* Functions are never large data. */
4307 if (TREE_CODE (exp) == FUNCTION_DECL)
4310 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4312 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4313 if (strcmp (section, ".ldata") == 0
4314 || strcmp (section, ".lbss") == 0)
4320 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4322 /* If this is an incomplete type with size 0, then we can't put it
4323 in data because it might be too big when completed. */
4324 if (!size || size > ix86_section_threshold)
4331 /* Switch to the appropriate section for output of DECL.
4332 DECL is either a `VAR_DECL' node or a constant of some sort.
4333 RELOC indicates whether forming the initial value of DECL requires
4334 link-time relocations. */
4336 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4340 x86_64_elf_select_section (tree decl, int reloc,
4341 unsigned HOST_WIDE_INT align)
4343 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4344 && ix86_in_large_data_p (decl))
4346 const char *sname = NULL;
4347 unsigned int flags = SECTION_WRITE;
4348 switch (categorize_decl_for_section (decl, reloc))
4353 case SECCAT_DATA_REL:
4354 sname = ".ldata.rel";
4356 case SECCAT_DATA_REL_LOCAL:
4357 sname = ".ldata.rel.local";
4359 case SECCAT_DATA_REL_RO:
4360 sname = ".ldata.rel.ro";
4362 case SECCAT_DATA_REL_RO_LOCAL:
4363 sname = ".ldata.rel.ro.local";
4367 flags |= SECTION_BSS;
4370 case SECCAT_RODATA_MERGE_STR:
4371 case SECCAT_RODATA_MERGE_STR_INIT:
4372 case SECCAT_RODATA_MERGE_CONST:
4376 case SECCAT_SRODATA:
4383 /* We don't split these for medium model. Place them into
4384 default sections and hope for the best. */
4389 /* We might get called with string constants, but get_named_section
4390 doesn't like them as they are not DECLs. Also, we need to set
4391 flags in that case. */
4393 return get_section (sname, flags, NULL);
4394 return get_named_section (decl, sname, reloc);
4397 return default_elf_select_section (decl, reloc, align);
4400 /* Build up a unique section name, expressed as a
4401 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4402 RELOC indicates whether the initial value of EXP requires
4403 link-time relocations. */
4405 static void ATTRIBUTE_UNUSED
4406 x86_64_elf_unique_section (tree decl, int reloc)
4408 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4409 && ix86_in_large_data_p (decl))
4411 const char *prefix = NULL;
4412 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4413 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4415 switch (categorize_decl_for_section (decl, reloc))
4418 case SECCAT_DATA_REL:
4419 case SECCAT_DATA_REL_LOCAL:
4420 case SECCAT_DATA_REL_RO:
4421 case SECCAT_DATA_REL_RO_LOCAL:
4422 prefix = one_only ? ".ld" : ".ldata";
4425 prefix = one_only ? ".lb" : ".lbss";
4428 case SECCAT_RODATA_MERGE_STR:
4429 case SECCAT_RODATA_MERGE_STR_INIT:
4430 case SECCAT_RODATA_MERGE_CONST:
4431 prefix = one_only ? ".lr" : ".lrodata";
4433 case SECCAT_SRODATA:
4440 /* We don't split these for medium model. Place them into
4441 default sections and hope for the best. */
4446 const char *name, *linkonce;
4449 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4450 name = targetm.strip_name_encoding (name);
4452 /* If we're using one_only, then there needs to be a .gnu.linkonce
4453 prefix to the section name. */
4454 linkonce = one_only ? ".gnu.linkonce" : "";
4456 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4458 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4462 default_unique_section (decl, reloc);
4465 #ifdef COMMON_ASM_OP
4466 /* This says how to output assembler code to declare an
4467 uninitialized external linkage data object.
4469 For medium model x86-64 we need to use the .largecomm directive for large objects. */
4472 x86_elf_aligned_common (FILE *file,
4473 const char *name, unsigned HOST_WIDE_INT size,
4476 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4477 && size > (unsigned int)ix86_section_threshold)
4478 fputs (".largecomm\t", file);
4480 fputs (COMMON_ASM_OP, file);
4481 assemble_name (file, name);
4482 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4483 size, align / BITS_PER_UNIT);
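/* Illustrative output (hypothetical symbol and sizes): under -mcmodel=medium,
   a tentative definition larger than ix86_section_threshold, e.g.

       char big[100000];

   is announced with the large-model directive, roughly

       .largecomm  big,100000,32

   while smaller objects keep the ordinary COMMON_ASM_OP (".comm") path.  */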
4487 /* Utility function for targets to use in implementing
4488 ASM_OUTPUT_ALIGNED_BSS. */
4491 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4492 const char *name, unsigned HOST_WIDE_INT size,
4495 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4496 && size > (unsigned int)ix86_section_threshold)
4497 switch_to_section (get_named_section (decl, ".lbss", 0));
4499 switch_to_section (bss_section);
4500 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4501 #ifdef ASM_DECLARE_OBJECT_NAME
4502 last_assemble_variable_decl = decl;
4503 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4505 /* The standard thing is just to output a label for the object. */
4506 ASM_OUTPUT_LABEL (file, name);
4507 #endif /* ASM_DECLARE_OBJECT_NAME */
4508 ASM_OUTPUT_SKIP (file, size ? size : 1);
4512 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4514 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4515 make the problem with not enough registers even worse. */
4516 #ifdef INSN_SCHEDULING
4518 flag_schedule_insns = 0;
4522 /* The Darwin libraries never set errno, so we might as well
4523 avoid calling them when that's the only reason we would. */
4524 flag_errno_math = 0;
4526 /* The default values of these switches depend on the TARGET_64BIT
4527 that is not known at this moment. Mark these values with 2 and
4528 let the user override these. In case there is no command line option
4529 specifying them, we will set the defaults in override_options. */
4531 flag_omit_frame_pointer = 2;
4533 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4537 flag_pcc_struct_return = 2;
4538 flag_asynchronous_unwind_tables = 2;
4539 flag_vect_cost_model = 1;
4540 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4541 SUBTARGET_OPTIMIZATION_OPTIONS;
4545 /* Decide whether we must probe the stack before any space allocation
4546 on this target. It's essentially TARGET_STACK_PROBE except when
4547 -fstack-check causes the stack to be already probed differently. */
4550 ix86_target_stack_probe (void)
4552 /* Do not probe the stack twice if static stack checking is enabled. */
4553 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4556 return TARGET_STACK_PROBE;
4559 /* Decide whether we can make a sibling call to a function. DECL is the
4560 declaration of the function being targeted by the call and EXP is the
4561 CALL_EXPR representing the call. */
4564 ix86_function_ok_for_sibcall (tree decl, tree exp)
4566 tree type, decl_or_type;
4569 /* If we are generating position-independent code, we cannot sibcall
4570 optimize any indirect call, or a direct call to a global function,
4571 as the PLT requires %ebx be live. */
4572 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4575 /* If we need to align the outgoing stack, then sibcalling would
4576 unalign the stack, which may break the called function. */
4577 if (ix86_minimum_incoming_stack_boundary (true)
4578 < PREFERRED_STACK_BOUNDARY)
4583 decl_or_type = decl;
4584 type = TREE_TYPE (decl);
4588 /* We're looking at the CALL_EXPR, we need the type of the function. */
4589 type = CALL_EXPR_FN (exp); /* pointer expression */
4590 type = TREE_TYPE (type); /* pointer type */
4591 type = TREE_TYPE (type); /* function type */
4592 decl_or_type = type;
4595 /* Check that the return value locations are the same. Like
4596 if we are returning floats on the 80387 register stack, we cannot
4597 make a sibcall from a function that doesn't return a float to a
4598 function that does or, conversely, from a function that does return
4599 a float to a function that doesn't; the necessary stack adjustment
4600 would not be executed. This is also the place we notice
4601 differences in the return value ABI. Note that it is ok for one
4602 of the functions to have void return type as long as the return
4603 value of the other is passed in a register. */
4604 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4605 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4607 if (STACK_REG_P (a) || STACK_REG_P (b))
4609 if (!rtx_equal_p (a, b))
4612 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4614 else if (!rtx_equal_p (a, b))
4619 /* The SYSV ABI has more call-clobbered registers;
4620 disallow sibcalls from MS to SYSV. */
4621 if (cfun->machine->call_abi == MS_ABI
4622 && ix86_function_type_abi (type) == SYSV_ABI)
4627 /* If this call is indirect, we'll need to be able to use a
4628 call-clobbered register for the address of the target function.
4629 Make sure that all such registers are not used for passing
4630 parameters. Note that DLLIMPORT functions are indirect. */
4632 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4634 if (ix86_function_regparm (type, NULL) >= 3)
4636 /* ??? Need to count the actual number of registers to be used,
4637 not the possible number of registers. Fix later. */
4643 /* Otherwise okay. That also includes certain types of indirect calls. */
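/* Illustrative case for the checks above (user-level sketch): in 32-bit PIC
   code,

       extern int helper (int);
       int wrapper (int x) { return helper (x); }

   cannot be compiled as a sibling call when helper does not bind locally,
   because reaching it through the PLT requires %ebx to stay live.  */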
4647 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4648 and "sseregparm" calling convention attributes;
4649 arguments as in struct attribute_spec.handler. */
4652 ix86_handle_cconv_attribute (tree *node, tree name,
4654 int flags ATTRIBUTE_UNUSED,
4657 if (TREE_CODE (*node) != FUNCTION_TYPE
4658 && TREE_CODE (*node) != METHOD_TYPE
4659 && TREE_CODE (*node) != FIELD_DECL
4660 && TREE_CODE (*node) != TYPE_DECL)
4662 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4664 *no_add_attrs = true;
4668 /* Can combine regparm with all attributes but fastcall. */
4669 if (is_attribute_p ("regparm", name))
4673 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4675 error ("fastcall and regparm attributes are not compatible");
4678 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4680 error ("regparam and thiscall attributes are not compatible");
4683 cst = TREE_VALUE (args);
4684 if (TREE_CODE (cst) != INTEGER_CST)
4686 warning (OPT_Wattributes,
4687 "%qE attribute requires an integer constant argument",
4689 *no_add_attrs = true;
4691 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4693 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4695 *no_add_attrs = true;
4703 /* Do not warn when emulating the MS ABI. */
4704 if ((TREE_CODE (*node) != FUNCTION_TYPE
4705 && TREE_CODE (*node) != METHOD_TYPE)
4706 || ix86_function_type_abi (*node) != MS_ABI)
4707 warning (OPT_Wattributes, "%qE attribute ignored",
4709 *no_add_attrs = true;
4713 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4714 if (is_attribute_p ("fastcall", name))
4716 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4718 error ("fastcall and cdecl attributes are not compatible");
4720 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4722 error ("fastcall and stdcall attributes are not compatible");
4724 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4726 error ("fastcall and regparm attributes are not compatible");
4728 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4730 error ("fastcall and thiscall attributes are not compatible");
4734 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4736 else if (is_attribute_p ("stdcall", name))
4738 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4740 error ("stdcall and cdecl attributes are not compatible");
4742 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4744 error ("stdcall and fastcall attributes are not compatible");
4746 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4748 error ("stdcall and thiscall attributes are not compatible");
4752 /* Can combine cdecl with regparm and sseregparm. */
4753 else if (is_attribute_p ("cdecl", name))
4755 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4757 error ("stdcall and cdecl attributes are not compatible");
4759 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4761 error ("fastcall and cdecl attributes are not compatible");
4763 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4765 error ("cdecl and thiscall attributes are not compatible");
4768 else if (is_attribute_p ("thiscall", name))
4770 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4771 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4773 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4775 error ("stdcall and thiscall attributes are not compatible");
4777 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4779 error ("fastcall and thiscall attributes are not compatible");
4781 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4783 error ("cdecl and thiscall attributes are not compatible");
4787 /* Can combine sseregparm with all attributes. */
4792 /* Return 0 if the attributes for two types are incompatible, 1 if they
4793 are compatible, and 2 if they are nearly compatible (which causes a
4794 warning to be generated). */
4797 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4799 /* Check for mismatch of non-default calling convention. */
4800 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4802 if (TREE_CODE (type1) != FUNCTION_TYPE
4803 && TREE_CODE (type1) != METHOD_TYPE)
4806 /* Check for mismatched fastcall/regparm types. */
4807 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4808 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4809 || (ix86_function_regparm (type1, NULL)
4810 != ix86_function_regparm (type2, NULL)))
4813 /* Check for mismatched sseregparm types. */
4814 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4815 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4818 /* Check for mismatched thiscall types. */
4819 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4820 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4823 /* Check for mismatched return types (cdecl vs stdcall). */
4824 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4825 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4831 /* Return the regparm value for a function with the indicated TYPE and DECL.
4832 DECL may be NULL when calling function indirectly
4833 or considering a libcall. */
4836 ix86_function_regparm (const_tree type, const_tree decl)
4842 return (ix86_function_type_abi (type) == SYSV_ABI
4843 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4845 regparm = ix86_regparm;
4846 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4849 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4853 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4856 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4859 /* Use register calling convention for local functions when possible. */
4861 && TREE_CODE (decl) == FUNCTION_DECL
4863 && !(profile_flag && !flag_fentry))
4865 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4866 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4869 int local_regparm, globals = 0, regno;
4871 /* Make sure no regparm register is taken by a
4872 fixed register variable. */
4873 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4874 if (fixed_regs[local_regparm])
4877 /* We don't want to use regparm(3) for nested functions as
4878 these use a static chain pointer in the third argument. */
4879 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4882 /* Each fixed register usage increases register pressure,
4883 so fewer registers should be used for argument passing.
4884 This functionality can be overridden by an explicit regparm value. */
4886 for (regno = 0; regno <= DI_REG; regno++)
4887 if (fixed_regs[regno])
4891 = globals < local_regparm ? local_regparm - globals : 0;
4893 if (local_regparm > regparm)
4894 regparm = local_regparm;
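/* Usage sketch (illustrative user code): an explicit regparm request

       __attribute__((regparm(3))) int f (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx; the code above may derive the
   same convention automatically for local functions, minus any registers
   claimed by fixed register variables.  */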
4901 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4902 DFmode (2) arguments in SSE registers for a function with the
4903 indicated TYPE and DECL. DECL may be NULL when calling function
4904 indirectly or considering a libcall. Otherwise return 0. */
4907 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4909 gcc_assert (!TARGET_64BIT);
4911 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4912 by the sseregparm attribute. */
4913 if (TARGET_SSEREGPARM
4914 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4921 error ("Calling %qD with attribute sseregparm without "
4922 "SSE/SSE2 enabled", decl);
4924 error ("Calling %qT with attribute sseregparm without "
4925 "SSE/SSE2 enabled", type);
4933 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4934 (and DFmode for SSE2) arguments in SSE registers. */
4935 if (decl && TARGET_SSE_MATH && optimize
4936 && !(profile_flag && !flag_fentry))
4938 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4939 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4941 return TARGET_SSE2 ? 2 : 1;
4947 /* Return true if EAX is live at the start of the function. Used by
4948 ix86_expand_prologue to determine if we need special help before
4949 calling allocate_stack_worker. */
4952 ix86_eax_live_at_start_p (void)
4954 /* Cheat. Don't bother working forward from ix86_function_regparm
4955 to the function type to whether an actual argument is located in
4956 eax. Instead just look at cfg info, which is still close enough
4957 to correct at this point. This gives false positives for broken
4958 functions that might use uninitialized data that happens to be
4959 allocated in eax, but who cares? */
4960 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4963 /* Value is the number of bytes of arguments automatically
4964 popped when returning from a subroutine call.
4965 FUNDECL is the declaration node of the function (as a tree),
4966 FUNTYPE is the data type of the function (as a tree),
4967 or for a library call it is an identifier node for the subroutine name.
4968 SIZE is the number of bytes of arguments passed on the stack.
4970 On the 80386, the RTD insn may be used to pop them if the number
4971 of args is fixed, but if the number is variable then the caller
4972 must pop them all. RTD can't be used for library calls now
4973 because the library is compiled with the Unix compiler.
4974 Use of RTD is a selectable option, since it is incompatible with
4975 standard Unix calling sequences. If the option is not selected,
4976 the caller must always pop the args.
4978 The attribute stdcall is equivalent to RTD on a per module basis. */
4981 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4985 /* None of the 64-bit ABIs pop arguments. */
4989 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4991 /* Cdecl functions override -mrtd, and never pop the stack. */
4992 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4994 /* Stdcall and fastcall functions will pop the stack if not
4996 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4997 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
4998 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5001 if (rtd && ! stdarg_p (funtype))
5005 /* Lose any fake structure return argument if it is passed on the stack. */
5006 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5007 && !KEEP_AGGREGATE_RETURN_POINTER)
5009 int nregs = ix86_function_regparm (funtype, fundecl);
5011 return GET_MODE_SIZE (Pmode);
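/* Worked example (illustrative): a 32-bit function declared

       __attribute__((stdcall)) int f (int a, int b);

   takes 8 bytes of stack arguments, so ix86_return_pops_args returns 8 and
   the callee returns with "ret $8"; a plain cdecl function returns 0 and
   leaves the pop to the caller.  */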
5017 /* Argument support functions. */
5019 /* Return true when register may be used to pass function parameters. */
5021 ix86_function_arg_regno_p (int regno)
5024 const int *parm_regs;
5029 return (regno < REGPARM_MAX
5030 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5032 return (regno < REGPARM_MAX
5033 || (TARGET_MMX && MMX_REGNO_P (regno)
5034 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5035 || (TARGET_SSE && SSE_REGNO_P (regno)
5036 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5041 if (SSE_REGNO_P (regno) && TARGET_SSE)
5046 if (TARGET_SSE && SSE_REGNO_P (regno)
5047 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5051 /* TODO: The function should depend on current function ABI but
5052 builtins.c would need updating then. Therefore we use the default ABI. */
5055 /* RAX is used as hidden argument to va_arg functions. */
5056 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5059 if (ix86_abi == MS_ABI)
5060 parm_regs = x86_64_ms_abi_int_parameter_registers;
5062 parm_regs = x86_64_int_parameter_registers;
5063 for (i = 0; i < (ix86_abi == MS_ABI
5064 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5065 if (regno == parm_regs[i])
5070 /* Return if we do not know how to pass TYPE solely in registers. */
5073 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5075 if (must_pass_in_stack_var_size_or_pad (mode, type))
5078 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5079 The layout_type routine is crafty and tries to trick us into passing
5080 currently unsupported vector types on the stack by using TImode. */
5081 return (!TARGET_64BIT && mode == TImode
5082 && type && TREE_CODE (type) != VECTOR_TYPE);
5085 /* Return the size, in bytes, of the area reserved for arguments passed
5086 in registers for the function represented by FNDECL, depending on the ABI used. */
5089 ix86_reg_parm_stack_space (const_tree fndecl)
5091 enum calling_abi call_abi = SYSV_ABI;
5092 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5093 call_abi = ix86_function_abi (fndecl);
5095 call_abi = ix86_function_type_abi (fndecl);
5096 if (call_abi == MS_ABI)
5101 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the calling ABI in use. */
5104 ix86_function_type_abi (const_tree fntype)
5106 if (TARGET_64BIT && fntype != NULL)
5108 enum calling_abi abi = ix86_abi;
5109 if (abi == SYSV_ABI)
5111 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5114 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5122 ix86_function_ms_hook_prologue (const_tree fn)
5124 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5126 if (decl_function_context (fn) != NULL_TREE)
5127 error_at (DECL_SOURCE_LOCATION (fn),
5128 "ms_hook_prologue is not compatible with nested function");
5135 static enum calling_abi
5136 ix86_function_abi (const_tree fndecl)
5140 return ix86_function_type_abi (TREE_TYPE (fndecl));
5143 /* Return SYSV_ABI or MS_ABI, depending on cfun, specifying the calling ABI in use. */
5146 ix86_cfun_abi (void)
5148 if (! cfun || ! TARGET_64BIT)
5150 return cfun->machine->call_abi;
5153 /* Write the extra assembler code needed to declare a function properly. */
5156 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5159 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5163 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5164 unsigned int filler_cc = 0xcccccccc;
5166 for (i = 0; i < filler_count; i += 4)
5167 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5170 ASM_OUTPUT_LABEL (asm_out_file, fname);
5172 /* Output magic byte marker, if hot-patch attribute is set. */
5177 /* leaq [%rsp + 0], %rsp */
5178 asm_fprintf (asm_out_file, ASM_BYTE
5179 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5183 /* movl.s %edi, %edi ; push %ebp ;
5185 movl.s %esp, %ebp */
5186 asm_fprintf (asm_out_file, ASM_BYTE
5187 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5193 extern void init_regs (void);
5195 /* Implementation of the call ABI switching target hook. Set the call
5196 register sets specific to FNDECL. See also CONDITIONAL_REGISTER_USAGE
5197 for more details. */
5199 ix86_call_abi_override (const_tree fndecl)
5201 if (fndecl == NULL_TREE)
5202 cfun->machine->call_abi = ix86_abi;
5204 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5207 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
5208 re-initialization of init_regs each time we switch function context since
5209 this is needed only during RTL expansion. */
5211 ix86_maybe_switch_abi (void)
5214 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5218 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5219 for a call to a function whose data type is FNTYPE.
5220 For a library call, FNTYPE is 0. */
5223 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5224 tree fntype, /* tree ptr for function decl */
5225 rtx libname, /* SYMBOL_REF of library name or 0 */
5228 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5229 memset (cum, 0, sizeof (*cum));
5232 cum->call_abi = ix86_function_abi (fndecl);
5234 cum->call_abi = ix86_function_type_abi (fntype);
5235 /* Set up the number of registers to use for passing arguments. */
5237 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5238 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5239 "or subtarget optimization implying it");
5240 cum->nregs = ix86_regparm;
5243 cum->nregs = (cum->call_abi == SYSV_ABI
5244 ? X86_64_REGPARM_MAX
5245 : X86_64_MS_REGPARM_MAX);
5249 cum->sse_nregs = SSE_REGPARM_MAX;
5252 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5253 ? X86_64_SSE_REGPARM_MAX
5254 : X86_64_MS_SSE_REGPARM_MAX);
5258 cum->mmx_nregs = MMX_REGPARM_MAX;
5259 cum->warn_avx = true;
5260 cum->warn_sse = true;
5261 cum->warn_mmx = true;
5263 /* Because types might mismatch between caller and callee, we need to
5264 use actual type of function for local calls.
5265 FIXME: cgraph_analyze can be told to actually record if function uses
5266 va_start so for local functions maybe_vaarg can be made aggressive
5268 FIXME: once the type system is fixed, we won't need this code anymore. */
5270 fntype = TREE_TYPE (fndecl);
5271 cum->maybe_vaarg = (fntype
5272 ? (!prototype_p (fntype) || stdarg_p (fntype))
5277 /* If there are variable arguments, then we won't pass anything
5278 in registers in 32-bit mode. */
5279 if (stdarg_p (fntype))
5290 /* Use ecx and edx registers if function has fastcall attribute,
5291 else look for regparm information. */
5294 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5297 cum->fastcall = 1; /* Same first register as in fastcall. */
5299 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5305 cum->nregs = ix86_function_regparm (fntype, fndecl);
5308 /* Set up the number of SSE registers used for passing SFmode
5309 and DFmode arguments. Warn for mismatching ABI. */
5310 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5314 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5315 But in the case of vector types, it is some vector mode.
5317 When we have only some of our vector isa extensions enabled, then there
5318 are some modes for which vector_mode_supported_p is false. For these
5319 modes, the generic vector support in gcc will choose some non-vector mode
5320 in order to implement the type. By computing the natural mode, we'll
5321 select the proper ABI location for the operand and not depend on whatever
5322 the middle-end decides to do with these vector types.
5324 The middle end can't deal with vector types larger than 16 bytes. In this
5325 case, we return the original mode and warn ABI change if CUM isn't NULL. */
5328 static enum machine_mode
5329 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5331 enum machine_mode mode = TYPE_MODE (type);
5333 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5335 HOST_WIDE_INT size = int_size_in_bytes (type);
5336 if ((size == 8 || size == 16 || size == 32)
5337 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5338 && TYPE_VECTOR_SUBPARTS (type) > 1)
5340 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5342 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5343 mode = MIN_MODE_VECTOR_FLOAT;
5345 mode = MIN_MODE_VECTOR_INT;
5347 /* Get the mode which has this inner mode and number of units. */
5348 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5349 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5350 && GET_MODE_INNER (mode) == innermode)
5352 if (size == 32 && !TARGET_AVX)
5354 static bool warnedavx;
5361 warning (0, "AVX vector argument without AVX "
5362 "enabled changes the ABI");
5364 return TYPE_MODE (type);
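/* Illustrative mapping (user-level sketch):

       typedef float v4sf __attribute__((vector_size (16)));

   With SSE enabled, TYPE_MODE is already V4SFmode and is returned as is.
   With SSE disabled, the middle end falls back to a non-vector TYPE_MODE,
   and the search above still recovers V4SFmode (REAL_TYPE inner mode, four
   subparts), so the argument keeps its proper ABI slot.  */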
5377 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5378 this may not agree with the mode that the type system has chosen for the
5379 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5380 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5383 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5388 if (orig_mode != BLKmode)
5389 tmp = gen_rtx_REG (orig_mode, regno);
5392 tmp = gen_rtx_REG (mode, regno);
5393 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5394 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5400 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5401 of this code is to classify each 8 bytes of the incoming argument by the register
5402 class and assign registers accordingly. */
5404 /* Return the union class of CLASS1 and CLASS2.
5405 See the x86-64 PS ABI for details. */
5407 static enum x86_64_reg_class
5408 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5410 /* Rule #1: If both classes are equal, this is the resulting class. */
5411 if (class1 == class2)
5414 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5416 if (class1 == X86_64_NO_CLASS)
5418 if (class2 == X86_64_NO_CLASS)
5421 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5422 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5423 return X86_64_MEMORY_CLASS;
5425 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5426 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5427 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5428 return X86_64_INTEGERSI_CLASS;
5429 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5430 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5431 return X86_64_INTEGER_CLASS;
5433 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5435 if (class1 == X86_64_X87_CLASS
5436 || class1 == X86_64_X87UP_CLASS
5437 || class1 == X86_64_COMPLEX_X87_CLASS
5438 || class2 == X86_64_X87_CLASS
5439 || class2 == X86_64_X87UP_CLASS
5440 || class2 == X86_64_COMPLEX_X87_CLASS)
5441 return X86_64_MEMORY_CLASS;
5443 /* Rule #6: Otherwise class SSE is used. */
5444 return X86_64_SSE_CLASS;
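/* Worked example of the rules above (illustrative): classifying

       struct s { long l; double d; };

   yields INTEGER for the first 8 bytes and SSE for the second, so the
   struct travels in one general register and one SSE register.  Merging
   INTEGER with SSE (rule #4) gives INTEGER, and any X87, X87UP or
   COMPLEX_X87 operand forces MEMORY (rule #5).  */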
5447 /* Classify the argument of type TYPE and mode MODE.
5448 CLASSES will be filled by the register class used to pass each word
5449 of the operand. The number of words is returned. In case the parameter
5450 should be passed in memory, 0 is returned. As a special case for zero
5451 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5453 BIT_OFFSET is used internally for handling records and specifies the
5454 offset in bits modulo 256 to avoid overflow cases.
5456 See the x86-64 PS ABI for details.
5460 classify_argument (enum machine_mode mode, const_tree type,
5461 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5463 HOST_WIDE_INT bytes =
5464 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5465 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5467 /* Variable sized entities are always passed/returned in memory. */
5471 if (mode != VOIDmode
5472 && targetm.calls.must_pass_in_stack (mode, type))
5475 if (type && AGGREGATE_TYPE_P (type))
5479 enum x86_64_reg_class subclasses[MAX_CLASSES];
5481 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5485 for (i = 0; i < words; i++)
5486 classes[i] = X86_64_NO_CLASS;
5488 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5489 signal the memory class, so handle it as a special case. */
5492 classes[0] = X86_64_NO_CLASS;
5496 /* Classify each field of record and merge classes. */
5497 switch (TREE_CODE (type))
5500 /* And now merge the fields of structure. */
5501 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5503 if (TREE_CODE (field) == FIELD_DECL)
5507 if (TREE_TYPE (field) == error_mark_node)
5510 /* Bitfields are always classified as integer. Handle them
5511 early, since later code would consider them to be
5512 misaligned integers. */
5513 if (DECL_BIT_FIELD (field))
5515 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5516 i < ((int_bit_position (field) + (bit_offset % 64))
5517 + tree_low_cst (DECL_SIZE (field), 0)
5520 merge_classes (X86_64_INTEGER_CLASS,
5527 type = TREE_TYPE (field);
5529 /* Flexible array member is ignored. */
5530 if (TYPE_MODE (type) == BLKmode
5531 && TREE_CODE (type) == ARRAY_TYPE
5532 && TYPE_SIZE (type) == NULL_TREE
5533 && TYPE_DOMAIN (type) != NULL_TREE
5534 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5539 if (!warned && warn_psabi)
5542 inform (input_location,
5543 "The ABI of passing struct with"
5544 " a flexible array member has"
5545 " changed in GCC 4.4");
5549 num = classify_argument (TYPE_MODE (type), type,
5551 (int_bit_position (field)
5552 + bit_offset) % 256);
5555 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5556 for (i = 0; i < num && (i + pos) < words; i++)
5558 merge_classes (subclasses[i], classes[i + pos]);
5565 /* Arrays are handled as small records. */
5568 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5569 TREE_TYPE (type), subclasses, bit_offset);
5573 /* The partial classes are now full classes. */
5574 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5575 subclasses[0] = X86_64_SSE_CLASS;
5576 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5577 && !((bit_offset % 64) == 0 && bytes == 4))
5578 subclasses[0] = X86_64_INTEGER_CLASS;
5580 for (i = 0; i < words; i++)
5581 classes[i] = subclasses[i % num];
5586 case QUAL_UNION_TYPE:
5587 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5589 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5591 if (TREE_CODE (field) == FIELD_DECL)
5595 if (TREE_TYPE (field) == error_mark_node)
5598 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5599 TREE_TYPE (field), subclasses,
5603 for (i = 0; i < num; i++)
5604 classes[i] = merge_classes (subclasses[i], classes[i]);
5615 /* When size > 16 bytes, if the first class isn't
5616 X86_64_SSE_CLASS or any other one isn't
5617 X86_64_SSEUP_CLASS, everything should be passed in memory. */
5619 if (classes[0] != X86_64_SSE_CLASS)
5622 for (i = 1; i < words; i++)
5623 if (classes[i] != X86_64_SSEUP_CLASS)
5627 /* Final merger cleanup. */
5628 for (i = 0; i < words; i++)
5630 /* If one class is MEMORY, everything should be passed in memory. */
5632 if (classes[i] == X86_64_MEMORY_CLASS)
5635 /* The X86_64_SSEUP_CLASS should be always preceded by
5636 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5637 if (classes[i] == X86_64_SSEUP_CLASS
5638 && classes[i - 1] != X86_64_SSE_CLASS
5639 && classes[i - 1] != X86_64_SSEUP_CLASS)
5641 /* The first one should never be X86_64_SSEUP_CLASS. */
5642 gcc_assert (i != 0);
5643 classes[i] = X86_64_SSE_CLASS;
5646 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5647 everything should be passed in memory. */
5648 if (classes[i] == X86_64_X87UP_CLASS
5649 && (classes[i - 1] != X86_64_X87_CLASS))
5653 /* The first one should never be X86_64_X87UP_CLASS. */
5654 gcc_assert (i != 0);
5655 if (!warned && warn_psabi)
5658 inform (input_location,
5659 "The ABI of passing union with long double"
5660 " has changed in GCC 4.4");
5668 /* Compute alignment needed. We align all types to natural boundaries with
5669 the exception of XFmode, which is aligned to 64 bits. */
5670 if (mode != VOIDmode && mode != BLKmode)
5672 int mode_alignment = GET_MODE_BITSIZE (mode);
5675 mode_alignment = 128;
5676 else if (mode == XCmode)
5677 mode_alignment = 256;
5678 if (COMPLEX_MODE_P (mode))
5679 mode_alignment /= 2;
5680 /* Misaligned fields are always returned in memory. */
5681 if (bit_offset % mode_alignment)
5685 /* For V1xx modes, just use the base mode. */
5686 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5687 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5688 mode = GET_MODE_INNER (mode);
5690 /* Classification of atomic types. */
5695 classes[0] = X86_64_SSE_CLASS;
5698 classes[0] = X86_64_SSE_CLASS;
5699 classes[1] = X86_64_SSEUP_CLASS;
5709 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5713 classes[0] = X86_64_INTEGERSI_CLASS;
5716 else if (size <= 64)
5718 classes[0] = X86_64_INTEGER_CLASS;
5721 else if (size <= 64+32)
5723 classes[0] = X86_64_INTEGER_CLASS;
5724 classes[1] = X86_64_INTEGERSI_CLASS;
5727 else if (size <= 64+64)
5729 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5737 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5741 /* OImode shouldn't be used directly. */
5746 if (!(bit_offset % 64))
5747 classes[0] = X86_64_SSESF_CLASS;
5749 classes[0] = X86_64_SSE_CLASS;
5752 classes[0] = X86_64_SSEDF_CLASS;
5755 classes[0] = X86_64_X87_CLASS;
5756 classes[1] = X86_64_X87UP_CLASS;
5759 classes[0] = X86_64_SSE_CLASS;
5760 classes[1] = X86_64_SSEUP_CLASS;
5763 classes[0] = X86_64_SSE_CLASS;
5764 if (!(bit_offset % 64))
5770 if (!warned && warn_psabi)
5773 inform (input_location,
5774 "The ABI of passing structure with complex float"
5775 " member has changed in GCC 4.4");
5777 classes[1] = X86_64_SSESF_CLASS;
5781 classes[0] = X86_64_SSEDF_CLASS;
5782 classes[1] = X86_64_SSEDF_CLASS;
5785 classes[0] = X86_64_COMPLEX_X87_CLASS;
5788 /* This mode is larger than 16 bytes. */
5796 classes[0] = X86_64_SSE_CLASS;
5797 classes[1] = X86_64_SSEUP_CLASS;
5798 classes[2] = X86_64_SSEUP_CLASS;
5799 classes[3] = X86_64_SSEUP_CLASS;
5807 classes[0] = X86_64_SSE_CLASS;
5808 classes[1] = X86_64_SSEUP_CLASS;
5816 classes[0] = X86_64_SSE_CLASS;
5822 gcc_assert (VECTOR_MODE_P (mode));
5827 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5829 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5830 classes[0] = X86_64_INTEGERSI_CLASS;
5832 classes[0] = X86_64_INTEGER_CLASS;
5833 classes[1] = X86_64_INTEGER_CLASS;
5834 return 1 + (bytes > 8);
5838 /* Examine the argument and return the number of registers required in each
5839 class. Return 0 iff the parameter should be passed in memory. */
5841 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5842 int *int_nregs, int *sse_nregs)
5844 enum x86_64_reg_class regclass[MAX_CLASSES];
5845 int n = classify_argument (mode, type, regclass, 0);
5851 for (n--; n >= 0; n--)
5852 switch (regclass[n])
5854 case X86_64_INTEGER_CLASS:
5855 case X86_64_INTEGERSI_CLASS:
5858 case X86_64_SSE_CLASS:
5859 case X86_64_SSESF_CLASS:
5860 case X86_64_SSEDF_CLASS:
5863 case X86_64_NO_CLASS:
5864 case X86_64_SSEUP_CLASS:
5866 case X86_64_X87_CLASS:
5867 case X86_64_X87UP_CLASS:
5871 case X86_64_COMPLEX_X87_CLASS:
5872 return in_return ? 2 : 0;
5873 case X86_64_MEMORY_CLASS:
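/* For the struct { int a, b; double d; } example above this yields
   *int_nregs = 1 and *sse_nregs = 1.  A sketch of the caller-side
   pattern (mirroring function_arg_advance_64 below):

     int ir, sr;
     if (examine_argument (mode, type, 0, &ir, &sr)
         && ir <= cum->nregs && sr <= cum->sse_nregs)
       ... pass in registers ...
     else
       ... pass in memory ...  */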
5879 /* Construct container for the argument used by GCC interface. See
5880 FUNCTION_ARG for the detailed description. */
5883 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5884 const_tree type, int in_return, int nintregs, int nsseregs,
5885 const int *intreg, int sse_regno)
5887 /* The following variables hold the static issued_error state. */
5888 static bool issued_sse_arg_error;
5889 static bool issued_sse_ret_error;
5890 static bool issued_x87_ret_error;
5892 enum machine_mode tmpmode;
5894 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5895 enum x86_64_reg_class regclass[MAX_CLASSES];
5899 int needed_sseregs, needed_intregs;
5900 rtx exp[MAX_CLASSES];
5903 n = classify_argument (mode, type, regclass, 0);
5906 if (!examine_argument (mode, type, in_return, &needed_intregs,
5909 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5912 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5913 some less clueful developer tries to use floating-point anyway. */
5914 if (needed_sseregs && !TARGET_SSE)
5918 if (!issued_sse_ret_error)
5920 error ("SSE register return with SSE disabled");
5921 issued_sse_ret_error = true;
5924 else if (!issued_sse_arg_error)
5926 error ("SSE register argument with SSE disabled");
5927 issued_sse_arg_error = true;
5932 /* Likewise, error if the ABI requires us to return values in the
5933 x87 registers and the user specified -mno-80387. */
5934 if (!TARGET_80387 && in_return)
5935 for (i = 0; i < n; i++)
5936 if (regclass[i] == X86_64_X87_CLASS
5937 || regclass[i] == X86_64_X87UP_CLASS
5938 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5940 if (!issued_x87_ret_error)
5942 error ("x87 register return with x87 disabled");
5943 issued_x87_ret_error = true;
5948 /* First construct simple cases. Avoid SCmode, since we want to use
5949 a single register to pass this type. */
5950 if (n == 1 && mode != SCmode)
5951 switch (regclass[0])
5953 case X86_64_INTEGER_CLASS:
5954 case X86_64_INTEGERSI_CLASS:
5955 return gen_rtx_REG (mode, intreg[0]);
5956 case X86_64_SSE_CLASS:
5957 case X86_64_SSESF_CLASS:
5958 case X86_64_SSEDF_CLASS:
5959 if (mode != BLKmode)
5960 return gen_reg_or_parallel (mode, orig_mode,
5961 SSE_REGNO (sse_regno));
5963 case X86_64_X87_CLASS:
5964 case X86_64_COMPLEX_X87_CLASS:
5965 return gen_rtx_REG (mode, FIRST_STACK_REG);
5966 case X86_64_NO_CLASS:
5967 /* Zero sized array, struct or class. */
5972 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5973 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5974 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5976 && regclass[0] == X86_64_SSE_CLASS
5977 && regclass[1] == X86_64_SSEUP_CLASS
5978 && regclass[2] == X86_64_SSEUP_CLASS
5979 && regclass[3] == X86_64_SSEUP_CLASS
5981 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5984 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5985 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5986 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5987 && regclass[1] == X86_64_INTEGER_CLASS
5988 && (mode == CDImode || mode == TImode || mode == TFmode)
5989 && intreg[0] + 1 == intreg[1])
5990 return gen_rtx_REG (mode, intreg[0]);
5992 /* Otherwise figure out the entries of the PARALLEL. */
5993 for (i = 0; i < n; i++)
5997 switch (regclass[i])
5999 case X86_64_NO_CLASS:
6001 case X86_64_INTEGER_CLASS:
6002 case X86_64_INTEGERSI_CLASS:
6003 /* Merge TImodes on aligned occasions here too. */
6004 if (i * 8 + 8 > bytes)
6005 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6006 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6010 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6011 if (tmpmode == BLKmode)
6013 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6014 gen_rtx_REG (tmpmode, *intreg),
6018 case X86_64_SSESF_CLASS:
6019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6020 gen_rtx_REG (SFmode,
6021 SSE_REGNO (sse_regno)),
6025 case X86_64_SSEDF_CLASS:
6026 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6027 gen_rtx_REG (DFmode,
6028 SSE_REGNO (sse_regno)),
6032 case X86_64_SSE_CLASS:
6040 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6050 && regclass[1] == X86_64_SSEUP_CLASS
6051 && regclass[2] == X86_64_SSEUP_CLASS
6052 && regclass[3] == X86_64_SSEUP_CLASS);
6059 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6060 gen_rtx_REG (tmpmode,
6061 SSE_REGNO (sse_regno)),
6070 /* Empty aligned struct, union or class. */
6074 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6075 for (i = 0; i < nexps; i++)
6076 XVECEXP (ret, 0, i) = exp [i];
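/* The PARALLEL built here records, for each piece, the register and its
   byte offset within the argument.  For struct { int a, b; double d; }
   in the first available slots it looks roughly like:

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])  */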
6080 /* Update the data in CUM to advance over an argument of mode MODE
6081 and data type TYPE. (TYPE is null for libcalls where that information
6082 may not be available.) */
6085 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6086 const_tree type, HOST_WIDE_INT bytes,
6087 HOST_WIDE_INT words)
6103 cum->words += words;
6104 cum->nregs -= words;
6105 cum->regno += words;
6107 if (cum->nregs <= 0)
6115 /* OImode shouldn't be used directly. */
6119 if (cum->float_in_sse < 2)
6122 if (cum->float_in_sse < 1)
6139 if (!type || !AGGREGATE_TYPE_P (type))
6141 cum->sse_words += words;
6142 cum->sse_nregs -= 1;
6143 cum->sse_regno += 1;
6144 if (cum->sse_nregs <= 0)
6158 if (!type || !AGGREGATE_TYPE_P (type))
6160 cum->mmx_words += words;
6161 cum->mmx_nregs -= 1;
6162 cum->mmx_regno += 1;
6163 if (cum->mmx_nregs <= 0)
6174 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6175 const_tree type, HOST_WIDE_INT words, bool named)
6177 int int_nregs, sse_nregs;
6179 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6180 if (!named && VALID_AVX256_REG_MODE (mode))
6183 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6184 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6186 cum->nregs -= int_nregs;
6187 cum->sse_nregs -= sse_nregs;
6188 cum->regno += int_nregs;
6189 cum->sse_regno += sse_nregs;
6193 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6194 cum->words = (cum->words + align - 1) & ~(align - 1);
6195 cum->words += words;
6200 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6201 HOST_WIDE_INT words)
6203 /* Otherwise, this should be passed indirect. */
6204 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6206 cum->words += words;
6214 /* Update the data in CUM to advance over an argument of mode MODE and
6215 data type TYPE. (TYPE is null for libcalls where that information
6216 may not be available.) */
6219 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6220 const_tree type, bool named)
6222 HOST_WIDE_INT bytes, words;
6224 if (mode == BLKmode)
6225 bytes = int_size_in_bytes (type);
6227 bytes = GET_MODE_SIZE (mode);
6228 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6231 mode = type_natural_mode (type, NULL);
6233 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6234 function_arg_advance_ms_64 (cum, bytes, words);
6235 else if (TARGET_64BIT)
6236 function_arg_advance_64 (cum, mode, type, words, named);
6238 function_arg_advance_32 (cum, mode, type, bytes, words);
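/* Schematically, advancing over f (int i, double d) under the 64-bit
   SysV ABI:

     after 'i':  cum->nregs 6 -> 5, cum->regno 0 -> 1  (%rdi consumed)
     after 'd':  cum->sse_nregs 8 -> 7, cum->sse_regno 0 -> 1  (%xmm0)

   The MS ABI variant instead consumes one slot per argument regardless
   of its class.  */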
6241 /* Define where to put the arguments to a function.
6242 Value is zero to push the argument on the stack,
6243 or a hard register in which to store the argument.
6245 MODE is the argument's machine mode.
6246 TYPE is the data type of the argument (as a tree).
6247 This is null for libcalls where that information may
6249 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6250 the preceding args and about the function being called.
6251 NAMED is nonzero if this argument is a named parameter
6252 (otherwise it is an extra parameter matching an ellipsis). */
6255 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6256 enum machine_mode orig_mode, const_tree type,
6257 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6259 static bool warnedsse, warnedmmx;
6261 /* Avoid the AL settings for the Unix64 ABI. */
6262 if (mode == VOIDmode)
6278 if (words <= cum->nregs)
6280 int regno = cum->regno;
6282 /* Fastcall allocates the first two DWORD (SImode) or
6283 smaller arguments to ECX and EDX if it isn't an aggregate type. */
6289 || (type && AGGREGATE_TYPE_P (type)))
6292 /* ECX not EAX is the first allocated register. */
6293 if (regno == AX_REG)
6296 return gen_rtx_REG (mode, regno);
6301 if (cum->float_in_sse < 2)
6304 if (cum->float_in_sse < 1)
6308 /* In 32bit, we pass TImode in xmm registers. */
6315 if (!type || !AGGREGATE_TYPE_P (type))
6317 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6320 warning (0, "SSE vector argument without SSE enabled "
6324 return gen_reg_or_parallel (mode, orig_mode,
6325 cum->sse_regno + FIRST_SSE_REG);
6330 /* OImode shouldn't be used directly. */
6339 if (!type || !AGGREGATE_TYPE_P (type))
6342 return gen_reg_or_parallel (mode, orig_mode,
6343 cum->sse_regno + FIRST_SSE_REG);
6353 if (!type || !AGGREGATE_TYPE_P (type))
6355 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6358 warning (0, "MMX vector argument without MMX enabled "
6362 return gen_reg_or_parallel (mode, orig_mode,
6363 cum->mmx_regno + FIRST_MMX_REG);
6372 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6373 enum machine_mode orig_mode, const_tree type, bool named)
6375 /* Handle a hidden AL argument containing number of registers
6376 for varargs x86-64 functions. */
6377 if (mode == VOIDmode)
6378 return GEN_INT (cum->maybe_vaarg
6379 ? (cum->sse_nregs < 0
6380 ? X86_64_SSE_REGPARM_MAX
6395 /* Unnamed 256bit vector mode parameters are passed on the stack. */
6401 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6403 &x86_64_int_parameter_registers [cum->regno],
6408 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6409 enum machine_mode orig_mode, bool named,
6410 HOST_WIDE_INT bytes)
6414 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6415 We use the value -2 to specify that the current function call is MS ABI. */
6416 if (mode == VOIDmode)
6417 return GEN_INT (-2);
6419 /* If we've run out of registers, it goes on the stack. */
6420 if (cum->nregs == 0)
6423 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6425 /* Only floating point modes are passed in anything but integer regs. */
6426 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6429 regno = cum->regno + FIRST_SSE_REG;
6434 /* Unnamed floating parameters are passed in both the
6435 SSE and integer registers. */
6436 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6437 t2 = gen_rtx_REG (mode, regno);
6438 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6439 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6440 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6443 /* Handle aggregated types passed in register. */
6444 if (orig_mode == BLKmode)
6446 if (bytes > 0 && bytes <= 8)
6447 mode = (bytes > 4 ? DImode : SImode);
6448 if (mode == BLKmode)
6452 return gen_reg_or_parallel (mode, orig_mode, regno);
6455 /* Return where to put the arguments to a function.
6456 Return zero to push the argument on the stack, or a hard register in
6457 which to store the argument.
6458 MODE is the argument's machine mode. TYPE is the data type of the
6459 argument. It is null for libcalls where that information may not be
6460 available. CUM gives information about the preceding args and about
6461 the function being called. NAMED is nonzero if this argument is a
6462 named parameter (otherwise it is an extra parameter matching an ellipsis). */
6466 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6467 const_tree type, bool named)
6469 enum machine_mode mode = omode;
6470 HOST_WIDE_INT bytes, words;
6472 if (mode == BLKmode)
6473 bytes = int_size_in_bytes (type);
6475 bytes = GET_MODE_SIZE (mode);
6476 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6478 /* To simplify the code below, represent vector types with a vector mode
6479 even if MMX/SSE are not active. */
6480 if (type && TREE_CODE (type) == VECTOR_TYPE)
6481 mode = type_natural_mode (type, cum);
6483 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6484 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6485 else if (TARGET_64BIT)
6486 return function_arg_64 (cum, mode, omode, type, named);
6488 return function_arg_32 (cum, mode, omode, type, bytes, words);
6491 /* A C expression that indicates when an argument must be passed by
6492 reference. If nonzero for an argument, a copy of that argument is
6493 made in memory and a pointer to the argument is passed instead of
6494 the argument itself. The pointer is passed in whatever way is
6495 appropriate for passing a pointer to that type. */
6498 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6499 enum machine_mode mode ATTRIBUTE_UNUSED,
6500 const_tree type, bool named ATTRIBUTE_UNUSED)
6502 /* See Windows x64 Software Convention. */
6503 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6505 int msize = (int) GET_MODE_SIZE (mode);
6508 /* Arrays are passed by reference. */
6509 if (TREE_CODE (type) == ARRAY_TYPE)
6512 if (AGGREGATE_TYPE_P (type))
6514 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6515 are passed by reference. */
6516 msize = int_size_in_bytes (type);
6520 /* __m128 is passed by reference. */
6522 case 1: case 2: case 4: case 8:
6528 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
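/* Examples under the Windows x64 convention handled above: a 24-byte
   struct and a __m128 argument are both passed by reference (through a
   hidden pointer), while 1/2/4/8-byte aggregates are passed by value.  */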
6534 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6537 contains_aligned_value_p (const_tree type)
6539 enum machine_mode mode = TYPE_MODE (type);
6540 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6544 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6546 if (TYPE_ALIGN (type) < 128)
6549 if (AGGREGATE_TYPE_P (type))
6551 /* Walk the aggregates recursively. */
6552 switch (TREE_CODE (type))
6556 case QUAL_UNION_TYPE:
6560 /* Walk all the structure fields. */
6561 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6563 if (TREE_CODE (field) == FIELD_DECL
6564 && contains_aligned_value_p (TREE_TYPE (field)))
6571 /* Just for use if some language passes arrays by value. */
6572 if (contains_aligned_value_p (TREE_TYPE (type)))
6583 /* Gives the alignment boundary, in bits, of an argument with the
6584 specified mode and type. */
6587 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6592 /* Since the main variant type is used for the call, convert TYPE to
6593 its main variant. */
6594 type = TYPE_MAIN_VARIANT (type);
6595 align = TYPE_ALIGN (type);
6598 align = GET_MODE_ALIGNMENT (mode);
6599 if (align < PARM_BOUNDARY)
6600 align = PARM_BOUNDARY;
6601 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6602 natural boundaries. */
6603 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6605 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6606 make an exception for SSE modes since these require 128bit alignment.
6609 The handling here differs from field_alignment. ICC aligns MMX
6610 arguments to 4 byte boundaries, while structure fields are aligned
6611 to 8 byte boundaries. */
6614 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6615 align = PARM_BOUNDARY;
6619 if (!contains_aligned_value_p (type))
6620 align = PARM_BOUNDARY;
6623 if (align > BIGGEST_ALIGNMENT)
6624 align = BIGGEST_ALIGNMENT;
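/* Examples: on ia32 a double argument stays at the 32-bit PARM_BOUNDARY,
   an __m128 argument (or an aggregate containing one) is placed at a
   128-bit boundary, and _Decimal128/__float128 keep their natural
   128-bit alignment.  */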
6628 /* Return true if N is a possible register number of function value. */
6631 ix86_function_value_regno_p (const unsigned int regno)
6638 case FIRST_FLOAT_REG:
6639 /* TODO: The function should depend on current function ABI but
6640 builtins.c would need updating then. Therefore we use the default ABI. */
6642 if (TARGET_64BIT && ix86_abi == MS_ABI)
6644 return TARGET_FLOAT_RETURNS_IN_80387;
6650 if (TARGET_MACHO || TARGET_64BIT)
6658 /* Define how to find the value returned by a function.
6659 VALTYPE is the data type of the value (as a tree).
6660 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6661 otherwise, FUNC is 0. */
6664 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6665 const_tree fntype, const_tree fn)
6669 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6670 we normally prevent this case when mmx is not available. However
6671 some ABIs may require the result to be returned like DImode. */
6672 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6673 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6675 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6676 we prevent this case when sse is not available. However some ABIs
6677 may require the result to be returned like integer TImode. */
6678 else if (mode == TImode
6679 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6680 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6682 /* 32-byte vector modes in %ymm0. */
6683 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6684 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6686 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6687 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6688 regno = FIRST_FLOAT_REG;
6690 /* Most things go in %eax. */
6693 /* Override FP return register with %xmm0 for local functions when
6694 SSE math is enabled or for functions with the sseregparm attribute. */
6695 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6697 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6698 if ((sse_level >= 1 && mode == SFmode)
6699 || (sse_level == 2 && mode == DFmode))
6700 regno = FIRST_SSE_REG;
6703 /* OImode shouldn't be used directly. */
6704 gcc_assert (mode != OImode);
6706 return gen_rtx_REG (orig_mode, regno);
6710 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6715 /* Handle libcalls, which don't provide a type node. */
6716 if (valtype == NULL)
6728 return gen_rtx_REG (mode, FIRST_SSE_REG);
6731 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6735 return gen_rtx_REG (mode, AX_REG);
6739 ret = construct_container (mode, orig_mode, valtype, 1,
6740 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6741 x86_64_int_return_registers, 0);
6743 /* For zero sized structures, construct_container returns NULL, but we
6744 need to keep the rest of the compiler happy by returning a meaningful value. */
6746 ret = gen_rtx_REG (orig_mode, AX_REG);
6752 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6754 unsigned int regno = AX_REG;
6758 switch (GET_MODE_SIZE (mode))
6761 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6762 && !COMPLEX_MODE_P (mode))
6763 regno = FIRST_SSE_REG;
6767 if (mode == SFmode || mode == DFmode)
6768 regno = FIRST_SSE_REG;
6774 return gen_rtx_REG (orig_mode, regno);
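/* A sketch of the MS-ABI return registers chosen above:

     integers of at most 8 bytes  -> %rax
     float, double, __m128        -> %xmm0

   Everything else has already been forced into memory by
   return_in_memory_ms_64.  */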
6778 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6779 enum machine_mode orig_mode, enum machine_mode mode)
6781 const_tree fn, fntype;
6784 if (fntype_or_decl && DECL_P (fntype_or_decl))
6785 fn = fntype_or_decl;
6786 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6788 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6789 return function_value_ms_64 (orig_mode, mode);
6790 else if (TARGET_64BIT)
6791 return function_value_64 (orig_mode, mode, valtype);
6793 return function_value_32 (orig_mode, mode, fntype, fn);
6797 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6798 bool outgoing ATTRIBUTE_UNUSED)
6800 enum machine_mode mode, orig_mode;
6802 orig_mode = TYPE_MODE (valtype);
6803 mode = type_natural_mode (valtype, NULL);
6804 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6808 ix86_libcall_value (enum machine_mode mode)
6810 return ix86_function_value_1 (NULL, NULL, mode, mode);
6813 /* Return true iff TYPE is returned in memory. */
6815 static bool ATTRIBUTE_UNUSED
6816 return_in_memory_32 (const_tree type, enum machine_mode mode)
6820 if (mode == BLKmode)
6823 size = int_size_in_bytes (type);
6825 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6828 if (VECTOR_MODE_P (mode) || mode == TImode)
6830 /* User-created vectors small enough to fit in EAX. */
6834 /* MMX/3dNow values are returned in MM0,
6835 except when it doesn't exist. */
6839 /* SSE values are returned in XMM0, except when it doesn't exist. */
6843 /* AVX values are returned in YMM0, except when it doesn't exist. */
6854 /* OImode shouldn't be used directly. */
6855 gcc_assert (mode != OImode);
6860 static bool ATTRIBUTE_UNUSED
6861 return_in_memory_64 (const_tree type, enum machine_mode mode)
6863 int needed_intregs, needed_sseregs;
6864 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6867 static bool ATTRIBUTE_UNUSED
6868 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6870 HOST_WIDE_INT size = int_size_in_bytes (type);
6872 /* __m128 is returned in xmm0. */
6873 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6874 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6877 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6878 return size != 1 && size != 2 && size != 4 && size != 8;
6882 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6884 #ifdef SUBTARGET_RETURN_IN_MEMORY
6885 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6887 const enum machine_mode mode = type_natural_mode (type, NULL);
6891 if (ix86_function_type_abi (fntype) == MS_ABI)
6892 return return_in_memory_ms_64 (type, mode);
6894 return return_in_memory_64 (type, mode);
6897 return return_in_memory_32 (type, mode);
6901 /* Return true iff TYPE is returned in memory. This version is used
6902 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6903 but differs notably in that when MMX is available, 8-byte vectors
6904 are returned in memory, rather than in MMX registers. */
6907 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6910 enum machine_mode mode = type_natural_mode (type, NULL);
6913 return return_in_memory_64 (type, mode);
6915 if (mode == BLKmode)
6918 size = int_size_in_bytes (type);
6920 if (VECTOR_MODE_P (mode))
6922 /* Return in memory only if MMX registers *are* available. This
6923 seems backwards, but it is consistent with the existing Solaris x86 ABI. */
6930 else if (mode == TImode)
6932 else if (mode == XFmode)
6938 /* When returning SSE vector types, we have a choice of either
6939 (1) being abi incompatible with a -march switch, or
6940 (2) generating an error.
6941 Given no good solution, I think the safest thing is one warning.
6942 The user won't be able to use -Werror, but....
6944 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6945 called in response to actually generating a caller or callee that
6946 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6947 via aggregate_value_p for general type probing from tree-ssa. */
6950 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6952 static bool warnedsse, warnedmmx;
6954 if (!TARGET_64BIT && type)
6956 /* Look at the return type of the function, not the function type. */
6957 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6959 if (!TARGET_SSE && !warnedsse)
6962 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6965 warning (0, "SSE vector return without SSE enabled "
6970 if (!TARGET_MMX && !warnedmmx)
6972 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6975 warning (0, "MMX vector return without MMX enabled "
6985 /* Create the va_list data type. */
6987 /* Returns the calling convention specific va_list data type.
6988 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6991 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6993 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6995 /* For i386 we use a plain pointer to the argument area. */
6996 if (!TARGET_64BIT || abi == MS_ABI)
6997 return build_pointer_type (char_type_node);
6999 record = lang_hooks.types.make_type (RECORD_TYPE);
7000 type_decl = build_decl (BUILTINS_LOCATION,
7001 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7003 f_gpr = build_decl (BUILTINS_LOCATION,
7004 FIELD_DECL, get_identifier ("gp_offset"),
7005 unsigned_type_node);
7006 f_fpr = build_decl (BUILTINS_LOCATION,
7007 FIELD_DECL, get_identifier ("fp_offset"),
7008 unsigned_type_node);
7009 f_ovf = build_decl (BUILTINS_LOCATION,
7010 FIELD_DECL, get_identifier ("overflow_arg_area"),
7012 f_sav = build_decl (BUILTINS_LOCATION,
7013 FIELD_DECL, get_identifier ("reg_save_area"),
7016 va_list_gpr_counter_field = f_gpr;
7017 va_list_fpr_counter_field = f_fpr;
7019 DECL_FIELD_CONTEXT (f_gpr) = record;
7020 DECL_FIELD_CONTEXT (f_fpr) = record;
7021 DECL_FIELD_CONTEXT (f_ovf) = record;
7022 DECL_FIELD_CONTEXT (f_sav) = record;
7024 TREE_CHAIN (record) = type_decl;
7025 TYPE_NAME (record) = type_decl;
7026 TYPE_FIELDS (record) = f_gpr;
7027 DECL_CHAIN (f_gpr) = f_fpr;
7028 DECL_CHAIN (f_fpr) = f_ovf;
7029 DECL_CHAIN (f_ovf) = f_sav;
7031 layout_type (record);
7033 /* The correct type is an array type of one element. */
7034 return build_array_type (record, build_index_type (size_zero_node));
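/* The record built above corresponds to the psABI declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   which is why an array type of one element is the correct type.  */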
7037 /* Set up the builtin va_list data type and for 64-bit the additional
7038 calling convention specific va_list data types. */
7041 ix86_build_builtin_va_list (void)
7043 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7045 /* Initialize abi specific va_list builtin types. */
7049 if (ix86_abi == MS_ABI)
7051 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7052 if (TREE_CODE (t) != RECORD_TYPE)
7053 t = build_variant_type_copy (t);
7054 sysv_va_list_type_node = t;
7059 if (TREE_CODE (t) != RECORD_TYPE)
7060 t = build_variant_type_copy (t);
7061 sysv_va_list_type_node = t;
7063 if (ix86_abi != MS_ABI)
7065 t = ix86_build_builtin_va_list_abi (MS_ABI);
7066 if (TREE_CODE (t) != RECORD_TYPE)
7067 t = build_variant_type_copy (t);
7068 ms_va_list_type_node = t;
7073 if (TREE_CODE (t) != RECORD_TYPE)
7074 t = build_variant_type_copy (t);
7075 ms_va_list_type_node = t;
7082 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7085 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7091 /* GPR size of varargs save area. */
7092 if (cfun->va_list_gpr_size)
7093 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7095 ix86_varargs_gpr_size = 0;
7097 /* FPR size of varargs save area. We don't need it if we don't pass
7098 anything in SSE registers. */
7099 if (TARGET_SSE && cfun->va_list_fpr_size)
7100 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7102 ix86_varargs_fpr_size = 0;
7104 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7107 save_area = frame_pointer_rtx;
7108 set = get_varargs_alias_set ();
7110 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7111 if (max > X86_64_REGPARM_MAX)
7112 max = X86_64_REGPARM_MAX;
7114 for (i = cum->regno; i < max; i++)
7116 mem = gen_rtx_MEM (Pmode,
7117 plus_constant (save_area, i * UNITS_PER_WORD));
7118 MEM_NOTRAP_P (mem) = 1;
7119 set_mem_alias_set (mem, set);
7120 emit_move_insn (mem, gen_rtx_REG (Pmode,
7121 x86_64_int_parameter_registers[i]));
7124 if (ix86_varargs_fpr_size)
7126 enum machine_mode smode;
7129 /* Now emit code to save SSE registers. The AX parameter contains the number
7130 of SSE parameter registers used to call this function, though all we
7131 actually check here is the zero/non-zero status. */
7133 label = gen_label_rtx ();
7134 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7135 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7138 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7139 we used movdqa (i.e. TImode) instead? Perhaps even better would
7140 be if we could determine the real mode of the data, via a hook
7141 into pass_stdarg. Ignore all that for now. */
7143 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7144 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7146 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7147 if (max > X86_64_SSE_REGPARM_MAX)
7148 max = X86_64_SSE_REGPARM_MAX;
7150 for (i = cum->sse_regno; i < max; ++i)
7152 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7153 mem = gen_rtx_MEM (smode, mem);
7154 MEM_NOTRAP_P (mem) = 1;
7155 set_mem_alias_set (mem, set);
7156 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7158 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
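/* The resulting register save area layout for SysV varargs, from offset
   0: six 8-byte GPR slots (%rdi, %rsi, %rdx, %rcx, %r8, %r9) followed by
   up to eight 16-byte slots for %xmm0-%xmm7, i.e. XMM register I is
   saved at ix86_varargs_gpr_size + I * 16.  */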
7166 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7168 alias_set_type set = get_varargs_alias_set ();
7171 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7175 mem = gen_rtx_MEM (Pmode,
7176 plus_constant (virtual_incoming_args_rtx,
7177 i * UNITS_PER_WORD));
7178 MEM_NOTRAP_P (mem) = 1;
7179 set_mem_alias_set (mem, set);
7181 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7182 emit_move_insn (mem, reg);
7187 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7188 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7191 CUMULATIVE_ARGS next_cum;
7194 /* This argument doesn't appear to be used anymore. Which is good,
7195 because the old code here didn't suppress rtl generation. */
7196 gcc_assert (!no_rtl);
7201 fntype = TREE_TYPE (current_function_decl);
7203 /* For varargs, we do not want to skip the dummy va_dcl argument.
7204 For stdargs, we do want to skip the last named argument. */
7206 if (stdarg_p (fntype))
7207 ix86_function_arg_advance (&next_cum, mode, type, true);
7209 if (cum->call_abi == MS_ABI)
7210 setup_incoming_varargs_ms_64 (&next_cum);
7212 setup_incoming_varargs_64 (&next_cum);
7215 /* Check whether TYPE is the char * kind of va_list. */
7218 is_va_list_char_pointer (tree type)
7222 /* For 32-bit it is always true. */
7225 canonic = ix86_canonical_va_list_type (type);
7226 return (canonic == ms_va_list_type_node
7227 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7230 /* Implement va_start. */
7233 ix86_va_start (tree valist, rtx nextarg)
7235 HOST_WIDE_INT words, n_gpr, n_fpr;
7236 tree f_gpr, f_fpr, f_ovf, f_sav;
7237 tree gpr, fpr, ovf, sav, t;
7240 /* Only 64bit target needs something special. */
7241 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7243 std_expand_builtin_va_start (valist, nextarg);
7247 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7248 f_fpr = DECL_CHAIN (f_gpr);
7249 f_ovf = DECL_CHAIN (f_fpr);
7250 f_sav = DECL_CHAIN (f_ovf);
7252 valist = build_simple_mem_ref (valist);
7253 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7254 /* The following should be folded into the MEM_REF offset. */
7255 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7257 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7259 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7261 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7264 /* Count number of gp and fp argument registers used. */
7265 words = crtl->args.info.words;
7266 n_gpr = crtl->args.info.regno;
7267 n_fpr = crtl->args.info.sse_regno;
7269 if (cfun->va_list_gpr_size)
7271 type = TREE_TYPE (gpr);
7272 t = build2 (MODIFY_EXPR, type,
7273 gpr, build_int_cst (type, n_gpr * 8));
7274 TREE_SIDE_EFFECTS (t) = 1;
7275 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7278 if (TARGET_SSE && cfun->va_list_fpr_size)
7280 type = TREE_TYPE (fpr);
7281 t = build2 (MODIFY_EXPR, type, fpr,
7282 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7283 TREE_SIDE_EFFECTS (t) = 1;
7284 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7287 /* Find the overflow area. */
7288 type = TREE_TYPE (ovf);
7289 t = make_tree (type, crtl->args.internal_arg_pointer);
7291 t = build2 (POINTER_PLUS_EXPR, type, t,
7292 size_int (words * UNITS_PER_WORD));
7293 t = build2 (MODIFY_EXPR, type, ovf, t);
7294 TREE_SIDE_EFFECTS (t) = 1;
7295 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7297 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7299 /* Find the register save area.
7300 The prologue of the function saves it right above the stack frame. */
7301 type = TREE_TYPE (sav);
7302 t = make_tree (type, frame_pointer_rtx);
7303 if (!ix86_varargs_gpr_size)
7304 t = build2 (POINTER_PLUS_EXPR, type, t,
7305 size_int (-8 * X86_64_REGPARM_MAX));
7306 t = build2 (MODIFY_EXPR, type, sav, t);
7307 TREE_SIDE_EFFECTS (t) = 1;
7308 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
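/* So for void f (int n, ...) that used one GPR and no SSE registers for
   named arguments, va_start leaves gp_offset = 8, fp_offset = 48
   (= 8 * X86_64_REGPARM_MAX), overflow_arg_area just past the named
   stack arguments, and reg_save_area pointing at the block saved by
   setup_incoming_varargs_64.  */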
7312 /* Implement va_arg. */
7315 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7318 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7319 tree f_gpr, f_fpr, f_ovf, f_sav;
7320 tree gpr, fpr, ovf, sav, t;
7322 tree lab_false, lab_over = NULL_TREE;
7327 enum machine_mode nat_mode;
7328 unsigned int arg_boundary;
7330 /* Only 64bit target needs something special. */
7331 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7332 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7334 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7335 f_fpr = DECL_CHAIN (f_gpr);
7336 f_ovf = DECL_CHAIN (f_fpr);
7337 f_sav = DECL_CHAIN (f_ovf);
7339 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7340 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7341 valist = build_va_arg_indirect_ref (valist);
7342 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7343 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7344 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7346 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7348 type = build_pointer_type (type);
7349 size = int_size_in_bytes (type);
7350 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7352 nat_mode = type_natural_mode (type, NULL);
7361 /* Unnamed 256bit vector mode parameters are passed on the stack. */
7362 if (ix86_cfun_abi () == SYSV_ABI)
7369 container = construct_container (nat_mode, TYPE_MODE (type),
7370 type, 0, X86_64_REGPARM_MAX,
7371 X86_64_SSE_REGPARM_MAX, intreg,
7376 /* Pull the value out of the saved registers. */
7378 addr = create_tmp_var (ptr_type_node, "addr");
7382 int needed_intregs, needed_sseregs;
7384 tree int_addr, sse_addr;
7386 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7387 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7389 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7391 need_temp = (!REG_P (container)
7392 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7393 || TYPE_ALIGN (type) > 128));
7395 /* In case we are passing a structure, verify that it is a consecutive block
7396 in the register save area. If not, we need to do moves. */
7397 if (!need_temp && !REG_P (container))
7399 /* Verify that all registers are strictly consecutive. */
7400 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7404 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7406 rtx slot = XVECEXP (container, 0, i);
7407 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7408 || INTVAL (XEXP (slot, 1)) != i * 16)
7416 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7418 rtx slot = XVECEXP (container, 0, i);
7419 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7420 || INTVAL (XEXP (slot, 1)) != i * 8)
7432 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7433 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7436 /* First ensure that we fit completely in registers. */
7439 t = build_int_cst (TREE_TYPE (gpr),
7440 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7441 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7442 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7443 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7444 gimplify_and_add (t, pre_p);
7448 t = build_int_cst (TREE_TYPE (fpr),
7449 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7450 + X86_64_REGPARM_MAX * 8);
7451 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7452 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7453 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7454 gimplify_and_add (t, pre_p);
7457 /* Compute index to start of area used for integer regs. */
7460 /* int_addr = gpr + sav; */
7461 t = fold_convert (sizetype, gpr);
7462 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7463 gimplify_assign (int_addr, t, pre_p);
7467 /* sse_addr = fpr + sav; */
7468 t = fold_convert (sizetype, fpr);
7469 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7470 gimplify_assign (sse_addr, t, pre_p);
7474 int i, prev_size = 0;
7475 tree temp = create_tmp_var (type, "va_arg_tmp");
7478 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7479 gimplify_assign (addr, t, pre_p);
7481 for (i = 0; i < XVECLEN (container, 0); i++)
7483 rtx slot = XVECEXP (container, 0, i);
7484 rtx reg = XEXP (slot, 0);
7485 enum machine_mode mode = GET_MODE (reg);
7491 tree dest_addr, dest;
7492 int cur_size = GET_MODE_SIZE (mode);
7494 if (prev_size + cur_size > size)
7496 cur_size = size - prev_size;
7497 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7498 if (mode == BLKmode)
7501 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7502 if (mode == GET_MODE (reg))
7503 addr_type = build_pointer_type (piece_type);
7505 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7507 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7510 if (SSE_REGNO_P (REGNO (reg)))
7512 src_addr = sse_addr;
7513 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7517 src_addr = int_addr;
7518 src_offset = REGNO (reg) * 8;
7520 src_addr = fold_convert (addr_type, src_addr);
7521 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7522 size_int (src_offset));
7524 dest_addr = fold_convert (daddr_type, addr);
7525 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7526 size_int (INTVAL (XEXP (slot, 1))));
7527 if (cur_size == GET_MODE_SIZE (mode))
7529 src = build_va_arg_indirect_ref (src_addr);
7530 dest = build_va_arg_indirect_ref (dest_addr);
7532 gimplify_assign (dest, src, pre_p);
7537 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7538 3, dest_addr, src_addr,
7539 size_int (cur_size));
7540 gimplify_and_add (copy, pre_p);
7542 prev_size += cur_size;
7548 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7549 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7550 gimplify_assign (gpr, t, pre_p);
7555 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7556 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7557 gimplify_assign (fpr, t, pre_p);
7560 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7562 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7565 /* ... otherwise out of the overflow area. */
7567 /* When we align a parameter on the stack for the caller, if the
7568 parameter alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will
7569 be aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
7570 here with the caller. */
7571 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7572 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7573 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7575 /* Care for on-stack alignment if needed. */
7576 if (arg_boundary <= 64 || size == 0)
7580 HOST_WIDE_INT align = arg_boundary / 8;
7581 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7582 size_int (align - 1));
7583 t = fold_convert (sizetype, t);
7584 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7586 t = fold_convert (TREE_TYPE (ovf), t);
7589 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7590 gimplify_assign (addr, t, pre_p);
7592 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7593 size_int (rsize * UNITS_PER_WORD));
7594 gimplify_assign (unshare_expr (ovf), t, pre_p);
7597 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7599 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7600 addr = fold_convert (ptrtype, addr);
7603 addr = build_va_arg_indirect_ref (addr);
7604 return build_va_arg_indirect_ref (addr);
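/* The gimple emitted above implements, roughly, the following C for an
   integer va_arg (the SSE path is analogous with 16-byte slots):

     if (ap->gp_offset >= 48)            /- no GPR slot left -/
       {
         addr = ap->overflow_arg_area;   /- plus any alignment -/
         ap->overflow_arg_area += 8;
       }
     else
       {
         addr = ap->reg_save_area + ap->gp_offset;
         ap->gp_offset += 8;
       }  */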
7607 /* Return true if OPNUM's MEM should be matched
7608 in movabs* patterns. */
7611 ix86_check_movabs (rtx insn, int opnum)
7615 set = PATTERN (insn);
7616 if (GET_CODE (set) == PARALLEL)
7617 set = XVECEXP (set, 0, 0);
7618 gcc_assert (GET_CODE (set) == SET);
7619 mem = XEXP (set, opnum);
7620 while (GET_CODE (mem) == SUBREG)
7621 mem = SUBREG_REG (mem);
7622 gcc_assert (MEM_P (mem));
7623 return volatile_ok || !MEM_VOLATILE_P (mem);
7626 /* Initialize the table of extra 80387 mathematical constants. */
7629 init_ext_80387_constants (void)
7631 static const char * cst[5] =
7633 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7634 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7635 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7636 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7637 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7641 for (i = 0; i < 5; i++)
7643 real_from_string (&ext_80387_constants_table[i], cst[i]);
7644 /* Ensure each constant is rounded to XFmode precision. */
7645 real_convert (&ext_80387_constants_table[i],
7646 XFmode, &ext_80387_constants_table[i]);
7649 ext_80387_constants_init = 1;
7652 /* Return non-zero if the constant is something that
7653 can be loaded with a special instruction. */
7656 standard_80387_constant_p (rtx x)
7658 enum machine_mode mode = GET_MODE (x);
7662 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7665 if (x == CONST0_RTX (mode))
7667 if (x == CONST1_RTX (mode))
7670 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7672 /* For XFmode constants, try to find a special 80387 instruction when
7673 optimizing for size or on those CPUs that benefit from them. */
7675 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7679 if (! ext_80387_constants_init)
7680 init_ext_80387_constants ();
7682 for (i = 0; i < 5; i++)
7683 if (real_identical (&r, &ext_80387_constants_table[i]))
7687 /* A load of the constant -0.0 or -1.0 will be split into a
7688 fldz;fchs or fld1;fchs sequence. */
7689 if (real_isnegzero (&r))
7691 if (real_identical (&r, &dconstm1))
7697 /* Return the opcode of the special instruction to be used to load the constant X. */
7701 standard_80387_constant_opcode (rtx x)
7703 switch (standard_80387_constant_p (x))
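    /* A sketch of the mapping from standard_80387_constant_p values to
       opcodes: 1 -> "fldz", 2 -> "fld1", 3..7 -> "fldlg2", "fldln2",
       "fldl2e", "fldl2t", "fldpi".  Values 8 and 9 (the negative cases)
       have no single opcode; they are split into fldz;fchs and fld1;fchs
       sequences elsewhere.  */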
7727 /* Return the CONST_DOUBLE representing the 80387 constant that is
7728 loaded by the specified special instruction. The argument IDX
7729 matches the return value from standard_80387_constant_p. */
7732 standard_80387_constant_rtx (int idx)
7736 if (! ext_80387_constants_init)
7737 init_ext_80387_constants ();
7753 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7757 /* Return 1 if X is all 0s and 2 if X is all 1s
7758 in supported SSE vector mode. */
7761 standard_sse_constant_p (rtx x)
7763 enum machine_mode mode = GET_MODE (x);
7765 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7767 if (vector_all_ones_operand (x, mode))
7783 /* Return the opcode of the special instruction to be used to load the constant X. */
7787 standard_sse_constant_opcode (rtx insn, rtx x)
7789 switch (standard_sse_constant_p (x))
7792 switch (get_attr_mode (insn))
7795 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7797 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7798 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7800 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7802 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7803 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7805 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7807 return "vxorps\t%x0, %x0, %x0";
7809 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7810 return "vxorps\t%x0, %x0, %x0";
7812 return "vxorpd\t%x0, %x0, %x0";
7814 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7815 return "vxorps\t%x0, %x0, %x0";
7817 return "vpxor\t%x0, %x0, %x0";
7822 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7829 /* Returns true if OP contains a symbol reference. */
7832 symbolic_reference_mentioned_p (rtx op)
7837 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7840 fmt = GET_RTX_FORMAT (GET_CODE (op));
7841 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7847 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7848 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7852 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7859 /* Return true if it is appropriate to emit `ret' instructions in the
7860 body of a function. Do this only if the epilogue is simple, needing a
7861 couple of insns. Prior to reloading, we can't tell how many registers
7862 must be saved, so return false then. Return false if there is no frame
7863 marker to de-allocate. */
7866 ix86_can_use_return_insn_p (void)
7868 struct ix86_frame frame;
7870 if (! reload_completed || frame_pointer_needed)
7873 /* Don't allow more than 32k pop, since that's all we can do
7874 with one instruction. */
7875 if (crtl->args.pops_args && crtl->args.size >= 32768)
7878 ix86_compute_frame_layout (&frame);
7879 return (frame.stack_pointer_offset == UNITS_PER_WORD
7880 && (frame.nregs + frame.nsseregs) == 0);
7883 /* Value should be nonzero if functions must have frame pointers.
7884 Zero means the frame pointer need not be set up (and parms may
7885 be accessed via the stack pointer) in functions that seem suitable. */
7888 ix86_frame_pointer_required (void)
7890 /* If we accessed previous frames, then the generated code expects
7891 to be able to access the saved ebp value in our frame. */
7892 if (cfun->machine->accesses_prev_frame)
7895 /* Several x86 OSes need a frame pointer for other reasons,
7896 usually pertaining to setjmp. */
7897 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7900 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7901 the frame pointer by default. Turn it back on now if we've not
7902 got a leaf function. */
7903 if (TARGET_OMIT_LEAF_FRAME_POINTER
7904 && (!current_function_is_leaf
7905 || ix86_current_function_calls_tls_descriptor))
7908 if (crtl->profile && !flag_fentry)
7914 /* Record that the current function accesses previous call frames. */
7917 ix86_setup_frame_addresses (void)
7919 cfun->machine->accesses_prev_frame = 1;
7922 #ifndef USE_HIDDEN_LINKONCE
7923 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7924 # define USE_HIDDEN_LINKONCE 1
7926 # define USE_HIDDEN_LINKONCE 0
7930 static int pic_labels_used;
7932 /* Fills in the label name that should be used for a pc thunk for
7933 the given register. */
7936 get_pc_thunk_name (char name[32], unsigned int regno)
7938 gcc_assert (!TARGET_64BIT);
7940 if (USE_HIDDEN_LINKONCE)
7941 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7943 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7947 /* This function generates code for -fpic that loads %ebx with
7948 the return address of the caller and then returns. */
7951 ix86_code_end (void)
7956 for (regno = 0; regno < 8; ++regno)
7961 if (! ((pic_labels_used >> regno) & 1))
7964 get_pc_thunk_name (name, regno);
7966 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7967 get_identifier (name),
7968 build_function_type (void_type_node, void_list_node));
7969 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7970 NULL_TREE, void_type_node);
7971 TREE_PUBLIC (decl) = 1;
7972 TREE_STATIC (decl) = 1;
7977 switch_to_section (darwin_sections[text_coal_section]);
7978 fputs ("\t.weak_definition\t", asm_out_file);
7979 assemble_name (asm_out_file, name);
7980 fputs ("\n\t.private_extern\t", asm_out_file);
7981 assemble_name (asm_out_file, name);
7982 putc ('\n', asm_out_file);
7983 ASM_OUTPUT_LABEL (asm_out_file, name);
7984 DECL_WEAK (decl) = 1;
7988 if (USE_HIDDEN_LINKONCE)
7990 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7992 targetm.asm_out.unique_section (decl, 0);
7993 switch_to_section (get_named_section (decl, NULL, 0));
7995 targetm.asm_out.globalize_label (asm_out_file, name);
7996 fputs ("\t.hidden\t", asm_out_file);
7997 assemble_name (asm_out_file, name);
7998 putc ('\n', asm_out_file);
7999 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8003 switch_to_section (text_section);
8004 ASM_OUTPUT_LABEL (asm_out_file, name);
8007 DECL_INITIAL (decl) = make_node (BLOCK);
8008 current_function_decl = decl;
8009 init_function_start (decl);
8010 first_function_block_is_cold = false;
8011 /* Make sure unwind info is emitted for the thunk if needed. */
8012 final_start_function (emit_barrier (), asm_out_file, 1);
8014 xops[0] = gen_rtx_REG (Pmode, regno);
8015 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8016 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8017 output_asm_insn ("ret", xops);
8018 final_end_function ();
8019 init_insn_lengths ();
8020 free_after_compilation (cfun);
8022 current_function_decl = NULL;
8026 /* Emit code for the SET_GOT patterns. */
8029 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8035 if (TARGET_VXWORKS_RTP && flag_pic)
8037 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8038 xops[2] = gen_rtx_MEM (Pmode,
8039 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8040 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8042 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8043 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8044 an unadorned address. */
8045 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8046 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8047 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8051 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8053 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8055 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8058 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8061 output_asm_insn ("call\t%a2", xops);
8062 #ifdef DWARF2_UNWIND_INFO
8063 /* The call to the next label acts as a push. */
8064 if (dwarf2out_do_frame ())
8068 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8069 gen_rtx_PLUS (Pmode,
8072 RTX_FRAME_RELATED_P (insn) = 1;
8073 dwarf2out_frame_debug (insn, true);
8080 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8081 is what will be referenced by the Mach-O PIC subsystem. */
8083 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8086 targetm.asm_out.internal_label (asm_out_file, "L",
8087 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8091 output_asm_insn ("pop%z0\t%0", xops);
8092 #ifdef DWARF2_UNWIND_INFO
8093 /* The pop is a true pop and clobbers dest, but it doesn't restore dest
8094 for unwind info purposes. */
8095 if (dwarf2out_do_frame ())
8099 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8100 dwarf2out_frame_debug (insn, true);
8101 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8102 gen_rtx_PLUS (Pmode,
8105 RTX_FRAME_RELATED_P (insn) = 1;
8106 dwarf2out_frame_debug (insn, true);
8115 get_pc_thunk_name (name, REGNO (dest));
8116 pic_labels_used |= 1 << REGNO (dest);
8118 #ifdef DWARF2_UNWIND_INFO
8119 /* Ensure all queued register saves are flushed before the
8120 call. */
8121 if (dwarf2out_do_frame ())
8122 dwarf2out_flush_queued_reg_saves ();
8124 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8125 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8126 output_asm_insn ("call\t%X2", xops);
8127 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8128 is what will be referenced by the Mach-O PIC subsystem. */
8131 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8133 targetm.asm_out.internal_label (asm_out_file, "L",
8134 CODE_LABEL_NUMBER (label));
8141 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8142 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8144 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
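/* For illustration only: a standalone sketch (not part of this file) of
   the call/pop idiom that the sequence above and the pc thunks implement
   at runtime. It assumes a 32-bit x86 target and GNU C inline asm; the
   name get_pc_sketch is hypothetical. */

static void *
get_pc_sketch (void)
{
  void *pc;
  /* The call pushes the address of local label 1; the pop then loads
     that address, leaving the current PC in a register. */
  __asm__ ("call\t1f\n"
           "1:\tpop\t%0"
           : "=r" (pc));
  return pc;
}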
8149 /* Generate a "push" pattern for input ARG. */
8154 struct machine_function *m = cfun->machine;
8156 if (m->fs.cfa_reg == stack_pointer_rtx)
8157 m->fs.cfa_offset += UNITS_PER_WORD;
8158 m->fs.sp_offset += UNITS_PER_WORD;
8160 return gen_rtx_SET (VOIDmode,
8162 gen_rtx_PRE_DEC (Pmode,
8163 stack_pointer_rtx)),
8167 /* Generate a "pop" pattern for input ARG. */
8172 return gen_rtx_SET (VOIDmode,
8175 gen_rtx_POST_INC (Pmode,
8176 stack_pointer_rtx)));
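/* A minimal sketch (not GCC code) of what the PRE_DEC/POST_INC patterns
   above denote at runtime, modeling the stack as an array of words that
   grows downward. All names here are illustrative. */

typedef unsigned long word_t;

static void
push_word_sketch (word_t **sp, word_t value)
{
  *--*sp = value;               /* (set (mem (pre_dec sp)) arg) */
}

static word_t
pop_word_sketch (word_t **sp)
{
  return *(*sp)++;              /* (set arg (mem (post_inc sp))) */
}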
8179 /* Return >= 0 if there is an unused call-clobbered register available
8180 for the entire function. */
8183 ix86_select_alt_pic_regnum (void)
8185 if (current_function_is_leaf
8187 && !ix86_current_function_calls_tls_descriptor)
8190 /* Can't use the same register for both PIC and DRAP. */
8192 drap = REGNO (crtl->drap_reg);
8195 for (i = 2; i >= 0; --i)
8196 if (i != drap && !df_regs_ever_live_p (i))
8200 return INVALID_REGNUM;
8203 /* Return 1 if we need to save REGNO. */
8205 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8207 if (pic_offset_table_rtx
8208 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8209 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8211 || crtl->calls_eh_return
8212 || crtl->uses_const_pool))
8214 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8219 if (crtl->calls_eh_return && maybe_eh_return)
8224 unsigned test = EH_RETURN_DATA_REGNO (i);
8225 if (test == INVALID_REGNUM)
8232 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8235 return (df_regs_ever_live_p (regno)
8236 && !call_used_regs[regno]
8237 && !fixed_regs[regno]
8238 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8241 /* Return the number of saved general-purpose registers. */
8244 ix86_nsaved_regs (void)
8249 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8250 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8255 /* Return the number of saved SSE registers. */
8258 ix86_nsaved_sseregs (void)
8263 if (ix86_cfun_abi () != MS_ABI)
8265 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8266 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8271 /* Given FROM and TO register numbers, say whether this elimination is
8272 allowed. If stack alignment is needed, we can only replace argument
8273 pointer with hard frame pointer, or replace frame pointer with stack
8274 pointer. Otherwise, frame pointer elimination is automatically
8275 handled and all other eliminations are valid. */
8278 ix86_can_eliminate (const int from, const int to)
8280 if (stack_realign_fp)
8281 return ((from == ARG_POINTER_REGNUM
8282 && to == HARD_FRAME_POINTER_REGNUM)
8283 || (from == FRAME_POINTER_REGNUM
8284 && to == STACK_POINTER_REGNUM));
8286 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8289 /* Return the offset between two registers, one to be eliminated, and the other
8290 its replacement, at the start of a routine. */
8293 ix86_initial_elimination_offset (int from, int to)
8295 struct ix86_frame frame;
8296 ix86_compute_frame_layout (&frame);
8298 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8299 return frame.hard_frame_pointer_offset;
8300 else if (from == FRAME_POINTER_REGNUM
8301 && to == HARD_FRAME_POINTER_REGNUM)
8302 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8305 gcc_assert (to == STACK_POINTER_REGNUM);
8307 if (from == ARG_POINTER_REGNUM)
8308 return frame.stack_pointer_offset;
8310 gcc_assert (from == FRAME_POINTER_REGNUM);
8311 return frame.stack_pointer_offset - frame.frame_pointer_offset;
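/* An illustrative, self-contained sketch of the offset arithmetic above,
   using made-up frame offsets; frame_sketch and its values are not taken
   from this file. With {hard_fp = 8, fp = 24, sp = 56} the eliminations
   yield argp->hard_fp = 8, fp->hard_fp = -16, argp->sp = 56, fp->sp = 32. */

struct frame_sketch
{
  long hard_frame_pointer_offset;
  long frame_pointer_offset;
  long stack_pointer_offset;
};

static long
elimination_offset_sketch (const struct frame_sketch *f,
                           int from_arg_pointer, int to_hard_fp)
{
  if (to_hard_fp)
    return from_arg_pointer
           ? f->hard_frame_pointer_offset
           : f->hard_frame_pointer_offset - f->frame_pointer_offset;
  return from_arg_pointer
         ? f->stack_pointer_offset
         : f->stack_pointer_offset - f->frame_pointer_offset;
}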
8315 /* In a dynamically-aligned function, we can't know the offset from
8316 stack pointer to frame pointer, so we must ensure that setjmp
8317 eliminates fp against the hard fp (%ebp) rather than trying to
8318 index from %esp up to the top of the frame across a gap that is
8319 of unknown (at compile-time) size. */
8321 ix86_builtin_setjmp_frame_value (void)
8323 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8326 /* Fill the ix86_frame structure with the layout of the current function's frame. */
8329 ix86_compute_frame_layout (struct ix86_frame *frame)
8331 unsigned int stack_alignment_needed;
8332 HOST_WIDE_INT offset;
8333 unsigned int preferred_alignment;
8334 HOST_WIDE_INT size = get_frame_size ();
8335 HOST_WIDE_INT to_allocate;
8337 frame->nregs = ix86_nsaved_regs ();
8338 frame->nsseregs = ix86_nsaved_sseregs ();
8340 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8341 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8343 /* The MS ABI seems to require stack alignment to always be 16, except in
8344 function prologues and in leaf functions. */
8345 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8346 && (!current_function_is_leaf || cfun->calls_alloca != 0
8347 || ix86_current_function_calls_tls_descriptor))
8349 preferred_alignment = 16;
8350 stack_alignment_needed = 16;
8351 crtl->preferred_stack_boundary = 128;
8352 crtl->stack_alignment_needed = 128;
8355 gcc_assert (!size || stack_alignment_needed);
8356 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8357 gcc_assert (preferred_alignment <= stack_alignment_needed);
8359 /* During reload iterations the number of registers saved can change.
8360 Recompute the value as needed. Do not recompute when the number of registers
8361 didn't change, as reload calls this function multiple times and does not
8362 expect the decision to change within a single iteration. */
8363 if (!optimize_function_for_size_p (cfun)
8364 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8366 int count = frame->nregs;
8367 struct cgraph_node *node = cgraph_node (current_function_decl);
8369 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8370 /* The fast prologue uses move instead of push to save registers. This
8371 is significantly longer, but also executes faster as modern hardware
8372 can execute the moves in parallel, but can't do that for push/pop.
8374 Be careful about choosing which prologue to emit: when the function
8375 takes many instructions to execute, we may use the slow version, as we do
8376 when the function is known to be outside any hot spot (the latter is known
8377 with profile feedback only). Weight the size of the function by the number
8378 of registers to save, as it is cheap to use one or two push instructions
8379 but very slow to use many of them. */
8381 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8382 if (node->frequency < NODE_FREQUENCY_NORMAL
8383 || (flag_branch_probabilities
8384 && node->frequency < NODE_FREQUENCY_HOT))
8385 cfun->machine->use_fast_prologue_epilogue = false;
8387 cfun->machine->use_fast_prologue_epilogue
8388 = !expensive_function_p (count);
8390 if (TARGET_PROLOGUE_USING_MOVE
8391 && cfun->machine->use_fast_prologue_epilogue)
8392 frame->save_regs_using_mov = true;
8394 frame->save_regs_using_mov = false;
8396 /* If static stack checking is enabled and done with probes, the registers
8397 need to be saved before allocating the frame. */
8398 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8399 frame->save_regs_using_mov = false;
8401 /* Skip return address. */
8402 offset = UNITS_PER_WORD;
8404 /* Skip pushed static chain. */
8405 if (ix86_static_chain_on_stack)
8406 offset += UNITS_PER_WORD;
8408 /* Skip saved base pointer. */
8409 if (frame_pointer_needed)
8410 offset += UNITS_PER_WORD;
8412 frame->hard_frame_pointer_offset = offset;
8414 /* Register save area */
8415 offset += frame->nregs * UNITS_PER_WORD;
8416 frame->reg_save_offset = offset;
8418 /* Align and set SSE register save area. */
8419 if (frame->nsseregs)
8421 /* The only ABI that has saved SSE registers (Win64) also has a
8422 16-byte aligned default stack, and thus we don't need to be
8423 within the re-aligned local stack frame to save them. */
8424 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8425 offset = (offset + 16 - 1) & -16;
8426 offset += frame->nsseregs * 16;
8428 frame->sse_reg_save_offset = offset;
8430 /* The re-aligned stack starts here. Values before this point are not
8431 directly comparable with values below this point. In order to make
8432 sure that no value happens to be the same before and after, force
8433 the alignment computation below to add a non-zero value. */
8434 if (stack_realign_fp)
8435 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8438 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8439 offset += frame->va_arg_size;
8441 /* Align start of frame for local function. */
8442 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8444 /* Frame pointer points here. */
8445 frame->frame_pointer_offset = offset;
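/* The rounding idiom used throughout this function, as a standalone
   sketch: for a power-of-two ALIGN, (OFFSET + ALIGN - 1) & -ALIGN rounds
   OFFSET up to the next multiple of ALIGN, e.g. align_up_sketch (13, 16)
   == 16 and align_up_sketch (32, 16) == 32. */

static long
align_up_sketch (long offset, long align)
{
  /* -align is the two's-complement mask that clears the low bits. */
  return (offset + align - 1) & -align;
}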
8449 /* Add outgoing arguments area. Can be skipped if we eliminated
8450 all the function calls as dead code.
8451 Skipping is however impossible when the function calls alloca: the
8452 alloca expander assumes that the last crtl->outgoing_args_size bytes
8453 of the stack frame are unused. */
8454 if (ACCUMULATE_OUTGOING_ARGS
8455 && (!current_function_is_leaf || cfun->calls_alloca
8456 || ix86_current_function_calls_tls_descriptor))
8458 offset += crtl->outgoing_args_size;
8459 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8462 frame->outgoing_arguments_size = 0;
8464 /* Align the stack boundary. Only needed if we're calling another
8465 function or using alloca. */
8466 if (!current_function_is_leaf || cfun->calls_alloca
8467 || ix86_current_function_calls_tls_descriptor)
8468 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8470 /* We've reached the end of the stack frame. */
8471 frame->stack_pointer_offset = offset;
8473 /* The size the prologue needs to allocate. */
8474 to_allocate = offset - frame->sse_reg_save_offset;
8476 if ((!to_allocate && frame->nregs <= 1)
8477 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8478 frame->save_regs_using_mov = false;
8480 if (ix86_using_red_zone ()
8481 && current_function_sp_is_unchanging
8482 && current_function_is_leaf
8483 && !ix86_current_function_calls_tls_descriptor)
8485 frame->red_zone_size = to_allocate;
8486 if (frame->save_regs_using_mov)
8487 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8488 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8489 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8492 frame->red_zone_size = 0;
8493 frame->stack_pointer_offset -= frame->red_zone_size;
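/* A sketch of the red-zone sizing above with made-up inputs. The values
   128 for RED_ZONE_SIZE and 8 for RED_ZONE_RESERVE are assumptions for
   an x86-64-like target, and registers are assumed to be saved via mov.
   For example, with to_allocate == 104, 4 saved registers and 8-byte
   words, 104 + 32 = 136 is clamped down to 120. */

static long
red_zone_size_sketch (long to_allocate, int nregs, int word_bytes)
{
  const long red_zone_size = 128;     /* assumed RED_ZONE_SIZE */
  const long red_zone_reserve = 8;    /* assumed RED_ZONE_RESERVE */
  long size = to_allocate + (long) nregs * word_bytes;
  if (size > red_zone_size - red_zone_reserve)
    size = red_zone_size - red_zone_reserve;
  return size;
}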
8496 /* This is semi-inlined memory_address_length, but simplified
8497 since we know that we're always dealing with reg+offset, and
8498 to avoid having to create and discard all that rtl. */
8501 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8507 /* EBP and R13 cannot be encoded without an offset. */
8508 len = (regno == BP_REG || regno == R13_REG);
8510 else if (IN_RANGE (offset, -128, 127))
8513 /* ESP and R12 must be encoded with a SIB byte. */
8514 if (regno == SP_REG || regno == R12_REG)
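/* A standalone sketch of the encoding-length rules used above, following
   the usual x86 ModRM conventions: a zero displacement costs no bytes
   unless the base is EBP/R13 (which still needs a disp8 of 0), disp8
   covers [-128, 127], anything else needs a 4-byte disp32, and ESP/R12
   additionally require a SIB byte. The reg_kind encoding (0 = other,
   1 = EBP/R13, 2 = ESP/R12) is purely illustrative. */

static int
baseaddr_len_sketch (int reg_kind, long offset)
{
  int len;
  if (offset == 0)
    len = (reg_kind == 1);      /* EBP/R13 cannot be encoded bare */
  else if (offset >= -128 && offset <= 127)
    len = 1;                    /* disp8 */
  else
    len = 4;                    /* disp32 */
  if (reg_kind == 2)
    len++;                      /* SIB byte for ESP/R12 */
  return len;
}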
8520 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8521 The valid base registers are taken from CFUN->MACHINE->FS. */
8524 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8526 const struct machine_function *m = cfun->machine;
8527 rtx base_reg = NULL;
8528 HOST_WIDE_INT base_offset = 0;
8530 if (m->use_fast_prologue_epilogue)
8532 /* Choose the base register most likely to allow the most scheduling
8533 opportunities. Generally FP is valid throughout the function,
8534 while DRAP must be reloaded within the epilogue. But choose either
8535 over the SP due to increased encoding size. */
8539 base_reg = hard_frame_pointer_rtx;
8540 base_offset = m->fs.fp_offset - cfa_offset;
8542 else if (m->fs.drap_valid)
8544 base_reg = crtl->drap_reg;
8545 base_offset = 0 - cfa_offset;
8547 else if (m->fs.sp_valid)
8549 base_reg = stack_pointer_rtx;
8550 base_offset = m->fs.sp_offset - cfa_offset;
8555 HOST_WIDE_INT toffset;
8558 /* Choose the base register with the smallest address encoding.
8559 With a tie, choose FP > DRAP > SP. */
8562 base_reg = stack_pointer_rtx;
8563 base_offset = m->fs.sp_offset - cfa_offset;
8564 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8566 if (m->fs.drap_valid)
8568 toffset = 0 - cfa_offset;
8569 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8572 base_reg = crtl->drap_reg;
8573 base_offset = toffset;
8579 toffset = m->fs.fp_offset - cfa_offset;
8580 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8583 base_reg = hard_frame_pointer_rtx;
8584 base_offset = toffset;
8589 gcc_assert (base_reg != NULL);
8591 return plus_constant (base_reg, base_offset);
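/* Usage of the earlier sketch, mirroring the tie-break above: FP wins
   over SP when their encodings are equally long. This assumes
   baseaddr_len_sketch from the sketch following choose_baseaddr_len. */

static int
prefer_fp_over_sp_sketch (long fp_offset, long sp_offset)
{
  int fp_len = baseaddr_len_sketch (1, fp_offset);  /* EBP-class base */
  int sp_len = baseaddr_len_sketch (2, sp_offset);  /* ESP-class base */
  return fp_len <= sp_len;      /* ties go to FP, as in the code above */
}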
8594 /* Emit code to save registers in the prologue. */
8597 ix86_emit_save_regs (void)
8602 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8603 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8605 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8606 RTX_FRAME_RELATED_P (insn) = 1;
8610 /* Emit a single register save at CFA - CFA_OFFSET. */
8613 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8614 HOST_WIDE_INT cfa_offset)
8616 struct machine_function *m = cfun->machine;
8617 rtx reg = gen_rtx_REG (mode, regno);
8618 rtx mem, addr, base, insn;
8620 addr = choose_baseaddr (cfa_offset);
8621 mem = gen_frame_mem (mode, addr);
8623 /* For SSE saves, we need to indicate the 128-bit alignment. */
8624 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8626 insn = emit_move_insn (mem, reg);
8627 RTX_FRAME_RELATED_P (insn) = 1;
8630 if (GET_CODE (base) == PLUS)
8631 base = XEXP (base, 0);
8632 gcc_checking_assert (REG_P (base));
8634 /* When saving registers into a re-aligned local stack frame, avoid
8635 any tricky guessing by dwarf2out. */
8636 if (m->fs.realigned)
8638 gcc_checking_assert (stack_realign_drap);
8640 if (regno == REGNO (crtl->drap_reg))
8642 /* A bit of a hack. We force the DRAP register to be saved in
8643 the re-aligned stack frame, which provides us with a copy
8644 of the CFA that will last past the prologue. Install it. */
8645 gcc_checking_assert (cfun->machine->fs.fp_valid);
8646 addr = plus_constant (hard_frame_pointer_rtx,
8647 cfun->machine->fs.fp_offset - cfa_offset);
8648 mem = gen_rtx_MEM (mode, addr);
8649 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8653 /* The frame pointer is a stable reference within the
8654 aligned frame. Use it. */
8655 gcc_checking_assert (cfun->machine->fs.fp_valid);
8656 addr = plus_constant (hard_frame_pointer_rtx,
8657 cfun->machine->fs.fp_offset - cfa_offset);
8658 mem = gen_rtx_MEM (mode, addr);
8659 add_reg_note (insn, REG_CFA_EXPRESSION,
8660 gen_rtx_SET (VOIDmode, mem, reg));
8664 /* The memory may not be relative to the current CFA register,
8665 which means that we may need to generate a new pattern for
8666 use by the unwind info. */
8667 else if (base != m->fs.cfa_reg)
8669 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8670 mem = gen_rtx_MEM (mode, addr);
8671 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8675 /* Emit code to save registers using MOV insns.
8676 First register is stored at CFA - CFA_OFFSET. */
8678 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8682 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8683 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8685 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8686 cfa_offset -= UNITS_PER_WORD;
8690 /* Emit code to save SSE registers using MOV insns.
8691 First register is stored at CFA - CFA_OFFSET. */
8693 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8697 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8698 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8700 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8705 static GTY(()) rtx queued_cfa_restores;
8707 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8708 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8709 Don't add the note if the previously saved value will be left untouched
8710 within the stack red-zone until return, as unwinders can find the same
8711 value in the register and on the stack. */
8714 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8716 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8721 add_reg_note (insn, REG_CFA_RESTORE, reg);
8722 RTX_FRAME_RELATED_P (insn) = 1;
8726 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8729 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8732 ix86_add_queued_cfa_restore_notes (rtx insn)
8735 if (!queued_cfa_restores)
8737 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8739 XEXP (last, 1) = REG_NOTES (insn);
8740 REG_NOTES (insn) = queued_cfa_restores;
8741 queued_cfa_restores = NULL_RTX;
8742 RTX_FRAME_RELATED_P (insn) = 1;
8745 /* Expand prologue or epilogue stack adjustment.
8746 The pattern exists to put a dependency on all ebp-based memory accesses.
8747 STYLE should be negative if instructions should be marked as frame related,
8748 zero if the %r11 register is live and cannot be freely used, and positive
8749 otherwise. */
8752 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8753 int style, bool set_cfa)
8755 struct machine_function *m = cfun->machine;
8759 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8760 else if (x86_64_immediate_operand (offset, DImode))
8761 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8765 /* r11 is used by indirect sibcall return as well, set before the
8766 epilogue and used after the epilogue. */
8768 tmp = gen_rtx_REG (DImode, R11_REG);
8771 gcc_assert (src != hard_frame_pointer_rtx
8772 && dest != hard_frame_pointer_rtx);
8773 tmp = hard_frame_pointer_rtx;
8775 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8777 RTX_FRAME_RELATED_P (insn) = 1;
8778 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8783 ix86_add_queued_cfa_restore_notes (insn);
8789 gcc_assert (m->fs.cfa_reg == src);
8790 m->fs.cfa_offset += INTVAL (offset);
8791 m->fs.cfa_reg = dest;
8793 r = gen_rtx_PLUS (Pmode, src, offset);
8794 r = gen_rtx_SET (VOIDmode, dest, r);
8795 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8796 RTX_FRAME_RELATED_P (insn) = 1;
8799 RTX_FRAME_RELATED_P (insn) = 1;
8801 if (dest == stack_pointer_rtx)
8803 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8804 bool valid = m->fs.sp_valid;
8806 if (src == hard_frame_pointer_rtx)
8808 valid = m->fs.fp_valid;
8809 ooffset = m->fs.fp_offset;
8811 else if (src == crtl->drap_reg)
8813 valid = m->fs.drap_valid;
8818 /* Else there are two possibilities: SP itself, which we set
8819 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
8820 taken care of by hand along the eh_return path. */
8821 gcc_checking_assert (src == stack_pointer_rtx
8822 || offset == const0_rtx);
8825 m->fs.sp_offset = ooffset - INTVAL (offset);
8826 m->fs.sp_valid = valid;
8830 /* Find an available register to be used as the dynamic realign argument
8831 pointer register. Such a register will be written in the prologue and
8832 used early in the body, so it must not be
8833 1. a parameter passing register;
8834 2. the GOT pointer.
8835 We reuse the static-chain register if it is available. Otherwise, we
8836 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8837 longer encoding.
8839 Return: the regno of the chosen register. */
8842 find_drap_reg (void)
8844 tree decl = cfun->decl;
8848 /* Use R13 for a nested function or a function that needs a static
8849 chain. Since a function with a tail call may use any caller-saved
8850 register in the epilogue, DRAP must not use a caller-saved
8851 register in that case. */
8852 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8859 /* Use DI for a nested function or a function that needs a static
8860 chain. Since a function with a tail call may use any caller-saved
8861 register in the epilogue, DRAP must not use a caller-saved
8862 register in that case. */
8863 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8866 /* Reuse the static chain register if it isn't used for parameter
8867 passing. */
8868 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8869 && !lookup_attribute ("fastcall",
8870 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8871 && !lookup_attribute ("thiscall",
8872 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8879 /* Return minimum incoming stack alignment. */
8882 ix86_minimum_incoming_stack_boundary (bool sibcall)
8884 unsigned int incoming_stack_boundary;
8886 /* Prefer the one specified at command line. */
8887 if (ix86_user_incoming_stack_boundary)
8888 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8889 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8890 when -mstackrealign is used, this isn't a sibcall check, and the
8891 estimated stack alignment is 128 bits. */
8894 && ix86_force_align_arg_pointer
8895 && crtl->stack_alignment_estimated == 128)
8896 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8898 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8900 /* Incoming stack alignment can be changed on individual functions
8901 via force_align_arg_pointer attribute. We use the smallest
8902 incoming stack boundary. */
8903 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8904 && lookup_attribute (ix86_force_align_arg_pointer_string,
8905 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8906 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8908 /* The incoming stack frame has to be aligned at least at
8909 parm_stack_boundary. */
8910 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8911 incoming_stack_boundary = crtl->parm_stack_boundary;
8913 /* The stack at the entrance of main is aligned by the runtime. We use
8914 the smallest incoming stack boundary. */
8915 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8916 && DECL_NAME (current_function_decl)
8917 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8918 && DECL_FILE_SCOPE_P (current_function_decl))
8919 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8921 return incoming_stack_boundary;
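/* The clamping above, condensed into a sketch with illustrative
   constants (32 for MIN_STACK_BOUNDARY, as on a 32-bit target; the main
   boundary is passed in). Only the order of the adjustments matters. */

static unsigned int
incoming_boundary_sketch (unsigned int base, int force_align_attr,
                          unsigned int parm_boundary, int is_main,
                          unsigned int main_boundary)
{
  unsigned int b = base;
  const unsigned int min_boundary = 32;  /* assumed MIN_STACK_BOUNDARY */
  if (b > min_boundary && force_align_attr)
    b = min_boundary;           /* attribute forces the minimum */
  if (b < parm_boundary)
    b = parm_boundary;          /* at least parm_stack_boundary */
  if (b > main_boundary && is_main)
    b = main_boundary;          /* main is aligned by the runtime */
  return b;
}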
8924 /* Update incoming stack boundary and estimated stack alignment. */
8927 ix86_update_stack_boundary (void)
8929 ix86_incoming_stack_boundary
8930 = ix86_minimum_incoming_stack_boundary (false);
8932 /* x86_64 varargs need 16-byte stack alignment for the register save
8933 area. */
8936 && crtl->stack_alignment_estimated < 128)
8937 crtl->stack_alignment_estimated = 128;
8940 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8941 needed or an rtx for DRAP otherwise. */
8944 ix86_get_drap_rtx (void)
8946 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8947 crtl->need_drap = true;
8949 if (stack_realign_drap)
8951 /* Assign DRAP to vDRAP and return vDRAP. */
8952 unsigned int regno = find_drap_reg ();
8957 arg_ptr = gen_rtx_REG (Pmode, regno);
8958 crtl->drap_reg = arg_ptr;
8961 drap_vreg = copy_to_reg (arg_ptr);
8965 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8968 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8969 RTX_FRAME_RELATED_P (insn) = 1;
8977 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8980 ix86_internal_arg_pointer (void)
8982 return virtual_incoming_args_rtx;
8985 struct scratch_reg {
8990 /* Return a short-lived scratch register for use on function entry.
8991 In 32-bit mode, it is valid only after the registers are saved
8992 in the prologue. This register must be released by means of
8993 release_scratch_register_on_entry once it is dead. */
8996 get_scratch_register_on_entry (struct scratch_reg *sr)
9004 /* We always use R11 in 64-bit mode. */
9009 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9011 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9012 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9013 int regparm = ix86_function_regparm (fntype, decl);
9015 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9017 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9018 for the static chain register. */
9019 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9020 && drap_regno != AX_REG)
9022 else if (regparm < 2 && drap_regno != DX_REG)
9024 /* ecx is the static chain register. */
9025 else if (regparm < 3 && !fastcall_p && !static_chain_p
9026 && drap_regno != CX_REG)
9028 else if (ix86_save_reg (BX_REG, true))
9030 /* esi is the static chain register. */
9031 else if (!(regparm == 3 && static_chain_p)
9032 && ix86_save_reg (SI_REG, true))
9034 else if (ix86_save_reg (DI_REG, true))
9038 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9043 sr->reg = gen_rtx_REG (Pmode, regno);
9046 rtx insn = emit_insn (gen_push (sr->reg));
9047 RTX_FRAME_RELATED_P (insn) = 1;
9051 /* Release a scratch register obtained from the preceding function. */
9054 release_scratch_register_on_entry (struct scratch_reg *sr)
9058 rtx x, insn = emit_insn (gen_pop (sr->reg));
9060 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9061 RTX_FRAME_RELATED_P (insn) = 1;
9062 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9063 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9064 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9068 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9070 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9073 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9075 /* We skip the probe for the first interval + a small dope of 4 words and
9076 probe that many bytes past the specified size to maintain a protection
9077 area at the bottom of the stack. */
9078 const int dope = 4 * UNITS_PER_WORD;
9079 rtx size_rtx = GEN_INT (size);
9081 /* See if we have a constant small number of probes to generate. If so,
9082 that's the easy case. The run-time loop is made up of 11 insns in the
9083 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9084 for n # of intervals. */
9085 if (size <= 5 * PROBE_INTERVAL)
9087 HOST_WIDE_INT i, adjust;
9088 bool first_probe = true;
9090 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9091 values of N from 1 until it exceeds SIZE. If only one probe is
9092 needed, this will not generate any code. Then adjust and probe
9093 to PROBE_INTERVAL + SIZE. */
9094 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9098 adjust = 2 * PROBE_INTERVAL + dope;
9099 first_probe = false;
9102 adjust = PROBE_INTERVAL;
9104 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9105 plus_constant (stack_pointer_rtx, -adjust)));
9106 emit_stack_probe (stack_pointer_rtx);
9110 adjust = size + PROBE_INTERVAL + dope;
9112 adjust = size + PROBE_INTERVAL - i;
9114 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9115 plus_constant (stack_pointer_rtx, -adjust)));
9116 emit_stack_probe (stack_pointer_rtx);
9118 /* Adjust back to account for the additional first interval. */
9119 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9120 plus_constant (stack_pointer_rtx,
9121 PROBE_INTERVAL + dope)));
9124 /* Otherwise, do the same as above, but in a loop. Note that we must be
9125 extra careful with variables wrapping around because we might be at
9126 the very top (or the very bottom) of the address space and we have
9127 to be able to handle this case properly; in particular, we use an
9128 equality test for the loop condition. */
9131 HOST_WIDE_INT rounded_size;
9132 struct scratch_reg sr;
9134 get_scratch_register_on_entry (&sr);
9137 /* Step 1: round SIZE to the previous multiple of the interval. */
9139 rounded_size = size & -PROBE_INTERVAL;
9142 /* Step 2: compute initial and final value of the loop counter. */
9144 /* SP = SP_0 + PROBE_INTERVAL. */
9145 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9146 plus_constant (stack_pointer_rtx,
9147 - (PROBE_INTERVAL + dope))));
9149 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9150 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9151 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9152 gen_rtx_PLUS (Pmode, sr.reg,
9153 stack_pointer_rtx)));
9158 while (SP != LAST_ADDR)
9160 SP = SP + PROBE_INTERVAL
9161 probe at SP
9164 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9165 values of N from 1 until it is equal to ROUNDED_SIZE. */
9167 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9170 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9171 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9173 if (size != rounded_size)
9175 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9176 plus_constant (stack_pointer_rtx,
9177 rounded_size - size)));
9178 emit_stack_probe (stack_pointer_rtx);
9181 /* Adjust back to account for the additional first interval. */
9182 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9183 plus_constant (stack_pointer_rtx,
9184 PROBE_INTERVAL + dope)));
9186 release_scratch_register_on_entry (&sr);
9189 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9190 cfun->machine->fs.sp_offset += size;
9192 /* Make sure nothing is scheduled before we are done. */
9193 emit_insn (gen_blockage ());
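/* A self-contained check (illustrative, not GCC code) of the bookkeeping
   in the constant-size branch of the function above: after the loop, the
   final probe, and the PROBE_INTERVAL + dope give-back, the net stack
   adjustment is exactly SIZE bytes. */

static long
probe_bookkeeping_sketch (long size, long pi, long dope)
{
  long sp = 0, i;
  int first_probe = 1;
  for (i = pi; i < size; i += pi)
    {
      sp -= first_probe ? 2 * pi + dope : pi;   /* adjust SP and probe */
      first_probe = 0;
    }
  sp -= first_probe ? size + pi + dope : size + pi - i;
  sp += pi + dope;              /* give back the extra first interval */
  return sp;                    /* always equals -size */
}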
9196 /* Adjust the stack pointer up to REG while probing it. */
9199 output_adjust_stack_and_probe (rtx reg)
9201 static int labelno = 0;
9202 char loop_lab[32], end_lab[32];
9205 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9206 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9208 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9210 /* Jump to END_LAB if SP == LAST_ADDR. */
9211 xops[0] = stack_pointer_rtx;
9213 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9214 fputs ("\tje\t", asm_out_file);
9215 assemble_name_raw (asm_out_file, end_lab);
9216 fputc ('\n', asm_out_file);
9218 /* SP = SP + PROBE_INTERVAL. */
9219 xops[1] = GEN_INT (PROBE_INTERVAL);
9220 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9223 xops[1] = const0_rtx;
9224 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9226 fprintf (asm_out_file, "\tjmp\t");
9227 assemble_name_raw (asm_out_file, loop_lab);
9228 fputc ('\n', asm_out_file);
9230 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
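/* What the loop emitted above does at runtime, sketched in plain C. The
   real code operates on the hardware stack pointer; here sp is an
   ordinary pointer and the |= 0 store stands in for the or-to-memory
   probe insn. */

static void
moving_sp_probe_sketch (volatile char *sp, volatile char *last_addr,
                        long probe_interval)
{
  while (sp != last_addr)
    {
      sp -= probe_interval;     /* SP = SP - PROBE_INTERVAL (sub insn) */
      *sp |= 0;                 /* probe: orl $0, (%sp) */
    }
}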
9235 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9236 inclusive. These are offsets from the current stack pointer. */
9239 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9241 /* See if we have a constant small number of probes to generate. If so,
9242 that's the easy case. The run-time loop is made up of 7 insns in the
9243 generic case while the compile-time loop is made up of n insns for n #
9244 of intervals. */
9245 if (size <= 7 * PROBE_INTERVAL)
9249 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9250 it exceeds SIZE. If only one probe is needed, this will not
9251 generate any code. Then probe at FIRST + SIZE. */
9252 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9253 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9255 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9258 /* Otherwise, do the same as above, but in a loop. Note that we must be
9259 extra careful with variables wrapping around because we might be at
9260 the very top (or the very bottom) of the address space and we have
9261 to be able to handle this case properly; in particular, we use an
9262 equality test for the loop condition. */
9265 HOST_WIDE_INT rounded_size, last;
9266 struct scratch_reg sr;
9268 get_scratch_register_on_entry (&sr);
9271 /* Step 1: round SIZE to the previous multiple of the interval. */
9273 rounded_size = size & -PROBE_INTERVAL;
9276 /* Step 2: compute initial and final value of the loop counter. */
9278 /* TEST_OFFSET = FIRST. */
9279 emit_move_insn (sr.reg, GEN_INT (-first));
9281 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9282 last = first + rounded_size;
9287 while (TEST_ADDR != LAST_ADDR)
9289 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9290 probe at TEST_ADDR
9293 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9294 until it is equal to ROUNDED_SIZE. */
9296 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9299 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9300 that SIZE is equal to ROUNDED_SIZE. */
9302 if (size != rounded_size)
9303 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9306 rounded_size - size));
9308 release_scratch_register_on_entry (&sr);
9311 /* Make sure nothing is scheduled before we are done. */
9312 emit_insn (gen_blockage ());
9315 /* Probe a range of stack addresses from REG to END, inclusive. These are
9316 offsets from the current stack pointer. */
9319 output_probe_stack_range (rtx reg, rtx end)
9321 static int labelno = 0;
9322 char loop_lab[32], end_lab[32];
9325 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9326 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9328 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9330 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9333 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9334 fputs ("\tje\t", asm_out_file);
9335 assemble_name_raw (asm_out_file, end_lab);
9336 fputc ('\n', asm_out_file);
9338 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9339 xops[1] = GEN_INT (PROBE_INTERVAL);
9340 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9342 /* Probe at TEST_ADDR. */
9343 xops[0] = stack_pointer_rtx;
9345 xops[2] = const0_rtx;
9346 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9348 fprintf (asm_out_file, "\tjmp\t");
9349 assemble_name_raw (asm_out_file, loop_lab);
9350 fputc ('\n', asm_out_file);
9352 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9357 /* Finalize the stack_realign_needed flag, which guides the prologue and
9358 epilogue so that they are generated in the correct form. */
9360 ix86_finalize_stack_realign_flags (void)
9362 /* Check if stack realignment is really needed after reload, and
9363 store the result in cfun. */
9364 unsigned int incoming_stack_boundary
9365 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9366 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9367 unsigned int stack_realign = (incoming_stack_boundary
9368 < (current_function_is_leaf
9369 ? crtl->max_used_stack_slot_alignment
9370 : crtl->stack_alignment_needed));
9372 if (crtl->stack_realign_finalized)
9374 /* After stack_realign_needed is finalized, we can no longer
9375 change it. */
9376 gcc_assert (crtl->stack_realign_needed == stack_realign);
9380 crtl->stack_realign_needed = stack_realign;
9381 crtl->stack_realign_finalized = true;
9385 /* Expand the prologue into a bunch of separate insns. */
9388 ix86_expand_prologue (void)
9390 struct machine_function *m = cfun->machine;
9393 struct ix86_frame frame;
9394 HOST_WIDE_INT allocate;
9395 bool int_registers_saved;
9397 ix86_finalize_stack_realign_flags ();
9399 /* DRAP should not coexist with stack_realign_fp */
9400 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9402 memset (&m->fs, 0, sizeof (m->fs));
9404 /* Initialize CFA state for before the prologue. */
9405 m->fs.cfa_reg = stack_pointer_rtx;
9406 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9408 /* Track SP offset to the CFA. We continue tracking this after we've
9409 swapped the CFA register away from SP. In the case of re-alignment
9410 this is fudged; we're interested in offsets within the local frame. */
9411 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9412 m->fs.sp_valid = true;
9414 ix86_compute_frame_layout (&frame);
9416 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9418 /* We should have already generated an error for any use of
9419 ms_hook on a nested function. */
9420 gcc_checking_assert (!ix86_static_chain_on_stack);
9422 /* Check if profiling is active and whether we shall use the
9423 profiling-before-prologue variant. If so, sorry. */
9424 if (crtl->profile && flag_fentry != 0)
9425 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9427 /* In ix86_asm_output_function_label we emitted:
9428 8b ff movl.s %edi,%edi
9429 55 push %ebp
9430 8b ec movl.s %esp,%ebp
9432 This matches the hookable function prologue in Win32 API
9433 functions in Microsoft Windows XP Service Pack 2 and newer.
9434 Wine uses this to enable Windows apps to hook the Win32 API
9435 functions provided by Wine.
9437 What that means is that we've already set up the frame pointer. */
9439 if (frame_pointer_needed
9440 && !(crtl->drap_reg && crtl->stack_realign_needed))
9444 /* We've decided to use the frame pointer already set up.
9445 Describe this to the unwinder by pretending that both
9446 push and mov insns happen right here.
9448 Putting the unwind info here at the end of the ms_hook
9449 is done so that we can make absolutely certain we get
9450 the required byte sequence at the start of the function,
9451 rather than relying on an assembler that can produce
9452 the exact encoding required.
9454 However it does mean (in the unpatched case) that we have
9455 a 1-insn window where the asynchronous unwind info is
9456 incorrect. On the other hand, if we placed the unwind info at
9457 its correct location we would have incorrect unwind info
9458 in the patched case. This is probably all moot since
9459 I don't expect Wine to generate dwarf2 unwind info for the
9460 system libraries that use this feature. */
9462 insn = emit_insn (gen_blockage ());
9464 push = gen_push (hard_frame_pointer_rtx);
9465 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9467 RTX_FRAME_RELATED_P (push) = 1;
9468 RTX_FRAME_RELATED_P (mov) = 1;
9470 RTX_FRAME_RELATED_P (insn) = 1;
9471 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9472 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9474 /* Note that gen_push incremented m->fs.cfa_offset, even
9475 though we didn't emit the push insn here. */
9476 m->fs.cfa_reg = hard_frame_pointer_rtx;
9477 m->fs.fp_offset = m->fs.cfa_offset;
9478 m->fs.fp_valid = true;
9482 /* The frame pointer is not needed so pop %ebp again.
9483 This leaves us with a pristine state. */
9484 emit_insn (gen_pop (hard_frame_pointer_rtx));
9488 /* The first insn of a function that accepts its static chain on the
9489 stack is to push the register that would be filled in by a direct
9490 call. This insn will be skipped by the trampoline. */
9491 else if (ix86_static_chain_on_stack)
9493 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9494 emit_insn (gen_blockage ());
9496 /* We don't want to interpret this push insn as a register save,
9497 only as a stack adjustment. The real copy of the register as
9498 a save will be done later, if needed. */
9499 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9500 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9501 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9502 RTX_FRAME_RELATED_P (insn) = 1;
9505 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9506 DRAP is needed and stack realignment is really needed after reload. */
9507 if (stack_realign_drap)
9509 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9511 /* Only need to push parameter pointer reg if it is caller saved. */
9512 if (!call_used_regs[REGNO (crtl->drap_reg)])
9514 /* Push arg pointer reg */
9515 insn = emit_insn (gen_push (crtl->drap_reg));
9516 RTX_FRAME_RELATED_P (insn) = 1;
9519 /* Grab the argument pointer. */
9520 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9521 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9522 RTX_FRAME_RELATED_P (insn) = 1;
9523 m->fs.cfa_reg = crtl->drap_reg;
9524 m->fs.cfa_offset = 0;
9526 /* Align the stack. */
9527 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9529 GEN_INT (-align_bytes)));
9530 RTX_FRAME_RELATED_P (insn) = 1;
9532 /* Replicate the return address on the stack so that the return
9533 address can be reached via the (argp - 1) slot. This is needed
9534 to implement the RETURN_ADDR_RTX macro and the intrinsic function
9535 expand_builtin_return_addr etc. */
9536 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9537 t = gen_frame_mem (Pmode, t);
9538 insn = emit_insn (gen_push (t));
9539 RTX_FRAME_RELATED_P (insn) = 1;
9541 /* For the purposes of frame and register save area addressing,
9542 we've started over with a new frame. */
9543 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9544 m->fs.realigned = true;
9547 if (frame_pointer_needed && !m->fs.fp_valid)
9549 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9550 slower on all targets. Also sdb doesn't like it. */
9551 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9552 RTX_FRAME_RELATED_P (insn) = 1;
9554 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9555 RTX_FRAME_RELATED_P (insn) = 1;
9557 if (m->fs.cfa_reg == stack_pointer_rtx)
9558 m->fs.cfa_reg = hard_frame_pointer_rtx;
9559 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9560 m->fs.fp_offset = m->fs.sp_offset;
9561 m->fs.fp_valid = true;
9564 int_registers_saved = (frame.nregs == 0);
9566 if (!int_registers_saved)
9568 /* If saving registers via PUSH, do so now. */
9569 if (!frame.save_regs_using_mov)
9571 ix86_emit_save_regs ();
9572 int_registers_saved = true;
9573 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9576 /* When using the red zone we may start register saving before allocating
9577 the stack frame, saving one cycle of the prologue. However, avoid
9578 doing this if we have to probe the stack; at least on x86_64 the
9579 stack probe can turn into a call that clobbers a red zone location. */
9580 else if (ix86_using_red_zone ()
9581 && (! TARGET_STACK_PROBE
9582 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9584 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9585 int_registers_saved = true;
9589 if (stack_realign_fp)
9591 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9592 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9594 /* The computation of the size of the re-aligned stack frame means
9595 that we must allocate the size of the register save area before
9596 performing the actual alignment. Otherwise we cannot guarantee
9597 that there's enough storage above the realignment point. */
9598 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9599 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9600 GEN_INT (m->fs.sp_offset
9601 - frame.sse_reg_save_offset),
9604 /* Align the stack. */
9605 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9607 GEN_INT (-align_bytes)));
9609 /* For the purposes of register save area addressing, the stack
9610 pointer is no longer valid. As for the value of sp_offset,
9611 see ix86_compute_frame_layout, which we need to match in order
9612 to pass verification of stack_pointer_offset at the end. */
9613 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9614 m->fs.sp_valid = false;
9617 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9619 if (flag_stack_usage)
9621 /* We start to count from ARG_POINTER. */
9622 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9624 /* If it was realigned, take into account the fake frame. */
9625 if (stack_realign_drap)
9627 if (ix86_static_chain_on_stack)
9628 stack_size += UNITS_PER_WORD;
9630 if (!call_used_regs[REGNO (crtl->drap_reg)])
9631 stack_size += UNITS_PER_WORD;
9633 /* This over-estimates by 1 minimal-stack-alignment-unit but
9634 mitigates that by counting in the new return address slot. */
9635 current_function_dynamic_stack_size
9636 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9639 current_function_static_stack_size = stack_size;
9642 /* The stack has already been decremented by the instruction calling us
9643 so we need to probe unconditionally to preserve the protection area. */
9644 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9646 /* We expect the registers to be saved when probes are used. */
9647 gcc_assert (int_registers_saved);
9649 if (STACK_CHECK_MOVING_SP)
9651 ix86_adjust_stack_and_probe (allocate);
9656 HOST_WIDE_INT size = allocate;
9658 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9659 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9661 if (TARGET_STACK_PROBE)
9662 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9664 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9670 else if (!ix86_target_stack_probe ()
9671 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9673 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9674 GEN_INT (-allocate), -1,
9675 m->fs.cfa_reg == stack_pointer_rtx);
9679 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9682 if (cfun->machine->call_abi == MS_ABI)
9685 eax_live = ix86_eax_live_at_start_p ();
9689 emit_insn (gen_push (eax));
9690 allocate -= UNITS_PER_WORD;
9693 emit_move_insn (eax, GEN_INT (allocate));
9695 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9697 if (m->fs.cfa_reg == stack_pointer_rtx)
9699 m->fs.cfa_offset += allocate;
9700 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9701 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9702 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9703 RTX_FRAME_RELATED_P (insn) = 1;
9705 m->fs.sp_offset += allocate;
9709 t = choose_baseaddr (m->fs.sp_offset - allocate);
9710 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9713 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9715 if (!int_registers_saved)
9716 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9718 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9720 pic_reg_used = false;
9721 if (pic_offset_table_rtx
9722 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9725 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9727 if (alt_pic_reg_used != INVALID_REGNUM)
9728 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9730 pic_reg_used = true;
9737 if (ix86_cmodel == CM_LARGE_PIC)
9739 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9740 rtx label = gen_label_rtx ();
9742 LABEL_PRESERVE_P (label) = 1;
9743 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9744 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9745 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9746 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9747 pic_offset_table_rtx, tmp_reg));
9750 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9753 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9756 /* In the pic_reg_used case, make sure that the got load isn't deleted
9757 when mcount needs it. Blockage to avoid call movement across mcount
9758 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9759 note. */
9760 if (crtl->profile && !flag_fentry && pic_reg_used)
9761 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9763 if (crtl->drap_reg && !crtl->stack_realign_needed)
9765 /* vDRAP is set up, but after reload it turns out stack realignment
9766 isn't necessary; here we emit the prologue to set up DRAP
9767 without the stack-realignment adjustment. */
9768 t = choose_baseaddr (0);
9769 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9772 /* Prevent instructions from being scheduled into the register save push
9773 sequence when access to the red-zone area is done through the frame pointer.
9774 The offset between the frame pointer and the stack pointer is calculated
9775 relative to the value of the stack pointer at the end of the function
9776 prologue, and moving instructions that access the red-zone area via the
9777 frame pointer inside the push sequence violates this assumption. */
9778 if (frame_pointer_needed && frame.red_zone_size)
9779 emit_insn (gen_memory_blockage ());
9781 /* Emit cld instruction if stringops are used in the function. */
9782 if (TARGET_CLD && ix86_current_function_needs_cld)
9783 emit_insn (gen_cld ());
9786 /* Emit code to restore REG using a POP insn. */
9789 ix86_emit_restore_reg_using_pop (rtx reg)
9791 struct machine_function *m = cfun->machine;
9792 rtx insn = emit_insn (gen_pop (reg));
9794 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9795 m->fs.sp_offset -= UNITS_PER_WORD;
9797 if (m->fs.cfa_reg == crtl->drap_reg
9798 && REGNO (reg) == REGNO (crtl->drap_reg))
9800 /* Previously we'd represented the CFA as an expression
9801 like *(%ebp - 8). We've just popped that value from
9802 the stack, which means we need to reset the CFA to
9803 the drap register. This will remain until we restore
9804 the stack pointer. */
9805 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9806 RTX_FRAME_RELATED_P (insn) = 1;
9808 /* This means that the DRAP register is valid for addressing too. */
9809 m->fs.drap_valid = true;
9813 if (m->fs.cfa_reg == stack_pointer_rtx)
9815 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9816 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9817 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9818 RTX_FRAME_RELATED_P (insn) = 1;
9820 m->fs.cfa_offset -= UNITS_PER_WORD;
9823 /* When the frame pointer is the CFA, and we pop it, we are
9824 swapping back to the stack pointer as the CFA. This happens
9825 for stack frames that don't allocate other data, so we assume
9826 the stack pointer is now pointing at the return address, i.e.
9827 the function entry state, which makes the offset be 1 word. */
9828 if (reg == hard_frame_pointer_rtx)
9830 m->fs.fp_valid = false;
9831 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9833 m->fs.cfa_reg = stack_pointer_rtx;
9834 m->fs.cfa_offset -= UNITS_PER_WORD;
9836 add_reg_note (insn, REG_CFA_DEF_CFA,
9837 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9838 GEN_INT (m->fs.cfa_offset)));
9839 RTX_FRAME_RELATED_P (insn) = 1;
9844 /* Emit code to restore saved registers using POP insns. */
9847 ix86_emit_restore_regs_using_pop (void)
9851 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9852 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9853 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9856 /* Emit code and notes for the LEAVE instruction. */
9859 ix86_emit_leave (void)
9861 struct machine_function *m = cfun->machine;
9862 rtx insn = emit_insn (ix86_gen_leave ());
9864 ix86_add_queued_cfa_restore_notes (insn);
9866 gcc_assert (m->fs.fp_valid);
9867 m->fs.sp_valid = true;
9868 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9869 m->fs.fp_valid = false;
9871 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9873 m->fs.cfa_reg = stack_pointer_rtx;
9874 m->fs.cfa_offset = m->fs.sp_offset;
9876 add_reg_note (insn, REG_CFA_DEF_CFA,
9877 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9878 RTX_FRAME_RELATED_P (insn) = 1;
9879 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
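/* The state transition performed by the leave insn above, sketched:
   leave is equivalent to movl %ebp, %esp; popl %ebp, which is why
   sp_valid becomes true at fp_offset minus one word while fp_valid
   becomes false. Types and names here are illustrative. */

static void
leave_sketch (unsigned long **sp, unsigned long **fp)
{
  *sp = *fp;                        /* mov %ebp, %esp */
  *fp = (unsigned long *) **sp;     /* pop %ebp: load the saved fp ... */
  (*sp)++;                          /* ... and bump %esp by one word */
}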
9884 /* Emit code to restore saved registers using MOV insns.
9885 First register is restored from CFA - CFA_OFFSET. */
9887 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9888 int maybe_eh_return)
9890 struct machine_function *m = cfun->machine;
9893 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9894 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9896 rtx reg = gen_rtx_REG (Pmode, regno);
9899 mem = choose_baseaddr (cfa_offset);
9900 mem = gen_frame_mem (Pmode, mem);
9901 insn = emit_move_insn (reg, mem);
9903 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9905 /* Previously we'd represented the CFA as an expression
9906 like *(%ebp - 8). We've just popped that value from
9907 the stack, which means we need to reset the CFA to
9908 the drap register. This will remain until we restore
9909 the stack pointer. */
9910 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9911 RTX_FRAME_RELATED_P (insn) = 1;
9913 /* This means that the DRAP register is valid for addressing. */
9914 m->fs.drap_valid = true;
9917 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9919 cfa_offset -= UNITS_PER_WORD;
9923 /* Emit code to restore saved SSE registers using MOV insns.
9924 First register is restored from CFA - CFA_OFFSET. */
9926 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9927 int maybe_eh_return)
9931 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9932 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9934 rtx reg = gen_rtx_REG (V4SFmode, regno);
9937 mem = choose_baseaddr (cfa_offset);
9938 mem = gen_rtx_MEM (V4SFmode, mem);
9939 set_mem_align (mem, 128);
9940 emit_move_insn (reg, mem);
9942 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9948 /* Restore function stack, frame, and registers. */
9951 ix86_expand_epilogue (int style)
9953 struct machine_function *m = cfun->machine;
9954 struct machine_frame_state frame_state_save = m->fs;
9955 struct ix86_frame frame;
9956 bool restore_regs_via_mov;
9959 ix86_finalize_stack_realign_flags ();
9960 ix86_compute_frame_layout (&frame);
9962 m->fs.sp_valid = (!frame_pointer_needed
9963 || (current_function_sp_is_unchanging
9964 && !stack_realign_fp));
9965 gcc_assert (!m->fs.sp_valid
9966 || m->fs.sp_offset == frame.stack_pointer_offset);
9968 /* The FP must be valid if the frame pointer is present. */
9969 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9970 gcc_assert (!m->fs.fp_valid
9971 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9973 /* We must have *some* valid pointer to the stack frame. */
9974 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9976 /* The DRAP is never valid at this point. */
9977 gcc_assert (!m->fs.drap_valid);
9979 /* See the comment about red zone and frame
9980 pointer usage in ix86_expand_prologue. */
9981 if (frame_pointer_needed && frame.red_zone_size)
9982 emit_insn (gen_memory_blockage ());
9984 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9985 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9987 /* Determine the CFA offset of the end of the red-zone. */
9988 m->fs.red_zone_offset = 0;
9989 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9991 /* The red-zone begins below the return address. */
9992 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
9994 /* When the register save area is in the aligned portion of
9995 the stack, determine the maximum runtime displacement that
9996 matches up with the aligned frame. */
9997 if (stack_realign_drap)
9998 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10002 /* Special care must be taken for the normal return case of a function
10003 using eh_return: the eax and edx registers are marked as saved, but
10004 not restored along this path. Adjust the save location to match. */
10005 if (crtl->calls_eh_return && style != 2)
10006 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10008 /* If we're only restoring one register and sp is not valid, then
10009 restore the register via a move instruction, since it's less work
10010 than reloading sp and popping the register. */
10011 if (!m->fs.sp_valid && frame.nregs <= 1)
10012 restore_regs_via_mov = true;
10013 /* EH_RETURN requires the use of moves to function properly. */
10014 else if (crtl->calls_eh_return)
10015 restore_regs_via_mov = true;
10016 else if (TARGET_EPILOGUE_USING_MOVE
10017 && cfun->machine->use_fast_prologue_epilogue
10018 && (frame.nregs > 1
10019 || m->fs.sp_offset != frame.reg_save_offset))
10020 restore_regs_via_mov = true;
10021 else if (frame_pointer_needed
10023 && m->fs.sp_offset != frame.reg_save_offset)
10024 restore_regs_via_mov = true;
10025 else if (frame_pointer_needed
10026 && TARGET_USE_LEAVE
10027 && cfun->machine->use_fast_prologue_epilogue
10028 && frame.nregs == 1)
10029 restore_regs_via_mov = true;
10031 restore_regs_via_mov = false;
10033 if (restore_regs_via_mov || frame.nsseregs)
10035 /* Ensure that the entire register save area is addressable via
10036 the stack pointer, if we will restore via sp. */
10038 && m->fs.sp_offset > 0x7fffffff
10039 && !(m->fs.fp_valid || m->fs.drap_valid)
10040 && (frame.nsseregs + frame.nregs) != 0)
10042 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10043 GEN_INT (m->fs.sp_offset
10044 - frame.sse_reg_save_offset),
10046 m->fs.cfa_reg == stack_pointer_rtx);
10050 /* If there are any SSE registers to restore, then we have to do it
10051 via moves, since there's obviously no pop for SSE regs. */
10052 if (frame.nsseregs)
10053 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10056 if (restore_regs_via_mov)
10061 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10063 /* eh_return epilogues need %ecx added to the stack pointer. */
10066 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10068 /* Stack align doesn't work with eh_return. */
10069 gcc_assert (!stack_realign_drap);
10070 /* Neither do regparm nested functions. */
10071 gcc_assert (!ix86_static_chain_on_stack);
10073 if (frame_pointer_needed)
10075 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10076 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10077 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10079 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10080 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10082 /* Note that we use SA as a temporary CFA, as the return
10083 address is at the proper place relative to it. We
10084 pretend this happens at the FP restore insn because
10085 prior to this insn the FP would be stored at the wrong
10086 offset relative to SA, and after this insn we have no
10087 other reasonable register to use for the CFA. We don't
10088 bother resetting the CFA to the SP for the duration of
10089 the return insn. */
10090 add_reg_note (insn, REG_CFA_DEF_CFA,
10091 plus_constant (sa, UNITS_PER_WORD));
10092 ix86_add_queued_cfa_restore_notes (insn);
10093 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10094 RTX_FRAME_RELATED_P (insn) = 1;
10096 m->fs.cfa_reg = sa;
10097 m->fs.cfa_offset = UNITS_PER_WORD;
10098 m->fs.fp_valid = false;
10100 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10101 const0_rtx, style, false);
10105 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10106 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10107 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10108 ix86_add_queued_cfa_restore_notes (insn);
10110 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10111 if (m->fs.cfa_offset != UNITS_PER_WORD)
10113 m->fs.cfa_offset = UNITS_PER_WORD;
10114 add_reg_note (insn, REG_CFA_DEF_CFA,
10115 plus_constant (stack_pointer_rtx,
10117 RTX_FRAME_RELATED_P (insn) = 1;
10120 m->fs.sp_offset = UNITS_PER_WORD;
10121 m->fs.sp_valid = true;
10126 /* First step is to deallocate the stack frame so that we can
10127 pop the registers. */
10128 if (!m->fs.sp_valid)
10130 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10131 GEN_INT (m->fs.fp_offset
10132 - frame.reg_save_offset),
10133 style, false);
10135 else if (m->fs.sp_offset != frame.reg_save_offset)
10137 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10138 GEN_INT (m->fs.sp_offset
10139 - frame.reg_save_offset),
10140 style,
10141 m->fs.cfa_reg == stack_pointer_rtx);
10144 ix86_emit_restore_regs_using_pop ();
10147 /* If we used a frame pointer and haven't already got rid of it,
10148 then do so now. */
10149 if (m->fs.fp_valid)
10151 /* If the stack pointer is valid and pointing at the frame
10152 pointer store address, then we only need a pop. */
10153 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10154 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10155 /* The `leave' insn results in shorter dependency chains on CPUs that
10156 are able to grok it fast. */
10157 else if (TARGET_USE_LEAVE
10158 || optimize_function_for_size_p (cfun)
10159 || !cfun->machine->use_fast_prologue_epilogue)
10160 ix86_emit_leave ();
10163 pro_epilogue_adjust_stack (stack_pointer_rtx,
10164 hard_frame_pointer_rtx,
10165 const0_rtx, style, !using_drap);
10166 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10172 int param_ptr_offset = UNITS_PER_WORD;
10175 gcc_assert (stack_realign_drap);
10177 if (ix86_static_chain_on_stack)
10178 param_ptr_offset += UNITS_PER_WORD;
10179 if (!call_used_regs[REGNO (crtl->drap_reg)])
10180 param_ptr_offset += UNITS_PER_WORD;
10182 insn = emit_insn (gen_rtx_SET
10183 (VOIDmode, stack_pointer_rtx,
10184 gen_rtx_PLUS (Pmode,
10186 GEN_INT (-param_ptr_offset))));
10187 m->fs.cfa_reg = stack_pointer_rtx;
10188 m->fs.cfa_offset = param_ptr_offset;
10189 m->fs.sp_offset = param_ptr_offset;
10190 m->fs.realigned = false;
10192 add_reg_note (insn, REG_CFA_DEF_CFA,
10193 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10194 GEN_INT (param_ptr_offset)));
10195 RTX_FRAME_RELATED_P (insn) = 1;
10197 if (!call_used_regs[REGNO (crtl->drap_reg)])
10198 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10201 /* At this point the stack pointer must be valid, and we must have
10202 restored all of the registers. We may not have deallocated the
10203 entire stack frame. We've delayed this until now because it may
10204 be possible to merge the local stack deallocation with the
10205 deallocation forced by ix86_static_chain_on_stack. */
10206 gcc_assert (m->fs.sp_valid);
10207 gcc_assert (!m->fs.fp_valid);
10208 gcc_assert (!m->fs.realigned);
10209 if (m->fs.sp_offset != UNITS_PER_WORD)
10211 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10212 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10213 style, true);
10216 /* Sibcall epilogues don't want a return instruction. */
10219 m->fs = frame_state_save;
10223 if (crtl->args.pops_args && crtl->args.size)
10225 rtx popc = GEN_INT (crtl->args.pops_args);
10227 /* i386 can only pop 64K bytes. If asked to pop more, pop return
10228 address, do explicit add, and jump indirectly to the caller. */
10230 if (crtl->args.pops_args >= 65536)
10232 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10235 /* There is no "pascal" calling convention in any 64bit ABI. */
10236 gcc_assert (!TARGET_64BIT);
10238 insn = emit_insn (gen_pop (ecx));
10239 m->fs.cfa_offset -= UNITS_PER_WORD;
10240 m->fs.sp_offset -= UNITS_PER_WORD;
10242 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10243 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10244 add_reg_note (insn, REG_CFA_REGISTER,
10245 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10246 RTX_FRAME_RELATED_P (insn) = 1;
10248 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10250 emit_jump_insn (gen_return_indirect_internal (ecx));
10253 emit_jump_insn (gen_return_pop_internal (popc));
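/* Illustrative sketch, not code emitted verbatim here: a 32-bit callee
   popping 12 bytes of its own arguments returns with

       ret $12

   but since `ret $N' only takes a 16-bit immediate, the 64K-and-up path
   above amounts to

       popl  %ecx          ; fetch the return address
       addl  $N, %esp      ; pop the arguments explicitly
       jmp   *%ecx         ; return to the caller  */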
10256 emit_jump_insn (gen_return_internal ());
10258 /* Restore the state back to the state from the prologue,
10259 so that it's correct for the next epilogue. */
10260 m->fs = frame_state_save;
10263 /* Reset from the function's potential modifications. */
10266 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10267 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10269 if (pic_offset_table_rtx)
10270 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10272 /* Mach-O doesn't support labels at the end of objects, so if
10273 it looks like we might want one, insert a NOP. */
10275 rtx insn = get_last_insn ();
10278 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10279 insn = PREV_INSN (insn);
10283 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10284 fputs ("\tnop\n", file);
10290 /* Extract the parts of an RTL expression that is a valid memory address
10291 for an instruction. Return 0 if the structure of the address is
10292 grossly off. Return -1 if the address contains ASHIFT, so it is not
10293 strictly valid, but still used for computing the length of the lea instruction. */
10295 static int
10296 ix86_decompose_address (rtx addr, struct ix86_address *out)
10298 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10299 rtx base_reg, index_reg;
10300 HOST_WIDE_INT scale = 1;
10301 rtx scale_rtx = NULL_RTX;
10304 enum ix86_address_seg seg = SEG_DEFAULT;
10306 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10308 else if (GET_CODE (addr) == PLUS)
10310 rtx addends[4], op;
10318 addends[n++] = XEXP (op, 1);
10321 while (GET_CODE (op) == PLUS);
10326 for (i = n; i >= 0; --i)
10329 switch (GET_CODE (op))
10334 index = XEXP (op, 0);
10335 scale_rtx = XEXP (op, 1);
10341 index = XEXP (op, 0);
10342 tmp = XEXP (op, 1);
10343 if (!CONST_INT_P (tmp))
10345 scale = INTVAL (tmp);
10346 if ((unsigned HOST_WIDE_INT) scale > 3)
10348 scale = 1 << scale;
10352 if (XINT (op, 1) == UNSPEC_TP
10353 && TARGET_TLS_DIRECT_SEG_REFS
10354 && seg == SEG_DEFAULT)
10355 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10384 else if (GET_CODE (addr) == MULT)
10386 index = XEXP (addr, 0); /* index*scale */
10387 scale_rtx = XEXP (addr, 1);
10389 else if (GET_CODE (addr) == ASHIFT)
10391 /* We're called for lea too, which implements ashift on occasion. */
10392 index = XEXP (addr, 0);
10393 tmp = XEXP (addr, 1);
10394 if (!CONST_INT_P (tmp))
10396 scale = INTVAL (tmp);
10397 if ((unsigned HOST_WIDE_INT) scale > 3)
10399 scale = 1 << scale;
10403 disp = addr; /* displacement */
10405 /* Extract the integral value of scale. */
10408 if (!CONST_INT_P (scale_rtx))
10410 scale = INTVAL (scale_rtx);
10413 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10414 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10416 /* Avoid useless 0 displacement. */
10417 if (disp == const0_rtx && (base || index))
10420 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10421 if (base_reg && index_reg && scale == 1
10422 && (index_reg == arg_pointer_rtx
10423 || index_reg == frame_pointer_rtx
10424 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10427 tmp = base, base = index, index = tmp;
10428 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10431 /* Special case: %ebp cannot be encoded as a base without a displacement.
10432 Similarly %r13. */
10435 && (base_reg == hard_frame_pointer_rtx
10436 || base_reg == frame_pointer_rtx
10437 || base_reg == arg_pointer_rtx
10438 || (REG_P (base_reg)
10439 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10440 || REGNO (base_reg) == R13_REG))))
10443 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
10444 Avoid this by transforming to [%esi+0].
10445 Reload calls address legitimization without cfun defined, so we need
10446 to test cfun for being non-NULL. */
10447 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10448 && base_reg && !index_reg && !disp
10449 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10452 /* Special case: encode reg+reg instead of reg*2. */
10453 if (!base && index && scale == 2)
10454 base = index, base_reg = index_reg, scale = 1;
10456 /* Special case: scaling cannot be encoded without base or displacement. */
10457 if (!base && !disp && index && scale != 1)
10461 out->index = index;
10463 out->scale = scale;
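/* Worked example: the address 12(%ebx,%ecx,4), i.e. the RTL

       (plus (plus (mult (reg %ecx) (const_int 4)) (reg %ebx))
             (const_int 12))

   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12.  */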
10469 /* Return the cost of the memory address x.
10470 For i386, it is better to use a complex address than let gcc copy
10471 the address into a reg and make a new pseudo. But not if the address
10472 requires two regs - that would mean more pseudos with longer
10473 lifetimes. */
10474 static int
10475 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10477 struct ix86_address parts;
10479 int ok = ix86_decompose_address (x, &parts);
10483 if (parts.base && GET_CODE (parts.base) == SUBREG)
10484 parts.base = SUBREG_REG (parts.base);
10485 if (parts.index && GET_CODE (parts.index) == SUBREG)
10486 parts.index = SUBREG_REG (parts.index);
10488 /* Attempt to minimize the number of registers in the address. */
10490 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10492 && (!REG_P (parts.index)
10493 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10497 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10499 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10500 && parts.base != parts.index)
10503 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10504 since its predecode logic can't detect the length of instructions
10505 and it degenerates to vector decoded. Increase the cost of such
10506 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10507 to split such addresses or even refuse such addresses at all.
10509 The following addressing modes are affected:
10510 [base+scale*index]
10511 [scale*index+disp]
10512 [base+index]
10514 The first and last case may be avoidable by explicitly coding the zero in
10515 the memory address, but I don't have an AMD-K6 machine handy to check this
10516 theory. */
10519 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10520 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10521 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10527 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10528 this is used to form addresses to local data when -fPIC is in
10529 use. */
10531 static int
10532 darwin_local_data_pic (rtx disp)
10534 return (GET_CODE (disp) == UNSPEC
10535 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10538 /* Determine if a given RTX is a valid constant. We already know this
10539 satisfies CONSTANT_P. */
10542 legitimate_constant_p (rtx x)
10544 switch (GET_CODE (x))
10549 if (GET_CODE (x) == PLUS)
10551 if (!CONST_INT_P (XEXP (x, 1)))
10556 if (TARGET_MACHO && darwin_local_data_pic (x))
10559 /* Only some unspecs are valid as "constants". */
10560 if (GET_CODE (x) == UNSPEC)
10561 switch (XINT (x, 1))
10564 case UNSPEC_GOTOFF:
10565 case UNSPEC_PLTOFF:
10566 return TARGET_64BIT;
10568 case UNSPEC_NTPOFF:
10569 x = XVECEXP (x, 0, 0);
10570 return (GET_CODE (x) == SYMBOL_REF
10571 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10572 case UNSPEC_DTPOFF:
10573 x = XVECEXP (x, 0, 0);
10574 return (GET_CODE (x) == SYMBOL_REF
10575 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10580 /* We must have drilled down to a symbol. */
10581 if (GET_CODE (x) == LABEL_REF)
10583 if (GET_CODE (x) != SYMBOL_REF)
10588 /* TLS symbols are never valid. */
10589 if (SYMBOL_REF_TLS_MODEL (x))
10592 /* DLLIMPORT symbols are never valid. */
10593 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10594 && SYMBOL_REF_DLLIMPORT_P (x))
10599 if (GET_MODE (x) == TImode
10600 && x != CONST0_RTX (TImode)
10606 if (!standard_sse_constant_p (x))
10613 /* Otherwise we handle everything else in the move patterns. */
10617 /* Determine if it's legal to put X into the constant pool. This
10618 is not possible for the address of thread-local symbols, which
10619 is checked above. */
10622 ix86_cannot_force_const_mem (rtx x)
10624 /* We can always put integral constants and vectors in memory. */
10625 switch (GET_CODE (x))
10635 return !legitimate_constant_p (x);
10639 /* Nonzero if the constant value X is a legitimate general operand
10640 when generating PIC code. It is given that flag_pic is on and
10641 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10644 legitimate_pic_operand_p (rtx x)
10648 switch (GET_CODE (x))
10651 inner = XEXP (x, 0);
10652 if (GET_CODE (inner) == PLUS
10653 && CONST_INT_P (XEXP (inner, 1)))
10654 inner = XEXP (inner, 0);
10656 /* Only some unspecs are valid as "constants". */
10657 if (GET_CODE (inner) == UNSPEC)
10658 switch (XINT (inner, 1))
10661 case UNSPEC_GOTOFF:
10662 case UNSPEC_PLTOFF:
10663 return TARGET_64BIT;
10665 x = XVECEXP (inner, 0, 0);
10666 return (GET_CODE (x) == SYMBOL_REF
10667 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10668 case UNSPEC_MACHOPIC_OFFSET:
10669 return legitimate_pic_address_disp_p (x);
10677 return legitimate_pic_address_disp_p (x);
10684 /* Determine if a given CONST RTX is a valid memory displacement
10685 in PIC mode. */
10688 legitimate_pic_address_disp_p (rtx disp)
10692 /* In 64bit mode we can allow direct addresses of symbols and labels
10693 when they are not dynamic symbols. */
10696 rtx op0 = disp, op1;
10698 switch (GET_CODE (disp))
10704 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10706 op0 = XEXP (XEXP (disp, 0), 0);
10707 op1 = XEXP (XEXP (disp, 0), 1);
10708 if (!CONST_INT_P (op1)
10709 || INTVAL (op1) >= 16*1024*1024
10710 || INTVAL (op1) < -16*1024*1024)
10712 if (GET_CODE (op0) == LABEL_REF)
10714 if (GET_CODE (op0) != SYMBOL_REF)
10719 /* TLS references should always be enclosed in UNSPEC. */
10720 if (SYMBOL_REF_TLS_MODEL (op0))
10722 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10723 && ix86_cmodel != CM_LARGE_PIC)
10731 if (GET_CODE (disp) != CONST)
10733 disp = XEXP (disp, 0);
10737 /* It is unsafe to allow PLUS expressions; they would defeat the limited
10738 allowed distance of GOT table references. We should not need these anyway. */
10739 if (GET_CODE (disp) != UNSPEC
10740 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10741 && XINT (disp, 1) != UNSPEC_GOTOFF
10742 && XINT (disp, 1) != UNSPEC_PLTOFF))
10745 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10746 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10752 if (GET_CODE (disp) == PLUS)
10754 if (!CONST_INT_P (XEXP (disp, 1)))
10756 disp = XEXP (disp, 0);
10760 if (TARGET_MACHO && darwin_local_data_pic (disp))
10763 if (GET_CODE (disp) != UNSPEC)
10766 switch (XINT (disp, 1))
10771 /* We need to check for both symbols and labels because VxWorks loads
10772 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10773 details. */
10774 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10775 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10776 case UNSPEC_GOTOFF:
10777 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10778 While the ABI also specifies a 32bit relocation, we don't produce
10779 it in the small PIC model at all. */
10780 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10781 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10783 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10785 case UNSPEC_GOTTPOFF:
10786 case UNSPEC_GOTNTPOFF:
10787 case UNSPEC_INDNTPOFF:
10790 disp = XVECEXP (disp, 0, 0);
10791 return (GET_CODE (disp) == SYMBOL_REF
10792 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10793 case UNSPEC_NTPOFF:
10794 disp = XVECEXP (disp, 0, 0);
10795 return (GET_CODE (disp) == SYMBOL_REF
10796 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10797 case UNSPEC_DTPOFF:
10798 disp = XVECEXP (disp, 0, 0);
10799 return (GET_CODE (disp) == SYMBOL_REF
10800 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10806 /* Recognizes RTL expressions that are valid memory addresses for an
10807 instruction. The MODE argument is the machine mode for the MEM
10808 expression that wants to use this address.
10810 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10811 convert common non-canonical forms to canonical form so that they will
10812 be recognized. */
10814 static bool
10815 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10816 rtx addr, bool strict)
10818 struct ix86_address parts;
10819 rtx base, index, disp;
10820 HOST_WIDE_INT scale;
10822 if (ix86_decompose_address (addr, &parts) <= 0)
10823 /* Decomposition failed. */
10827 index = parts.index;
10829 scale = parts.scale;
10831 /* Validate base register.
10833 Don't allow SUBREG's that span more than a word here. It can lead to spill
10834 failures when the base is one word out of a two word structure, which is
10835 represented internally as a DImode int. */
10843 else if (GET_CODE (base) == SUBREG
10844 && REG_P (SUBREG_REG (base))
10845 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10847 reg = SUBREG_REG (base);
10849 /* Base is not a register. */
10852 if (GET_MODE (base) != Pmode)
10853 /* Base is not in Pmode. */
10856 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10857 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10858 /* Base is not valid. */
10862 /* Validate index register.
10864 Don't allow SUBREG's that span more than a word here -- same as above. */
10872 else if (GET_CODE (index) == SUBREG
10873 && REG_P (SUBREG_REG (index))
10874 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10876 reg = SUBREG_REG (index);
10878 /* Index is not a register. */
10881 if (GET_MODE (index) != Pmode)
10882 /* Index is not in Pmode. */
10885 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10886 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10887 /* Index is not valid. */
10891 /* Validate scale factor. */
10895 /* Scale without index. */
10898 if (scale != 2 && scale != 4 && scale != 8)
10899 /* Scale is not a valid multiplier. */
10903 /* Validate displacement. */
10906 if (GET_CODE (disp) == CONST
10907 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10908 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10909 switch (XINT (XEXP (disp, 0), 1))
10911 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10912 used. While the ABI also specifies 32bit relocations, we don't
10913 produce them at all and use IP-relative addressing instead. */
10915 case UNSPEC_GOTOFF:
10916 gcc_assert (flag_pic);
10918 goto is_legitimate_pic;
10920 /* 64bit address unspec. */
10923 case UNSPEC_GOTPCREL:
10924 gcc_assert (flag_pic);
10925 goto is_legitimate_pic;
10927 case UNSPEC_GOTTPOFF:
10928 case UNSPEC_GOTNTPOFF:
10929 case UNSPEC_INDNTPOFF:
10930 case UNSPEC_NTPOFF:
10931 case UNSPEC_DTPOFF:
10935 /* Invalid address unspec. */
10939 else if (SYMBOLIC_CONST (disp)
10943 && MACHOPIC_INDIRECT
10944 && !machopic_operand_p (disp)
10950 if (TARGET_64BIT && (index || base))
10952 /* foo@dtpoff(%rX) is ok. */
10953 if (GET_CODE (disp) != CONST
10954 || GET_CODE (XEXP (disp, 0)) != PLUS
10955 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10956 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10957 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10958 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10959 /* Non-constant pic memory reference. */
10962 else if (! legitimate_pic_address_disp_p (disp))
10963 /* Displacement is an invalid pic construct. */
10966 /* This code used to verify that a symbolic pic displacement
10967 includes the pic_offset_table_rtx register.
10969 While this is a good idea, unfortunately these constructs may
10970 be created by the "adds using lea" optimization for incorrect
10971 code like:
10973 int a;
10974 int foo (int cst)
10975 {
10976 return *(&a + cst);
10977 }
10979 This code is nonsensical, but results in addressing the
10980 GOT table with a pic_offset_table_rtx base. We can't
10981 just refuse it easily, since it gets matched by the
10982 "addsi3" pattern, which later gets split to lea in the
10983 case the output register differs from the input. While this
10984 can be handled by a separate addsi pattern for this case
10985 that never results in lea, disabling this test seems to be the
10986 easier and correct fix for the crash. */
10988 else if (GET_CODE (disp) != LABEL_REF
10989 && !CONST_INT_P (disp)
10990 && (GET_CODE (disp) != CONST
10991 || !legitimate_constant_p (disp))
10992 && (GET_CODE (disp) != SYMBOL_REF
10993 || !legitimate_constant_p (disp)))
10994 /* Displacement is not constant. */
10996 else if (TARGET_64BIT
10997 && !x86_64_immediate_operand (disp, VOIDmode))
10998 /* Displacement is out of range. */
11002 /* Everything looks valid. */
11006 /* Determine if a given RTX is a valid constant address. */
11009 constant_address_p (rtx x)
11011 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11014 /* Return a unique alias set for the GOT. */
11016 static alias_set_type
11017 ix86_GOT_alias_set (void)
11019 static alias_set_type set = -1;
11021 set = new_alias_set ();
11025 /* Return a legitimate reference for ORIG (an address) using the
11026 register REG. If REG is 0, a new pseudo is generated.
11028 There are two types of references that must be handled:
11030 1. Global data references must load the address from the GOT, via
11031 the PIC reg. An insn is emitted to do this load, and the reg is
11032 returned.
11034 2. Static data references, constant pool addresses, and code labels
11035 compute the address as an offset from the GOT, whose base is in
11036 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11037 differentiate them from global data objects. The returned
11038 address is the PIC reg + an unspec constant.
11040 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11041 reg also appears in the address. */
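/* Illustrative sketch of the resulting 32-bit code for the two cases
   above (simplified, not emitted verbatim here).  Global data goes
   through the GOT:

       movl  foo@GOT(%ebx), %eax     ; load the address of foo

   Static/local data is formed as an offset from the GOT base:

       leal  bar@GOTOFF(%ebx), %eax  ; PIC reg + unspec constant  */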
11044 legitimize_pic_address (rtx orig, rtx reg)
11047 rtx new_rtx = orig;
11051 if (TARGET_MACHO && !TARGET_64BIT)
11054 reg = gen_reg_rtx (Pmode);
11055 /* Use the generic Mach-O PIC machinery. */
11056 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11060 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11062 else if (TARGET_64BIT
11063 && ix86_cmodel != CM_SMALL_PIC
11064 && gotoff_operand (addr, Pmode))
11067 /* This symbol may be referenced via a displacement from the PIC
11068 base address (@GOTOFF). */
11070 if (reload_in_progress)
11071 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11072 if (GET_CODE (addr) == CONST)
11073 addr = XEXP (addr, 0);
11074 if (GET_CODE (addr) == PLUS)
11076 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11078 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11081 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11082 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11084 tmpreg = gen_reg_rtx (Pmode);
11087 emit_move_insn (tmpreg, new_rtx);
11091 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11092 tmpreg, 1, OPTAB_DIRECT);
11095 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11097 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11099 /* This symbol may be referenced via a displacement from the PIC
11100 base address (@GOTOFF). */
11102 if (reload_in_progress)
11103 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11104 if (GET_CODE (addr) == CONST)
11105 addr = XEXP (addr, 0);
11106 if (GET_CODE (addr) == PLUS)
11108 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11110 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11113 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11114 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11115 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11119 emit_move_insn (reg, new_rtx);
11123 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11124 /* We can't use @GOTOFF for text labels on VxWorks;
11125 see gotoff_operand. */
11126 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11128 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11130 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11131 return legitimize_dllimport_symbol (addr, true);
11132 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11133 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11134 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11136 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11137 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11141 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11143 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11144 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11145 new_rtx = gen_const_mem (Pmode, new_rtx);
11146 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11149 reg = gen_reg_rtx (Pmode);
11150 /* Use gen_movsi directly; otherwise the address is loaded
11151 into a register for CSE. We don't want to CSE these addresses;
11152 instead we CSE addresses from the GOT table, so skip this. */
11153 emit_insn (gen_movsi (reg, new_rtx));
11158 /* This symbol must be referenced via a load from the
11159 Global Offset Table (@GOT). */
11161 if (reload_in_progress)
11162 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11163 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11164 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11166 new_rtx = force_reg (Pmode, new_rtx);
11167 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11168 new_rtx = gen_const_mem (Pmode, new_rtx);
11169 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11172 reg = gen_reg_rtx (Pmode);
11173 emit_move_insn (reg, new_rtx);
11179 if (CONST_INT_P (addr)
11180 && !x86_64_immediate_operand (addr, VOIDmode))
11184 emit_move_insn (reg, addr);
11188 new_rtx = force_reg (Pmode, addr);
11190 else if (GET_CODE (addr) == CONST)
11192 addr = XEXP (addr, 0);
11194 /* We must match stuff we generate before. Assume the only
11195 unspecs that can get here are ours. Not that we could do
11196 anything with them anyway.... */
11197 if (GET_CODE (addr) == UNSPEC
11198 || (GET_CODE (addr) == PLUS
11199 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11201 gcc_assert (GET_CODE (addr) == PLUS);
11203 if (GET_CODE (addr) == PLUS)
11205 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11207 /* Check first to see if this is a constant offset from a @GOTOFF
11208 symbol reference. */
11209 if (gotoff_operand (op0, Pmode)
11210 && CONST_INT_P (op1))
11214 if (reload_in_progress)
11215 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11216 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11218 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11219 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11220 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11224 emit_move_insn (reg, new_rtx);
11230 if (INTVAL (op1) < -16*1024*1024
11231 || INTVAL (op1) >= 16*1024*1024)
11233 if (!x86_64_immediate_operand (op1, Pmode))
11234 op1 = force_reg (Pmode, op1);
11235 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11241 base = legitimize_pic_address (XEXP (addr, 0), reg);
11242 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11243 base == reg ? NULL_RTX : reg);
11245 if (CONST_INT_P (new_rtx))
11246 new_rtx = plus_constant (base, INTVAL (new_rtx));
11249 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11251 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11252 new_rtx = XEXP (new_rtx, 1);
11254 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11262 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11265 get_thread_pointer (int to_reg)
11269 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11273 reg = gen_reg_rtx (Pmode);
11274 insn = gen_rtx_SET (VOIDmode, reg, tp);
11275 insn = emit_insn (insn);
11277 return reg;
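/* Illustrative note: with GNU TLS the thread pointer lives in a segment
   register, so forcing it into a register expands to something like
   "movl %gs:0, %eax" in 32-bit mode or "movq %fs:0, %rax" in 64-bit
   mode.  */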
11280 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11281 false if we expect this to be used for a memory address and true if
11282 we expect to load the address into a register. */
11285 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11287 rtx dest, base, off, pic, tp;
11292 case TLS_MODEL_GLOBAL_DYNAMIC:
11293 dest = gen_reg_rtx (Pmode);
11294 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11296 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11298 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11301 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11302 insns = get_insns ();
11305 RTL_CONST_CALL_P (insns) = 1;
11306 emit_libcall_block (insns, dest, rax, x);
11308 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11309 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11311 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11313 if (TARGET_GNU2_TLS)
11315 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11317 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11321 case TLS_MODEL_LOCAL_DYNAMIC:
11322 base = gen_reg_rtx (Pmode);
11323 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11325 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11327 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11330 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11331 insns = get_insns ();
11334 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11335 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11336 RTL_CONST_CALL_P (insns) = 1;
11337 emit_libcall_block (insns, base, rax, note);
11339 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11340 emit_insn (gen_tls_local_dynamic_base_64 (base));
11342 emit_insn (gen_tls_local_dynamic_base_32 (base));
11344 if (TARGET_GNU2_TLS)
11346 rtx x = ix86_tls_module_base ();
11348 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11349 gen_rtx_MINUS (Pmode, x, tp));
11352 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11353 off = gen_rtx_CONST (Pmode, off);
11355 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11357 if (TARGET_GNU2_TLS)
11359 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11361 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11366 case TLS_MODEL_INITIAL_EXEC:
11370 type = UNSPEC_GOTNTPOFF;
11374 if (reload_in_progress)
11375 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11376 pic = pic_offset_table_rtx;
11377 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11379 else if (!TARGET_ANY_GNU_TLS)
11381 pic = gen_reg_rtx (Pmode);
11382 emit_insn (gen_set_got (pic));
11383 type = UNSPEC_GOTTPOFF;
11388 type = UNSPEC_INDNTPOFF;
11391 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11392 off = gen_rtx_CONST (Pmode, off);
11394 off = gen_rtx_PLUS (Pmode, pic, off);
11395 off = gen_const_mem (Pmode, off);
11396 set_mem_alias_set (off, ix86_GOT_alias_set ());
11398 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11400 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11401 off = force_reg (Pmode, off);
11402 return gen_rtx_PLUS (Pmode, base, off);
11406 base = get_thread_pointer (true);
11407 dest = gen_reg_rtx (Pmode);
11408 emit_insn (gen_subsi3 (dest, base, off));
11412 case TLS_MODEL_LOCAL_EXEC:
11413 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11414 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11415 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11416 off = gen_rtx_CONST (Pmode, off);
11418 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11420 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11421 return gen_rtx_PLUS (Pmode, base, off);
11425 base = get_thread_pointer (true);
11426 dest = gen_reg_rtx (Pmode);
11427 emit_insn (gen_subsi3 (dest, base, off));
11432 gcc_unreachable ();
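/* Illustrative sketch (simplified) of the classic 32-bit GNU TLS
   sequences the models above expand to:

       global dynamic:  leal  x@tlsgd(,%ebx,1), %eax
                        call  ___tls_get_addr

       local exec:      movl  %gs:0, %eax
                        leal  x@ntpoff(%eax), %eax  */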
11438 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11439 to symbol DECL. */
11441 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11442 htab_t dllimport_map;
11445 get_dllimport_decl (tree decl)
11447 struct tree_map *h, in;
11450 const char *prefix;
11451 size_t namelen, prefixlen;
11456 if (!dllimport_map)
11457 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11459 in.hash = htab_hash_pointer (decl);
11460 in.base.from = decl;
11461 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11462 h = (struct tree_map *) *loc;
11466 *loc = h = ggc_alloc_tree_map ();
11468 h->base.from = decl;
11469 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11470 VAR_DECL, NULL, ptr_type_node);
11471 DECL_ARTIFICIAL (to) = 1;
11472 DECL_IGNORED_P (to) = 1;
11473 DECL_EXTERNAL (to) = 1;
11474 TREE_READONLY (to) = 1;
11476 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11477 name = targetm.strip_name_encoding (name);
11478 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11479 ? "*__imp_" : "*__imp__";
11480 namelen = strlen (name);
11481 prefixlen = strlen (prefix);
11482 imp_name = (char *) alloca (namelen + prefixlen + 1);
11483 memcpy (imp_name, prefix, prefixlen);
11484 memcpy (imp_name + prefixlen, name, namelen + 1);
11486 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11487 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11488 SET_SYMBOL_REF_DECL (rtl, to);
11489 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11491 rtl = gen_const_mem (Pmode, rtl);
11492 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11494 SET_DECL_RTL (to, rtl);
11495 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
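/* Illustrative example: for a variable `foo' declared dllimport, the
   decl built above refers to the import-table slot, so a use of foo
   amounts to roughly

       movl  __imp__foo, %eax   ; load &foo from the import table
       movl  (%eax), %eax       ; load foo itself  */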
11500 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11501 true if we require the result be a register. */
11504 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11509 gcc_assert (SYMBOL_REF_DECL (symbol));
11510 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11512 x = DECL_RTL (imp_decl);
11514 x = force_reg (Pmode, x);
11518 /* Try machine-dependent ways of modifying an illegitimate address
11519 to be legitimate. If we find one, return the new, valid address.
11520 This macro is used in only one place: `memory_address' in explow.c.
11522 OLDX is the address as it was before break_out_memory_refs was called.
11523 In some cases it is useful to look at this to decide what needs to be done.
11525 It is always safe for this macro to do nothing. It exists to recognize
11526 opportunities to optimize the output.
11528 For the 80386, we handle X+REG by loading X into a register R and
11529 using R+REG. R will go in a general reg and indexing will be used.
11530 However, if REG is a broken-out memory address or multiplication,
11531 nothing needs to be done because REG can certainly go in a general reg.
11533 When -fpic is used, special handling is needed for symbolic references.
11534 See comments by legitimize_pic_address in i386.c for details. */
11537 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11538 enum machine_mode mode)
11543 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11545 return legitimize_tls_address (x, (enum tls_model) log, false);
11546 if (GET_CODE (x) == CONST
11547 && GET_CODE (XEXP (x, 0)) == PLUS
11548 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11549 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11551 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11552 (enum tls_model) log, false);
11553 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11556 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11558 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11559 return legitimize_dllimport_symbol (x, true);
11560 if (GET_CODE (x) == CONST
11561 && GET_CODE (XEXP (x, 0)) == PLUS
11562 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11563 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11565 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11566 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11570 if (flag_pic && SYMBOLIC_CONST (x))
11571 return legitimize_pic_address (x, 0);
11573 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11574 if (GET_CODE (x) == ASHIFT
11575 && CONST_INT_P (XEXP (x, 1))
11576 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11579 log = INTVAL (XEXP (x, 1));
11580 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11581 GEN_INT (1 << log));
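/* Worked example: (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), the scale*index shape that
   ix86_decompose_address recognizes.  */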
11584 if (GET_CODE (x) == PLUS)
11586 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11588 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11589 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11590 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11593 log = INTVAL (XEXP (XEXP (x, 0), 1));
11594 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11595 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11596 GEN_INT (1 << log));
11599 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11600 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11601 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11604 log = INTVAL (XEXP (XEXP (x, 1), 1));
11605 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11606 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11607 GEN_INT (1 << log));
11610 /* Put multiply first if it isn't already. */
11611 if (GET_CODE (XEXP (x, 1)) == MULT)
11613 rtx tmp = XEXP (x, 0);
11614 XEXP (x, 0) = XEXP (x, 1);
11619 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11620 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11621 created by virtual register instantiation, register elimination, and
11622 similar optimizations. */
11623 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11626 x = gen_rtx_PLUS (Pmode,
11627 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11628 XEXP (XEXP (x, 1), 0)),
11629 XEXP (XEXP (x, 1), 1));
11632 /* Canonicalize
11633 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11634 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11635 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11636 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11637 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11638 && CONSTANT_P (XEXP (x, 1)))
11641 rtx other = NULL_RTX;
11643 if (CONST_INT_P (XEXP (x, 1)))
11645 constant = XEXP (x, 1);
11646 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11648 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11650 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11651 other = XEXP (x, 1);
11659 x = gen_rtx_PLUS (Pmode,
11660 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11661 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11662 plus_constant (other, INTVAL (constant)));
11666 if (changed && ix86_legitimate_address_p (mode, x, false))
11669 if (GET_CODE (XEXP (x, 0)) == MULT)
11672 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11675 if (GET_CODE (XEXP (x, 1)) == MULT)
11678 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11682 && REG_P (XEXP (x, 1))
11683 && REG_P (XEXP (x, 0)))
11686 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11689 x = legitimize_pic_address (x, 0);
11692 if (changed && ix86_legitimate_address_p (mode, x, false))
11695 if (REG_P (XEXP (x, 0)))
11697 rtx temp = gen_reg_rtx (Pmode);
11698 rtx val = force_operand (XEXP (x, 1), temp);
11700 emit_move_insn (temp, val);
11702 XEXP (x, 1) = temp;
11706 else if (REG_P (XEXP (x, 1)))
11708 rtx temp = gen_reg_rtx (Pmode);
11709 rtx val = force_operand (XEXP (x, 0), temp);
11711 emit_move_insn (temp, val);
11713 XEXP (x, 0) = temp;
11721 /* Print an integer constant expression in assembler syntax. Addition
11722 and subtraction are the only arithmetic that may appear in these
11723 expressions. FILE is the stdio stream to write to, X is the rtx, and
11724 CODE is the operand print code from the output string. */
11727 output_pic_addr_const (FILE *file, rtx x, int code)
11731 switch (GET_CODE (x))
11734 gcc_assert (flag_pic);
11739 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11740 output_addr_const (file, x);
11743 const char *name = XSTR (x, 0);
11745 /* Mark the decl as referenced so that cgraph will
11746 output the function. */
11747 if (SYMBOL_REF_DECL (x))
11748 mark_decl_referenced (SYMBOL_REF_DECL (x));
11751 if (MACHOPIC_INDIRECT
11752 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11753 name = machopic_indirection_name (x, /*stub_p=*/true);
11755 assemble_name (file, name);
11757 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11758 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11759 fputs ("@PLT", file);
11766 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11767 assemble_name (asm_out_file, buf);
11771 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11775 /* This used to output parentheses around the expression,
11776 but that does not work on the 386 (either ATT or BSD assembler). */
11777 output_pic_addr_const (file, XEXP (x, 0), code);
11781 if (GET_MODE (x) == VOIDmode)
11783 /* We can use %d if the number is <32 bits and positive. */
11784 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11785 fprintf (file, "0x%lx%08lx",
11786 (unsigned long) CONST_DOUBLE_HIGH (x),
11787 (unsigned long) CONST_DOUBLE_LOW (x));
11789 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11792 /* We can't handle floating point constants;
11793 TARGET_PRINT_OPERAND must handle them. */
11794 output_operand_lossage ("floating constant misused");
11798 /* Some assemblers need integer constants to appear first. */
11799 if (CONST_INT_P (XEXP (x, 0)))
11801 output_pic_addr_const (file, XEXP (x, 0), code);
11803 output_pic_addr_const (file, XEXP (x, 1), code);
11807 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11808 output_pic_addr_const (file, XEXP (x, 1), code);
11810 output_pic_addr_const (file, XEXP (x, 0), code);
11816 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11817 output_pic_addr_const (file, XEXP (x, 0), code);
11819 output_pic_addr_const (file, XEXP (x, 1), code);
11821 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11825 gcc_assert (XVECLEN (x, 0) == 1);
11826 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11827 switch (XINT (x, 1))
11830 fputs ("@GOT", file);
11832 case UNSPEC_GOTOFF:
11833 fputs ("@GOTOFF", file);
11835 case UNSPEC_PLTOFF:
11836 fputs ("@PLTOFF", file);
11838 case UNSPEC_GOTPCREL:
11839 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11840 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11842 case UNSPEC_GOTTPOFF:
11843 /* FIXME: This might be @TPOFF in Sun ld too. */
11844 fputs ("@gottpoff", file);
11847 fputs ("@tpoff", file);
11849 case UNSPEC_NTPOFF:
11851 fputs ("@tpoff", file);
11853 fputs ("@ntpoff", file);
11855 case UNSPEC_DTPOFF:
11856 fputs ("@dtpoff", file);
11858 case UNSPEC_GOTNTPOFF:
11860 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11861 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11863 fputs ("@gotntpoff", file);
11865 case UNSPEC_INDNTPOFF:
11866 fputs ("@indntpoff", file);
11869 case UNSPEC_MACHOPIC_OFFSET:
11871 machopic_output_function_base_name (file);
11875 output_operand_lossage ("invalid UNSPEC as operand");
11881 output_operand_lossage ("invalid expression as operand");
11885 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11886 We need to emit DTP-relative relocations. */
11888 static void ATTRIBUTE_UNUSED
11889 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11891 fputs (ASM_LONG, file);
11892 output_addr_const (file, x);
11893 fputs ("@dtpoff", file);
11899 fputs (", 0", file);
11902 gcc_unreachable ();
11906 /* Return true if X is a representation of the PIC register. This copes
11907 with calls from ix86_find_base_term, where the register might have
11908 been replaced by a cselib value. */
11911 ix86_pic_register_p (rtx x)
11913 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11914 return (pic_offset_table_rtx
11915 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11917 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11920 /* In the name of slightly smaller debug output, and to cater to
11921 general assembler lossage, recognize PIC+GOTOFF and turn it back
11922 into a direct symbol reference.
11924 On Darwin, this is necessary to avoid a crash, because Darwin
11925 has a different PIC label for each routine but the DWARF debugging
11926 information is not associated with any particular routine, so it's
11927 necessary to remove references to the PIC label from RTL stored by
11928 the DWARF output code. */
11931 ix86_delegitimize_address (rtx x)
11933 rtx orig_x = delegitimize_mem_from_attrs (x);
11934 /* addend is NULL or some rtx if x is something+GOTOFF where
11935 something doesn't include the PIC register. */
11936 rtx addend = NULL_RTX;
11937 /* reg_addend is NULL or a multiple of some register. */
11938 rtx reg_addend = NULL_RTX;
11939 /* const_addend is NULL or a const_int. */
11940 rtx const_addend = NULL_RTX;
11941 /* This is the result, or NULL. */
11942 rtx result = NULL_RTX;
11951 if (GET_CODE (x) != CONST
11952 || GET_CODE (XEXP (x, 0)) != UNSPEC
11953 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11954 || !MEM_P (orig_x))
11956 x = XVECEXP (XEXP (x, 0), 0, 0);
11957 if (GET_MODE (orig_x) != Pmode)
11958 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11962 if (GET_CODE (x) != PLUS
11963 || GET_CODE (XEXP (x, 1)) != CONST)
11966 if (ix86_pic_register_p (XEXP (x, 0)))
11967 /* %ebx + GOT/GOTOFF */
11969 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11971 /* %ebx + %reg * scale + GOT/GOTOFF */
11972 reg_addend = XEXP (x, 0);
11973 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11974 reg_addend = XEXP (reg_addend, 1);
11975 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11976 reg_addend = XEXP (reg_addend, 0);
11979 reg_addend = NULL_RTX;
11980 addend = XEXP (x, 0);
11984 addend = XEXP (x, 0);
11986 x = XEXP (XEXP (x, 1), 0);
11987 if (GET_CODE (x) == PLUS
11988 && CONST_INT_P (XEXP (x, 1)))
11990 const_addend = XEXP (x, 1);
11994 if (GET_CODE (x) == UNSPEC
11995 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11996 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11997 result = XVECEXP (x, 0, 0);
11999 if (TARGET_MACHO && darwin_local_data_pic (x)
12000 && !MEM_P (orig_x))
12001 result = XVECEXP (x, 0, 0);
12007 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12009 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12012 /* If the rest of the original X doesn't involve the PIC register, add
12013 addend and subtract pic_offset_table_rtx. This can happen e.g.
12015 leal (%ebx, %ecx, 4), %ecx
12017 movl foo@GOTOFF(%ecx), %edx
12018 in which case we return (%ecx - %ebx) + foo. */
12019 if (pic_offset_table_rtx)
12020 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12021 pic_offset_table_rtx),
12026 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12027 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12031 /* If X is a machine specific address (i.e. a symbol or label being
12032 referenced as a displacement from the GOT implemented using an
12033 UNSPEC), then return the base term. Otherwise return X. */
12036 ix86_find_base_term (rtx x)
12042 if (GET_CODE (x) != CONST)
12044 term = XEXP (x, 0);
12045 if (GET_CODE (term) == PLUS
12046 && (CONST_INT_P (XEXP (term, 1))
12047 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12048 term = XEXP (term, 0);
12049 if (GET_CODE (term) != UNSPEC
12050 || XINT (term, 1) != UNSPEC_GOTPCREL)
12053 return XVECEXP (term, 0, 0);
12056 return ix86_delegitimize_address (x);
12060 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12061 int fp, FILE *file)
12063 const char *suffix;
12065 if (mode == CCFPmode || mode == CCFPUmode)
12067 code = ix86_fp_compare_code_to_integer (code);
12071 code = reverse_condition (code);
12122 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12126 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12127 Those same assemblers have the same but opposite lossage on cmov. */
12128 if (mode == CCmode)
12129 suffix = fp ? "nbe" : "a";
12130 else if (mode == CCCmode)
12133 gcc_unreachable ();
12149 gcc_unreachable ();
12153 gcc_assert (mode == CCmode || mode == CCCmode);
12170 gcc_unreachable ();
12174 /* ??? As above. */
12175 gcc_assert (mode == CCmode || mode == CCCmode);
12176 suffix = fp ? "nb" : "ae";
12179 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12183 /* ??? As above. */
12184 if (mode == CCmode)
12186 else if (mode == CCCmode)
12187 suffix = fp ? "nb" : "ae";
12189 gcc_unreachable ();
12192 suffix = fp ? "u" : "p";
12195 suffix = fp ? "nu" : "np";
12198 gcc_unreachable ();
12200 fputs (suffix, file);
12203 /* Print the name of register X to FILE based on its machine mode and number.
12204 If CODE is 'w', pretend the mode is HImode.
12205 If CODE is 'b', pretend the mode is QImode.
12206 If CODE is 'k', pretend the mode is SImode.
12207 If CODE is 'q', pretend the mode is DImode.
12208 If CODE is 'x', pretend the mode is V4SFmode.
12209 If CODE is 't', pretend the mode is V8SFmode.
12210 If CODE is 'h', pretend the reg is the 'high' byte register.
12211 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12212 If CODE is 'd', duplicate the operand for AVX instruction.
12216 print_reg (rtx x, int code, FILE *file)
12219 bool duplicated = code == 'd' && TARGET_AVX;
12221 gcc_assert (x == pc_rtx
12222 || (REGNO (x) != ARG_POINTER_REGNUM
12223 && REGNO (x) != FRAME_POINTER_REGNUM
12224 && REGNO (x) != FLAGS_REG
12225 && REGNO (x) != FPSR_REG
12226 && REGNO (x) != FPCR_REG));
12228 if (ASSEMBLER_DIALECT == ASM_ATT)
12233 gcc_assert (TARGET_64BIT);
12234 fputs ("rip", file);
12238 if (code == 'w' || MMX_REG_P (x))
12240 else if (code == 'b')
12242 else if (code == 'k')
12244 else if (code == 'q')
12246 else if (code == 'y')
12248 else if (code == 'h')
12250 else if (code == 'x')
12252 else if (code == 't')
12255 code = GET_MODE_SIZE (GET_MODE (x));
12257 /* Irritatingly, AMD extended registers use a different naming convention
12258 from the normal registers. */
12259 if (REX_INT_REG_P (x))
12261 gcc_assert (TARGET_64BIT);
12265 error ("extended registers have no high halves");
12268 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12271 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12274 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12277 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12280 error ("unsupported operand size for extended register");
12290 if (STACK_TOP_P (x))
12299 if (! ANY_FP_REG_P (x))
12300 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12305 reg = hi_reg_name[REGNO (x)];
12308 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12310 reg = qi_reg_name[REGNO (x)];
12313 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12315 reg = qi_high_reg_name[REGNO (x)];
12320 gcc_assert (!duplicated);
12322 fputs (hi_reg_name[REGNO (x)] + 1, file);
12327 gcc_unreachable ();
12333 if (ASSEMBLER_DIALECT == ASM_ATT)
12334 fprintf (file, ", %%%s", reg);
12336 fprintf (file, ", %s", reg);
12340 /* Locate some local-dynamic symbol still in use by this function
12341 so that we can print its name in some tls_local_dynamic_base
12342 pattern. */
12344 static int
12345 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12349 if (GET_CODE (x) == SYMBOL_REF
12350 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12352 cfun->machine->some_ld_name = XSTR (x, 0);
12359 static const char *
12360 get_some_local_dynamic_name (void)
12364 if (cfun->machine->some_ld_name)
12365 return cfun->machine->some_ld_name;
12367 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12368 if (NONDEBUG_INSN_P (insn)
12369 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12370 return cfun->machine->some_ld_name;
12375 /* Meaning of CODE:
12376 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12377 C -- print opcode suffix for set/cmov insn.
12378 c -- like C, but print reversed condition
12379 F,f -- likewise, but for floating-point.
12380 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12381 otherwise nothing
12382 R -- print the prefix for register names.
12383 z -- print the opcode suffix for the size of the current operand.
12384 Z -- likewise, with special suffixes for x87 instructions.
12385 * -- print a star (in certain assembler syntax)
12386 A -- print an absolute memory reference.
12387 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12388 s -- print a shift double count, followed by the assembler's argument
12389 delimiter.
12390 b -- print the QImode name of the register for the indicated operand.
12391 %b0 would print %al if operands[0] is reg 0.
12392 w -- likewise, print the HImode name of the register.
12393 k -- likewise, print the SImode name of the register.
12394 q -- likewise, print the DImode name of the register.
12395 x -- likewise, print the V4SFmode name of the register.
12396 t -- likewise, print the V8SFmode name of the register.
12397 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12398 y -- print "st(0)" instead of "st" as a register.
12399 d -- print duplicated register operand for AVX instruction.
12400 D -- print condition for SSE cmp instruction.
12401 P -- if PIC, print an @PLT suffix.
12402 X -- don't print any sort of PIC '@' suffix for a symbol.
12403 & -- print some in-use local-dynamic symbol name.
12404 H -- print a memory address offset by 8; used for sse high-parts
12405 Y -- print condition for XOP pcom* instruction.
12406 + -- print a branch hint as 'cs' or 'ds' prefix
12407 ; -- print a semicolon (after prefixes due to bug in older gas).
12408 @ -- print a segment register of thread base pointer load
12412 ix86_print_operand (FILE *file, rtx x, int code)
12419 if (ASSEMBLER_DIALECT == ASM_ATT)
12425 const char *name = get_some_local_dynamic_name ();
12427 output_operand_lossage ("'%%&' used without any "
12428 "local dynamic TLS references");
12430 assemble_name (file, name);
12435 switch (ASSEMBLER_DIALECT)
12442 /* Intel syntax. For absolute addresses, registers should not
12443 be surrounded by braces. */
12447 ix86_print_operand (file, x, 0);
12454 gcc_unreachable ();
12457 ix86_print_operand (file, x, 0);
12462 if (ASSEMBLER_DIALECT == ASM_ATT)
12467 if (ASSEMBLER_DIALECT == ASM_ATT)
12472 if (ASSEMBLER_DIALECT == ASM_ATT)
12477 if (ASSEMBLER_DIALECT == ASM_ATT)
12482 if (ASSEMBLER_DIALECT == ASM_ATT)
12487 if (ASSEMBLER_DIALECT == ASM_ATT)
12492 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12494 /* Opcodes don't get size suffixes if using Intel opcodes. */
12495 if (ASSEMBLER_DIALECT == ASM_INTEL)
12498 switch (GET_MODE_SIZE (GET_MODE (x)))
12517 output_operand_lossage
12518 ("invalid operand size for operand code '%c'", code);
12523 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12525 (0, "non-integer operand used with operand code '%c'", code);
12529 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12530 if (ASSEMBLER_DIALECT == ASM_INTEL)
12533 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12535 switch (GET_MODE_SIZE (GET_MODE (x)))
12538 #ifdef HAVE_AS_IX86_FILDS
12548 #ifdef HAVE_AS_IX86_FILDQ
12551 fputs ("ll", file);
12559 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12561 /* 387 opcodes don't get size suffixes
12562 if the operands are registers. */
12563 if (STACK_REG_P (x))
12566 switch (GET_MODE_SIZE (GET_MODE (x)))
12587 output_operand_lossage
12588 ("invalid operand type used with operand code '%c'", code);
12592 output_operand_lossage
12593 ("invalid operand size for operand code '%c'", code);
12610 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12612 ix86_print_operand (file, x, 0);
12613 fputs (", ", file);
12618 /* Little bit of braindamage here. The SSE compare instructions
12619 use completely different names for the comparisons than the
12620 fp conditional moves do. */
12623 switch (GET_CODE (x))
12626 fputs ("eq", file);
12629 fputs ("eq_us", file);
12632 fputs ("lt", file);
12635 fputs ("nge", file);
12638 fputs ("le", file);
12641 fputs ("ngt", file);
12644 fputs ("unord", file);
12647 fputs ("neq", file);
12650 fputs ("neq_oq", file);
12653 fputs ("ge", file);
12656 fputs ("nlt", file);
12659 fputs ("gt", file);
12662 fputs ("nle", file);
12665 fputs ("ord", file);
12668 output_operand_lossage ("operand is not a condition code, "
12669 "invalid operand code 'D'");
12675 switch (GET_CODE (x))
12679 fputs ("eq", file);
12683 fputs ("lt", file);
12687 fputs ("le", file);
12690 fputs ("unord", file);
12694 fputs ("neq", file);
12698 fputs ("nlt", file);
12702 fputs ("nle", file);
12705 fputs ("ord", file);
12708 output_operand_lossage ("operand is not a condition code, "
12709 "invalid operand code 'D'");
12715 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12716 if (ASSEMBLER_DIALECT == ASM_ATT)
12718 switch (GET_MODE (x))
12720 case HImode: putc ('w', file); break;
12722 case SFmode: putc ('l', file); break;
12724 case DFmode: putc ('q', file); break;
12725 default: gcc_unreachable ();
12732 if (!COMPARISON_P (x))
12734 output_operand_lossage ("operand is neither a constant nor a "
12735 "condition code, invalid operand code "
12739 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12742 if (!COMPARISON_P (x))
12744 output_operand_lossage ("operand is neither a constant nor a "
12745 "condition code, invalid operand code "
12749 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12750 if (ASSEMBLER_DIALECT == ASM_ATT)
12753 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
/* Like above, but reverse the condition.  */
12758 /* Check to see if argument to %c is really a constant
12759 and not a condition code which needs to be reversed. */
12760 if (!COMPARISON_P (x))
12762 output_operand_lossage ("operand is neither a constant nor a "
12763 "condition code, invalid operand "
12767 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12770 if (!COMPARISON_P (x))
12772 output_operand_lossage ("operand is neither a constant nor a "
12773 "condition code, invalid operand "
12777 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12778 if (ASSEMBLER_DIALECT == ASM_ATT)
12781 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12785 /* It doesn't actually matter what mode we use here, as we're
12786 only going to use this for printing. */
12787 x = adjust_address_nv (x, DImode, 8);
12795 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12798 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12801 int pred_val = INTVAL (XEXP (x, 0));
12803 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12804 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12806 int taken = pred_val > REG_BR_PROB_BASE / 2;
12807 int cputaken = final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only in the case where the default branch prediction
   heuristics would fail.  */
12811 if (taken != cputaken)
12813 /* We use 3e (DS) prefix for taken branches and
12814 2e (CS) prefix for not taken branches. */
12816 fputs ("ds ; ", file);
12818 fputs ("cs ; ", file);
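	      /* For example (sketch, hypothetical label): a forward
		 branch, which the CPU statically predicts not taken,
		 that profile data says is usually taken comes out as
		     ds ; jne .L2
		 so the 0x3e prefix overrides the default guess.  */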
12826 switch (GET_CODE (x))
12829 fputs ("neq", file);
12832 fputs ("eq", file);
12836 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12840 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12844 fputs ("le", file);
12848 fputs ("lt", file);
12851 fputs ("unord", file);
12854 fputs ("ord", file);
12857 fputs ("ueq", file);
12860 fputs ("nlt", file);
12863 fputs ("nle", file);
12866 fputs ("ule", file);
12869 fputs ("ult", file);
12872 fputs ("une", file);
12875 output_operand_lossage ("operand is not a condition code, "
12876 "invalid operand code 'Y'");
12882 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12888 if (ASSEMBLER_DIALECT == ASM_ATT)
12891 /* The kernel uses a different segment register for performance
12892 reasons; a system call would not have to trash the userspace
12893 segment register, which would be expensive. */
12894 if (TARGET_64BIT && ix86_cmodel != CM_KERNEL)
12895 fputs ("fs", file);
12897 fputs ("gs", file);
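      /* Illustrative sketch (hypothetical insn): in 64-bit userspace
	 the thread base pointer is read as
	     movq %fs:0, %rax
	 while 32-bit code and the 64-bit kernel use %gs instead,
	 matching the test above.  */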
12901 output_operand_lossage ("invalid operand code '%c'", code);
12906 print_reg (x, code, file);
12908 else if (MEM_P (x))
12910 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12911 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12912 && GET_MODE (x) != BLKmode)
12915 switch (GET_MODE_SIZE (GET_MODE (x)))
12917 case 1: size = "BYTE"; break;
12918 case 2: size = "WORD"; break;
12919 case 4: size = "DWORD"; break;
12920 case 8: size = "QWORD"; break;
12921 case 12: size = "TBYTE"; break;
12923 if (GET_MODE (x) == XFmode)
12928 case 32: size = "YMMWORD"; break;
12930 gcc_unreachable ();
12933 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12936 else if (code == 'w')
12938 else if (code == 'k')
12941 fputs (size, file);
12942 fputs (" PTR ", file);
12946 /* Avoid (%rip) for call operands. */
12947 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12948 && !CONST_INT_P (x))
12949 output_addr_const (file, x);
12950 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12951 output_operand_lossage ("invalid constraints for operand");
12953 output_address (x);
12956 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12961 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12962 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12964 if (ASSEMBLER_DIALECT == ASM_ATT)
/* Sign extend the 32-bit SFmode immediate to 8 bytes.  */
12968 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
12970 fprintf (file, "0x%08x", (unsigned int) l);
12973 /* These float cases don't actually occur as immediate operands. */
12974 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12978 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12979 fputs (dstr, file);
12982 else if (GET_CODE (x) == CONST_DOUBLE
12983 && GET_MODE (x) == XFmode)
12987 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12988 fputs (dstr, file);
12993 /* We have patterns that allow zero sets of memory, for instance.
12994 In 64-bit mode, we should probably support all 8-byte vectors,
12995 since we can in fact encode that into an immediate. */
12996 if (GET_CODE (x) == CONST_VECTOR)
12998 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
13004 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
13006 if (ASSEMBLER_DIALECT == ASM_ATT)
13009 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13010 || GET_CODE (x) == LABEL_REF)
13012 if (ASSEMBLER_DIALECT == ASM_ATT)
13015 fputs ("OFFSET FLAT:", file);
13018 if (CONST_INT_P (x))
13019 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13021 output_pic_addr_const (file, x, code);
13023 output_addr_const (file, x);
13028 ix86_print_operand_punct_valid_p (unsigned char code)
13030 return (code == '@' || code == '*' || code == '+'
13031 || code == '&' || code == ';');
13034 /* Print a memory operand whose address is ADDR. */
13037 ix86_print_operand_address (FILE *file, rtx addr)
13039 struct ix86_address parts;
13040 rtx base, index, disp;
13042 int ok = ix86_decompose_address (addr, &parts);
13047 index = parts.index;
13049 scale = parts.scale;
13057 if (ASSEMBLER_DIALECT == ASM_ATT)
13059 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13062 gcc_unreachable ();
/* Use the one-byte-shorter RIP-relative addressing in 64-bit mode.  */
13066 if (TARGET_64BIT && !base && !index)
13070 if (GET_CODE (disp) == CONST
13071 && GET_CODE (XEXP (disp, 0)) == PLUS
13072 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13073 symbol = XEXP (XEXP (disp, 0), 0);
13075 if (GET_CODE (symbol) == LABEL_REF
13076 || (GET_CODE (symbol) == SYMBOL_REF
13077 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13080 if (!base && !index)
/* A displacement-only address requires special attention.  */
13084 if (CONST_INT_P (disp))
13086 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13087 fputs ("ds:", file);
13088 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13091 output_pic_addr_const (file, disp, 0);
13093 output_addr_const (file, disp);
13097 if (ASSEMBLER_DIALECT == ASM_ATT)
13102 output_pic_addr_const (file, disp, 0);
13103 else if (GET_CODE (disp) == LABEL_REF)
13104 output_asm_label (disp);
13106 output_addr_const (file, disp);
13111 print_reg (base, 0, file);
13115 print_reg (index, 0, file);
13117 fprintf (file, ",%d", scale);
13123 rtx offset = NULL_RTX;
13127 /* Pull out the offset of a symbol; print any symbol itself. */
13128 if (GET_CODE (disp) == CONST
13129 && GET_CODE (XEXP (disp, 0)) == PLUS
13130 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13132 offset = XEXP (XEXP (disp, 0), 1);
13133 disp = gen_rtx_CONST (VOIDmode,
13134 XEXP (XEXP (disp, 0), 0));
13138 output_pic_addr_const (file, disp, 0);
13139 else if (GET_CODE (disp) == LABEL_REF)
13140 output_asm_label (disp);
13141 else if (CONST_INT_P (disp))
13144 output_addr_const (file, disp);
13150 print_reg (base, 0, file);
13153 if (INTVAL (offset) >= 0)
13155 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13159 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13166 print_reg (index, 0, file);
13168 fprintf (file, "*%d", scale);
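      /* For illustration, the same address prints in the two dialects as
	     AT&T:   -8(%rbp,%rax,4)
	     Intel:  [rbp-8+rax*4]
	 (hypothetical registers and displacement).  */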
13175 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13178 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13182 if (GET_CODE (x) != UNSPEC)
13185 op = XVECEXP (x, 0, 0);
13186 switch (XINT (x, 1))
13188 case UNSPEC_GOTTPOFF:
13189 output_addr_const (file, op);
13190 /* FIXME: This might be @TPOFF in Sun ld. */
13191 fputs ("@gottpoff", file);
13194 output_addr_const (file, op);
13195 fputs ("@tpoff", file);
13197 case UNSPEC_NTPOFF:
13198 output_addr_const (file, op);
13200 fputs ("@tpoff", file);
13202 fputs ("@ntpoff", file);
13204 case UNSPEC_DTPOFF:
13205 output_addr_const (file, op);
13206 fputs ("@dtpoff", file);
13208 case UNSPEC_GOTNTPOFF:
13209 output_addr_const (file, op);
13211 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13212 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13214 fputs ("@gotntpoff", file);
13216 case UNSPEC_INDNTPOFF:
13217 output_addr_const (file, op);
13218 fputs ("@indntpoff", file);
13221 case UNSPEC_MACHOPIC_OFFSET:
13222 output_addr_const (file, op);
13224 machopic_output_function_base_name (file);
13235 /* Split one or more DImode RTL references into pairs of SImode
13236 references. The RTL can be REG, offsettable MEM, integer constant, or
13237 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13238 split and "num" is its length. lo_half and hi_half are output arrays
13239 that parallel "operands". */
13242 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13246 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses,
   but we still have to handle them.  */
13252 lo_half[num] = adjust_address (op, SImode, 0);
13253 hi_half[num] = adjust_address (op, SImode, 4);
13257 lo_half[num] = simplify_gen_subreg (SImode, op,
13258 GET_MODE (op) == VOIDmode
13259 ? DImode : GET_MODE (op), 0);
13260 hi_half[num] = simplify_gen_subreg (SImode, op,
13261 GET_MODE (op) == VOIDmode
13262 ? DImode : GET_MODE (op), 4);
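/* A minimal usage sketch (hypothetical splitter fragment, not part of
   the build): splitting a DImode register-to-register move into two
   SImode moves.  */
#if 0
  rtx lo[2], hi[2];

  split_di (operands, 2, lo, hi);	/* split dest and source together */
  emit_move_insn (lo[0], lo[1]);	/* move the low 32 bits */
  emit_move_insn (hi[0], hi[1]);	/* move the high 32 bits */
#endif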
13266 /* Split one or more TImode RTL references into pairs of DImode
13267 references. The RTL can be REG, offsettable MEM, integer constant, or
CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
13269 split and "num" is its length. lo_half and hi_half are output arrays
13270 that parallel "operands". */
13273 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13277 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses, but we
   still have to handle them.  */
13283 lo_half[num] = adjust_address (op, DImode, 0);
13284 hi_half[num] = adjust_address (op, DImode, 8);
13288 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13289 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13294 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13295 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13296 is the expression of the binary operation. The output may either be
13297 emitted here, or returned to the caller, like all output_* functions.
13299 There is no guarantee that the operands are the same mode, as they
13300 might be within FLOAT or FLOAT_EXTEND expressions. */
13302 #ifndef SYSV386_COMPAT
13303 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13304 wants to fix the assemblers because that causes incompatibility
13305 with gcc. No-one wants to fix gcc because that causes
13306 incompatibility with assemblers... You can use the option of
13307 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13308 #define SYSV386_COMPAT 1
13312 output_387_binary_op (rtx insn, rtx *operands)
13314 static char buf[40];
13317 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13319 #ifdef ENABLE_CHECKING
/* Even if we do not want to check the inputs, this documents the
   input constraints, which helps in understanding the following code.  */
13322 if (STACK_REG_P (operands[0])
13323 && ((REG_P (operands[1])
13324 && REGNO (operands[0]) == REGNO (operands[1])
13325 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13326 || (REG_P (operands[2])
13327 && REGNO (operands[0]) == REGNO (operands[2])
13328 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13329 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13332 gcc_assert (is_sse);
13335 switch (GET_CODE (operands[3]))
13338 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13339 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13347 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13348 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13356 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13357 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13365 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13366 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13374 gcc_unreachable ();
13381 strcpy (buf, ssep);
13382 if (GET_MODE (operands[0]) == SFmode)
13383 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13385 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13389 strcpy (buf, ssep + 1);
13390 if (GET_MODE (operands[0]) == SFmode)
13391 strcat (buf, "ss\t{%2, %0|%0, %2}");
13393 strcat (buf, "sd\t{%2, %0|%0, %2}");
13399 switch (GET_CODE (operands[3]))
13403 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13405 rtx temp = operands[2];
13406 operands[2] = operands[1];
13407 operands[1] = temp;
/* We know operands[0] == operands[1].  */
13412 if (MEM_P (operands[2]))
13418 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13420 if (STACK_TOP_P (operands[0]))
13421 /* How is it that we are storing to a dead operand[2]?
13422 Well, presumably operands[1] is dead too. We can't
13423 store the result to st(0) as st(0) gets popped on this
13424 instruction. Instead store to operands[2] (which I
13425 think has to be st(1)). st(1) will be popped later.
13426 gcc <= 2.8.1 didn't have this check and generated
13427 assembly code that the Unixware assembler rejected. */
13428 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13430 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13434 if (STACK_TOP_P (operands[0]))
13435 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13437 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13442 if (MEM_P (operands[1]))
13448 if (MEM_P (operands[2]))
13454 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13457 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13458 derived assemblers, confusingly reverse the direction of
13459 the operation for fsub{r} and fdiv{r} when the
13460 destination register is not st(0). The Intel assembler
13461 doesn't have this brain damage. Read !SYSV386_COMPAT to
13462 figure out what the hardware really does. */
13463 if (STACK_TOP_P (operands[0]))
13464 p = "{p\t%0, %2|rp\t%2, %0}";
13466 p = "{rp\t%2, %0|p\t%0, %2}";
13468 if (STACK_TOP_P (operands[0]))
13469 /* As above for fmul/fadd, we can't store to st(0). */
13470 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13472 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13477 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13480 if (STACK_TOP_P (operands[0]))
13481 p = "{rp\t%0, %1|p\t%1, %0}";
13483 p = "{p\t%1, %0|rp\t%0, %1}";
13485 if (STACK_TOP_P (operands[0]))
13486 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13488 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13493 if (STACK_TOP_P (operands[0]))
13495 if (STACK_TOP_P (operands[1]))
13496 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13498 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13501 else if (STACK_TOP_P (operands[1]))
13504 p = "{\t%1, %0|r\t%0, %1}";
13506 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13512 p = "{r\t%2, %0|\t%0, %2}";
13514 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13520 gcc_unreachable ();
/* Return the mode needed for the entity in the optimize_mode_switching pass.  */
13530 ix86_mode_needed (int entity, rtx insn)
13532 enum attr_i387_cw mode;
/* The mode UNINITIALIZED is used to store the control word after a
   function call or ASM pattern.  The mode ANY specifies that the
   function has no requirements on the control word and makes no
   changes in the bits we are interested in.  */
13540 || (NONJUMP_INSN_P (insn)
13541 && (asm_noperands (PATTERN (insn)) >= 0
13542 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13543 return I387_CW_UNINITIALIZED;
13545 if (recog_memoized (insn) < 0)
13546 return I387_CW_ANY;
13548 mode = get_attr_i387_cw (insn);
13553 if (mode == I387_CW_TRUNC)
13558 if (mode == I387_CW_FLOOR)
13563 if (mode == I387_CW_CEIL)
13568 if (mode == I387_CW_MASK_PM)
13573 gcc_unreachable ();
13576 return I387_CW_ANY;
/* Output code to initialize control word copies used by trunc?f?i and
   rounding patterns.  CURRENT_MODE is set to the current control word,
   while NEW_MODE is set to the new control word.  */
13584 emit_i387_cw_initialization (int mode)
13586 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13589 enum ix86_stack_slot slot;
13591 rtx reg = gen_reg_rtx (HImode);
13593 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13594 emit_move_insn (reg, copy_rtx (stored_mode));
13596 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13597 || optimize_function_for_size_p (cfun))
13601 case I387_CW_TRUNC:
13602 /* round toward zero (truncate) */
13603 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13604 slot = SLOT_CW_TRUNC;
13607 case I387_CW_FLOOR:
13608 /* round down toward -oo */
13609 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13610 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13611 slot = SLOT_CW_FLOOR;
13615 /* round up toward +oo */
13616 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13617 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13618 slot = SLOT_CW_CEIL;
13621 case I387_CW_MASK_PM:
13622 /* mask precision exception for nearbyint() */
13623 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13624 slot = SLOT_CW_MASK_PM;
13628 gcc_unreachable ();
13635 case I387_CW_TRUNC:
13636 /* round toward zero (truncate) */
13637 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13638 slot = SLOT_CW_TRUNC;
13641 case I387_CW_FLOOR:
13642 /* round down toward -oo */
13643 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13644 slot = SLOT_CW_FLOOR;
13648 /* round up toward +oo */
13649 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13650 slot = SLOT_CW_CEIL;
13653 case I387_CW_MASK_PM:
13654 /* mask precision exception for nearbyint() */
13655 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13656 slot = SLOT_CW_MASK_PM;
13660 gcc_unreachable ();
13664 gcc_assert (slot < MAX_386_STACK_LOCALS);
13666 new_mode = assign_386_stack_local (HImode, slot);
13667 emit_move_insn (new_mode, reg);
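/* For reference, the x87 control word fields manipulated above are
   (Intel SDM layout):
     bits 10-11  RC, rounding control: 00 nearest, 01 down (0x0400),
		 10 up (0x0800), 11 toward zero (0x0c00)
     bit  5	 PM, precision exception mask (0x0020)
   so e.g. the I387_CW_TRUNC case simply ORs RC=11 into the saved word.  */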
13670 /* Output code for INSN to convert a float to a signed int. OPERANDS
13671 are the insn operands. The output may be [HSD]Imode and the input
13672 operand may be [SDX]Fmode. */
13675 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13677 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13678 int dimode_p = GET_MODE (operands[0]) == DImode;
13679 int round_mode = get_attr_i387_cw (insn);
13681 /* Jump through a hoop or two for DImode, since the hardware has no
13682 non-popping instruction. We used to do this a different way, but
13683 that was somewhat fragile and broke with post-reload splitters. */
13684 if ((dimode_p || fisttp) && !stack_top_dies)
13685 output_asm_insn ("fld\t%y1", operands);
13687 gcc_assert (STACK_TOP_P (operands[1]));
13688 gcc_assert (MEM_P (operands[0]));
13689 gcc_assert (GET_MODE (operands[1]) != TFmode);
13692 output_asm_insn ("fisttp%Z0\t%0", operands);
13695 if (round_mode != I387_CW_ANY)
13696 output_asm_insn ("fldcw\t%3", operands);
13697 if (stack_top_dies || dimode_p)
13698 output_asm_insn ("fistp%Z0\t%0", operands);
13700 output_asm_insn ("fist%Z0\t%0", operands);
13701 if (round_mode != I387_CW_ANY)
13702 output_asm_insn ("fldcw\t%2", operands);
13708 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13709 have the values zero or one, indicates the ffreep insn's operand
13710 from the OPERANDS array. */
13712 static const char *
13713 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13715 if (TARGET_USE_FFREEP)
13716 #ifdef HAVE_AS_IX86_FFREEP
13717 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13720 static char retval[32];
13721 int regno = REGNO (operands[opno]);
13723 gcc_assert (FP_REGNO_P (regno));
13725 regno -= FIRST_STACK_REG;
13727 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13732 return opno ? "fstp\t%y1" : "fstp\t%y0";
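/* Sketch of what the fallback above produces: "ffreep %st(1)" encodes
   as the bytes 0xdf 0xc1, so on this little-endian target
       .short 0xc1df
   assembles to exactly that instruction even when gas does not know
   the ffreep mnemonic.  */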
13736 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13737 should be used. UNORDERED_P is true when fucom should be used. */
13740 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13742 int stack_top_dies;
13743 rtx cmp_op0, cmp_op1;
13744 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13748 cmp_op0 = operands[0];
13749 cmp_op1 = operands[1];
13753 cmp_op0 = operands[1];
13754 cmp_op1 = operands[2];
13759 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13760 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13761 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13762 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13764 if (GET_MODE (operands[0]) == SFmode)
13766 return &ucomiss[TARGET_AVX ? 0 : 1];
13768 return &comiss[TARGET_AVX ? 0 : 1];
13771 return &ucomisd[TARGET_AVX ? 0 : 1];
13773 return &comisd[TARGET_AVX ? 0 : 1];
13776 gcc_assert (STACK_TOP_P (cmp_op0));
13778 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13780 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13782 if (stack_top_dies)
13784 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13785 return output_387_ffreep (operands, 1);
13788 return "ftst\n\tfnstsw\t%0";
13791 if (STACK_REG_P (cmp_op1)
13793 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13794 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If the top of the 387 stack dies, and the other operand
   is also a stack register that dies, then this must be a
   `fcompp' float compare.  */
/* There is no double-popping fcomi variant.  Fortunately,
   eflags is immune to the fstp's cc clobbering.  */
13805 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13807 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13808 return output_387_ffreep (operands, 0);
13813 return "fucompp\n\tfnstsw\t%0";
13815 return "fcompp\n\tfnstsw\t%0";
13820 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13822 static const char * const alt[16] =
13824 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13825 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13826 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13827 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13829 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13830 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13834 "fcomi\t{%y1, %0|%0, %y1}",
13835 "fcomip\t{%y1, %0|%0, %y1}",
13836 "fucomi\t{%y1, %0|%0, %y1}",
13837 "fucomip\t{%y1, %0|%0, %y1}",
13848 mask = eflags_p << 3;
13849 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13850 mask |= unordered_p << 1;
13851 mask |= stack_top_dies;
13853 gcc_assert (mask < 16);
13862 ix86_output_addr_vec_elt (FILE *file, int value)
13864 const char *directive = ASM_LONG;
13868 directive = ASM_QUAD;
13870 gcc_assert (!TARGET_64BIT);
13873 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13877 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13879 const char *directive = ASM_LONG;
13882 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13883 directive = ASM_QUAD;
13885 gcc_assert (!TARGET_64BIT);
13887 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13888 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13889 fprintf (file, "%s%s%d-%s%d\n",
13890 directive, LPREFIX, value, LPREFIX, rel);
13891 else if (HAVE_AS_GOTOFF_IN_DATA)
13892 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13894 else if (TARGET_MACHO)
13896 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13897 machopic_output_function_base_name (file);
13902 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13903 GOT_SYMBOL_NAME, LPREFIX, value);
13906 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13910 ix86_expand_clear (rtx dest)
13914 /* We play register width games, which are only valid after reload. */
13915 gcc_assert (reload_completed);
13917 /* Avoid HImode and its attendant prefix byte. */
13918 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13919 dest = gen_rtx_REG (SImode, REGNO (dest));
13920 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13922 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13923 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13925 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13926 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
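/* A minimal usage sketch (post-reload context assumed):
       ix86_expand_clear (gen_rtx_REG (SImode, AX_REG));
   typically assembles to the 2-byte "xorl %eax, %eax" rather than the
   5-byte "movl $0, %eax", unless TARGET_USE_MOV0 says otherwise.  */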
13932 /* X is an unchanging MEM. If it is a constant pool reference, return
13933 the constant pool rtx, else NULL. */
13936 maybe_get_pool_constant (rtx x)
13938 x = ix86_delegitimize_address (XEXP (x, 0));
13940 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13941 return get_pool_constant (x);
13947 ix86_expand_move (enum machine_mode mode, rtx operands[])
13950 enum tls_model model;
13955 if (GET_CODE (op1) == SYMBOL_REF)
13957 model = SYMBOL_REF_TLS_MODEL (op1);
13960 op1 = legitimize_tls_address (op1, model, true);
13961 op1 = force_operand (op1, op0);
13965 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13966 && SYMBOL_REF_DLLIMPORT_P (op1))
13967 op1 = legitimize_dllimport_symbol (op1, false);
13969 else if (GET_CODE (op1) == CONST
13970 && GET_CODE (XEXP (op1, 0)) == PLUS
13971 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13973 rtx addend = XEXP (XEXP (op1, 0), 1);
13974 rtx symbol = XEXP (XEXP (op1, 0), 0);
13977 model = SYMBOL_REF_TLS_MODEL (symbol);
13979 tmp = legitimize_tls_address (symbol, model, true);
13980 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13981 && SYMBOL_REF_DLLIMPORT_P (symbol))
13982 tmp = legitimize_dllimport_symbol (symbol, true);
13986 tmp = force_operand (tmp, NULL);
13987 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13988 op0, 1, OPTAB_DIRECT);
13994 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13996 if (TARGET_MACHO && !TARGET_64BIT)
14001 rtx temp = ((reload_in_progress
14002 || ((op0 && REG_P (op0))
14004 ? op0 : gen_reg_rtx (Pmode));
14005 op1 = machopic_indirect_data_reference (op1, temp);
14006 op1 = machopic_legitimize_pic_address (op1, mode,
14007 temp == op1 ? 0 : temp);
14009 else if (MACHOPIC_INDIRECT)
14010 op1 = machopic_indirect_data_reference (op1, 0);
14018 op1 = force_reg (Pmode, op1);
14019 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14021 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14022 op1 = legitimize_pic_address (op1, reg);
14031 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14032 || !push_operand (op0, mode))
14034 op1 = force_reg (mode, op1);
14036 if (push_operand (op0, mode)
14037 && ! general_no_elim_operand (op1, mode))
14038 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64-bit compilation into a register
   to get them CSEd.  */
14042 if (can_create_pseudo_p ()
14043 && (mode == DImode) && TARGET_64BIT
14044 && immediate_operand (op1, mode)
14045 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14046 && !register_operand (op0, mode)
14048 op1 = copy_to_mode_reg (mode, op1);
14050 if (can_create_pseudo_p ()
14051 && FLOAT_MODE_P (mode)
14052 && GET_CODE (op1) == CONST_DOUBLE)
14054 /* If we are loading a floating point constant to a register,
14055 force the value to memory now, since we'll get better code
out of the back end.  */
14058 op1 = validize_mem (force_const_mem (mode, op1));
14059 if (!register_operand (op0, mode))
14061 rtx temp = gen_reg_rtx (mode);
14062 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14063 emit_move_insn (op0, temp);
14069 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14073 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14075 rtx op0 = operands[0], op1 = operands[1];
14076 unsigned int align = GET_MODE_ALIGNMENT (mode);
14078 /* Force constants other than zero into memory. We do not know how
14079 the instructions used to build constants modify the upper 64 bits
of the register; once we have that information we may be able
14081 to handle some of them more efficiently. */
14082 if (can_create_pseudo_p ()
14083 && register_operand (op0, mode)
14084 && (CONSTANT_P (op1)
14085 || (GET_CODE (op1) == SUBREG
14086 && CONSTANT_P (SUBREG_REG (op1))))
14087 && !standard_sse_constant_p (op1))
14088 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since attributes
14091 can make operands unaligned. */
14092 if (can_create_pseudo_p ()
14093 && SSE_REG_MODE_P (mode)
14094 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14095 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14099 /* ix86_expand_vector_move_misalign() does not like constants ... */
14100 if (CONSTANT_P (op1)
14101 || (GET_CODE (op1) == SUBREG
14102 && CONSTANT_P (SUBREG_REG (op1))))
14103 op1 = validize_mem (force_const_mem (mode, op1));
14105 /* ... nor both arguments in memory. */
14106 if (!register_operand (op0, mode)
14107 && !register_operand (op1, mode))
14108 op1 = force_reg (mode, op1);
14110 tmp[0] = op0; tmp[1] = op1;
14111 ix86_expand_vector_move_misalign (mode, tmp);
14115 /* Make operand1 a register if it isn't already. */
14116 if (can_create_pseudo_p ()
14117 && !register_operand (op0, mode)
14118 && !register_operand (op1, mode))
14120 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14124 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14127 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14128 straight to ix86_expand_vector_move. */
14129 /* Code generation for scalar reg-reg moves of single and double precision data:
if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
14134 if (x86_sse_partial_reg_dependency == true)
14139 Code generation for scalar loads of double precision data:
14140 if (x86_sse_split_regs == true)
14141 movlpd mem, reg (gas syntax)
14145 Code generation for unaligned packed loads of single precision data
14146 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14147 if (x86_sse_unaligned_move_optimal)
14150 if (x86_sse_partial_reg_dependency == true)
14162 Code generation for unaligned packed loads of double precision data
14163 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14164 if (x86_sse_unaligned_move_optimal)
14167 if (x86_sse_split_regs == true)
14180 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14189 switch (GET_MODE_CLASS (mode))
14191 case MODE_VECTOR_INT:
14193 switch (GET_MODE_SIZE (mode))
14196 /* If we're optimizing for size, movups is the smallest. */
14197 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14199 op0 = gen_lowpart (V4SFmode, op0);
14200 op1 = gen_lowpart (V4SFmode, op1);
14201 emit_insn (gen_avx_movups (op0, op1));
14204 op0 = gen_lowpart (V16QImode, op0);
14205 op1 = gen_lowpart (V16QImode, op1);
14206 emit_insn (gen_avx_movdqu (op0, op1));
14209 op0 = gen_lowpart (V32QImode, op0);
14210 op1 = gen_lowpart (V32QImode, op1);
14211 emit_insn (gen_avx_movdqu256 (op0, op1));
14214 gcc_unreachable ();
14217 case MODE_VECTOR_FLOAT:
14218 op0 = gen_lowpart (mode, op0);
14219 op1 = gen_lowpart (mode, op1);
14224 emit_insn (gen_avx_movups (op0, op1));
14227 emit_insn (gen_avx_movups256 (op0, op1));
14230 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14232 op0 = gen_lowpart (V4SFmode, op0);
14233 op1 = gen_lowpart (V4SFmode, op1);
14234 emit_insn (gen_avx_movups (op0, op1));
14237 emit_insn (gen_avx_movupd (op0, op1));
14240 emit_insn (gen_avx_movupd256 (op0, op1));
14243 gcc_unreachable ();
14248 gcc_unreachable ();
14256 /* If we're optimizing for size, movups is the smallest. */
14257 if (optimize_insn_for_size_p ()
14258 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14260 op0 = gen_lowpart (V4SFmode, op0);
14261 op1 = gen_lowpart (V4SFmode, op1);
14262 emit_insn (gen_sse_movups (op0, op1));
14266 /* ??? If we have typed data, then it would appear that using
14267 movdqu is the only way to get unaligned data loaded with
14269 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14271 op0 = gen_lowpart (V16QImode, op0);
14272 op1 = gen_lowpart (V16QImode, op1);
14273 emit_insn (gen_sse2_movdqu (op0, op1));
14277 if (TARGET_SSE2 && mode == V2DFmode)
14281 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14283 op0 = gen_lowpart (V2DFmode, op0);
14284 op1 = gen_lowpart (V2DFmode, op1);
14285 emit_insn (gen_sse2_movupd (op0, op1));
14289 /* When SSE registers are split into halves, we can avoid
14290 writing to the top half twice. */
14291 if (TARGET_SSE_SPLIT_REGS)
14293 emit_clobber (op0);
14298 /* ??? Not sure about the best option for the Intel chips.
14299 The following would seem to satisfy; the register is
14300 entirely cleared, breaking the dependency chain. We
14301 then store to the upper half, with a dependency depth
14302 of one. A rumor has it that Intel recommends two movsd
14303 followed by an unpacklpd, but this is unconfirmed. And
14304 given that the dependency depth of the unpacklpd would
14305 still be one, I'm not sure why this would be better. */
14306 zero = CONST0_RTX (V2DFmode);
14309 m = adjust_address (op1, DFmode, 0);
14310 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14311 m = adjust_address (op1, DFmode, 8);
14312 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14316 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14318 op0 = gen_lowpart (V4SFmode, op0);
14319 op1 = gen_lowpart (V4SFmode, op1);
14320 emit_insn (gen_sse_movups (op0, op1));
14324 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14325 emit_move_insn (op0, CONST0_RTX (mode));
14327 emit_clobber (op0);
14329 if (mode != V4SFmode)
14330 op0 = gen_lowpart (V4SFmode, op0);
14331 m = adjust_address (op1, V2SFmode, 0);
14332 emit_insn (gen_sse_loadlps (op0, op0, m));
14333 m = adjust_address (op1, V2SFmode, 8);
14334 emit_insn (gen_sse_loadhps (op0, op0, m));
14337 else if (MEM_P (op0))
14339 /* If we're optimizing for size, movups is the smallest. */
14340 if (optimize_insn_for_size_p ()
14341 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14343 op0 = gen_lowpart (V4SFmode, op0);
14344 op1 = gen_lowpart (V4SFmode, op1);
14345 emit_insn (gen_sse_movups (op0, op1));
14349 /* ??? Similar to above, only less clear because of quote
14350 typeless stores unquote. */
14351 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14352 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14354 op0 = gen_lowpart (V16QImode, op0);
14355 op1 = gen_lowpart (V16QImode, op1);
14356 emit_insn (gen_sse2_movdqu (op0, op1));
14360 if (TARGET_SSE2 && mode == V2DFmode)
14362 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14364 op0 = gen_lowpart (V2DFmode, op0);
14365 op1 = gen_lowpart (V2DFmode, op1);
14366 emit_insn (gen_sse2_movupd (op0, op1));
14370 m = adjust_address (op0, DFmode, 0);
14371 emit_insn (gen_sse2_storelpd (m, op1));
14372 m = adjust_address (op0, DFmode, 8);
14373 emit_insn (gen_sse2_storehpd (m, op1));
14378 if (mode != V4SFmode)
14379 op1 = gen_lowpart (V4SFmode, op1);
14381 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14383 op0 = gen_lowpart (V4SFmode, op0);
14384 emit_insn (gen_sse_movups (op0, op1));
14388 m = adjust_address (op0, V2SFmode, 0);
14389 emit_insn (gen_sse_storelps (m, op1));
14390 m = adjust_address (op0, V2SFmode, 8);
14391 emit_insn (gen_sse_storehps (m, op1));
14396 gcc_unreachable ();
14399 /* Expand a push in MODE. This is some mode for which we do not support
14400 proper push instructions, at least from the registers that we expect
14401 the value to live in. */
14404 ix86_expand_push (enum machine_mode mode, rtx x)
14408 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14409 GEN_INT (-GET_MODE_SIZE (mode)),
14410 stack_pointer_rtx, 1, OPTAB_DIRECT);
14411 if (tmp != stack_pointer_rtx)
14412 emit_move_insn (stack_pointer_rtx, tmp);
14414 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto the stack, it has to be aligned at least
14417 at the function argument boundary. However since we don't have
14418 the argument type, we can't determine the actual argument
14420 emit_move_insn (tmp, x);
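/* For illustration, pushing a DFmode value this way on 32-bit expands
   to roughly
       subl	$8, %esp
       movsd	%xmm0, (%esp)
   (hypothetical source register; the store depends on where X lives).  */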
14423 /* Helper function of ix86_fixup_binary_operands to canonicalize
14424 operand order. Returns true if the operands should be swapped. */
14427 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14430 rtx dst = operands[0];
14431 rtx src1 = operands[1];
14432 rtx src2 = operands[2];
14434 /* If the operation is not commutative, we can't do anything. */
14435 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14438 /* Highest priority is that src1 should match dst. */
14439 if (rtx_equal_p (dst, src1))
14441 if (rtx_equal_p (dst, src2))
14444 /* Next highest priority is that immediate constants come second. */
14445 if (immediate_operand (src2, mode))
14447 if (immediate_operand (src1, mode))
14450 /* Lowest priority is that memory references should come second. */
14460 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14461 destination to use for the operation. If different from the true
14462 destination in operands[0], a copy operation will be required. */
14465 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14468 rtx dst = operands[0];
14469 rtx src1 = operands[1];
14470 rtx src2 = operands[2];
14472 /* Canonicalize operand order. */
14473 if (ix86_swap_binary_operands_p (code, mode, operands))
14477 /* It is invalid to swap operands of different modes. */
14478 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14485 /* Both source operands cannot be in memory. */
14486 if (MEM_P (src1) && MEM_P (src2))
14488 /* Optimization: Only read from memory once. */
14489 if (rtx_equal_p (src1, src2))
14491 src2 = force_reg (mode, src2);
14495 src2 = force_reg (mode, src2);
14498 /* If the destination is memory, and we do not have matching source
14499 operands, do things in registers. */
14500 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14501 dst = gen_reg_rtx (mode);
14503 /* Source 1 cannot be a constant. */
14504 if (CONSTANT_P (src1))
14505 src1 = force_reg (mode, src1);
14507 /* Source 1 cannot be a non-matching memory. */
14508 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14509 src1 = force_reg (mode, src1);
14511 operands[1] = src1;
14512 operands[2] = src2;
14516 /* Similarly, but assume that the destination has already been
14517 set up properly. */
14520 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14521 enum machine_mode mode, rtx operands[])
14523 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14524 gcc_assert (dst == operands[0]);
14527 /* Attempt to expand a binary operator. Make the expansion closer to the
actual machine than just general_operand, which will allow 3 separate
14529 memory references (one output, two input) in a single insn. */
14532 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14535 rtx src1, src2, dst, op, clob;
14537 dst = ix86_fixup_binary_operands (code, mode, operands);
14538 src1 = operands[1];
14539 src2 = operands[2];
14541 /* Emit the instruction. */
14543 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14544 if (reload_in_progress)
14546 /* Reload doesn't know about the flags register, and doesn't know that
14547 it doesn't want to clobber it. We can only do this with PLUS. */
14548 gcc_assert (code == PLUS);
14553 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14554 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14557 /* Fix up the destination if needed. */
14558 if (dst != operands[0])
14559 emit_move_insn (operands[0], dst);
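/* For example, expanding an SImode PLUS this way emits a parallel of
   the form
       (parallel [(set (reg:SI dst) (plus:SI (reg:SI src1) (reg:SI src2)))
		  (clobber (reg:CC FLAGS_REG))])
   keeping the flags clobber visible to the optimizers (illustrative
   operand names).  */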
14562 /* Return TRUE or FALSE depending on whether the binary operator meets the
14563 appropriate constraints. */
14566 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14569 rtx dst = operands[0];
14570 rtx src1 = operands[1];
14571 rtx src2 = operands[2];
14573 /* Both source operands cannot be in memory. */
14574 if (MEM_P (src1) && MEM_P (src2))
14577 /* Canonicalize operand order for commutative operators. */
14578 if (ix86_swap_binary_operands_p (code, mode, operands))
14585 /* If the destination is memory, we must have a matching source operand. */
14586 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14589 /* Source 1 cannot be a constant. */
14590 if (CONSTANT_P (src1))
14593 /* Source 1 cannot be a non-matching memory. */
14594 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14600 /* Attempt to expand a unary operator. Make the expansion closer to the
actual machine than just general_operand, which will allow 2 separate
14602 memory references (one output, one input) in a single insn. */
14605 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14608 int matching_memory;
14609 rtx src, dst, op, clob;
14614 /* If the destination is memory, and we do not have matching source
14615 operands, do things in registers. */
14616 matching_memory = 0;
14619 if (rtx_equal_p (dst, src))
14620 matching_memory = 1;
14622 dst = gen_reg_rtx (mode);
/* When the source operand is memory, the destination must match.  */
14626 if (MEM_P (src) && !matching_memory)
14627 src = force_reg (mode, src);
14629 /* Emit the instruction. */
14631 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14632 if (reload_in_progress || code == NOT)
14634 /* Reload doesn't know about the flags register, and doesn't know that
14635 it doesn't want to clobber it. */
14636 gcc_assert (code == NOT);
14641 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14642 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14645 /* Fix up the destination if needed. */
14646 if (dst != operands[0])
14647 emit_move_insn (operands[0], dst);
14650 #define LEA_SEARCH_THRESHOLD 12
/* Search backward for a non-agu definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until we
   1. pass LEA_SEARCH_THRESHOLD instructions, or
   2. reach the BB boundary, or
   3. reach an agu definition.
   Returns the distance between the non-agu definition point and INSN.
   If there is no definition point, returns -1.  */
14661 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14664 basic_block bb = BLOCK_FOR_INSN (insn);
14667 enum attr_type insn_type;
14669 if (insn != BB_HEAD (bb))
14671 rtx prev = PREV_INSN (insn);
14672 while (prev && distance < LEA_SEARCH_THRESHOLD)
14674 if (NONDEBUG_INSN_P (prev))
14677 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14678 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14679 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14680 && (regno1 == DF_REF_REGNO (*def_rec)
14681 || regno2 == DF_REF_REGNO (*def_rec)))
14683 insn_type = get_attr_type (prev);
14684 if (insn_type != TYPE_LEA)
14688 if (prev == BB_HEAD (bb))
14690 prev = PREV_INSN (prev);
14694 if (distance < LEA_SEARCH_THRESHOLD)
14698 bool simple_loop = false;
14700 FOR_EACH_EDGE (e, ei, bb->preds)
14703 simple_loop = true;
14709 rtx prev = BB_END (bb);
14712 && distance < LEA_SEARCH_THRESHOLD)
14714 if (NONDEBUG_INSN_P (prev))
14717 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14718 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14719 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14720 && (regno1 == DF_REF_REGNO (*def_rec)
14721 || regno2 == DF_REF_REGNO (*def_rec)))
14723 insn_type = get_attr_type (prev);
14724 if (insn_type != TYPE_LEA)
14728 prev = PREV_INSN (prev);
14736 /* get_attr_type may modify recog data. We want to make sure
14737 that recog data is valid for instruction INSN, on which
14738 distance_non_agu_define is called. INSN is unchanged here. */
14739 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
14748 distance_agu_use (unsigned int regno0, rtx insn)
14750 basic_block bb = BLOCK_FOR_INSN (insn);
14755 if (insn != BB_END (bb))
14757 rtx next = NEXT_INSN (insn);
14758 while (next && distance < LEA_SEARCH_THRESHOLD)
14760 if (NONDEBUG_INSN_P (next))
14764 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14765 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14766 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14767 && regno0 == DF_REF_REGNO (*use_rec))
14769 /* Return DISTANCE if OP0 is used in memory
14770 address in NEXT. */
14774 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14775 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14776 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14777 && regno0 == DF_REF_REGNO (*def_rec))
14779 /* Return -1 if OP0 is set in NEXT. */
14783 if (next == BB_END (bb))
14785 next = NEXT_INSN (next);
14789 if (distance < LEA_SEARCH_THRESHOLD)
14793 bool simple_loop = false;
14795 FOR_EACH_EDGE (e, ei, bb->succs)
14798 simple_loop = true;
14804 rtx next = BB_HEAD (bb);
14807 && distance < LEA_SEARCH_THRESHOLD)
14809 if (NONDEBUG_INSN_P (next))
14813 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14814 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14815 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14816 && regno0 == DF_REF_REGNO (*use_rec))
14818 /* Return DISTANCE if OP0 is used in memory
14819 address in NEXT. */
14823 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14824 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14825 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14826 && regno0 == DF_REF_REGNO (*def_rec))
14828 /* Return -1 if OP0 is set in NEXT. */
14833 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs. ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is preferred over LEA.
   Positive value: LEA is preferred over ADD.  */
14846 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to an LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of the LEA holds an actual address which will be
   used soon, LEA is better; otherwise ADD is better.  */
14855 ix86_lea_for_add_ok (rtx insn, rtx operands[])
14857 unsigned int regno0 = true_regnum (operands[0]);
14858 unsigned int regno1 = true_regnum (operands[1]);
14859 unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c, and a != b && a != c, we must use the lea form.  */
14862 if (regno0 != regno1 && regno0 != regno2)
14865 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14869 int dist_define, dist_use;
14870 dist_define = distance_non_agu_define (regno1, regno2, insn);
14871 if (dist_define <= 0)
/* If this insn has both a backward non-agu dependence and a forward
   agu dependence, the one with the shorter distance takes effect.  */
14876 dist_use = distance_agu_use (regno0, insn);
14878 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
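/* Illustrative decision (hypothetical insns):
       insn A:  r1 = ...	  <- non-agu definition of r1
       insn B:  r0 = r1 + r2	  <- the ADD/LEA candidate
       insn C:  ... = mem[r0]	  <- agu use of r0
   LEA wins when the distance A..B is large enough and B..C short
   enough that feeding the address-generation unit directly pays off.  */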
14885 /* Return true if destination reg of SET_BODY is shift count of
14889 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14895 /* Retrieve destination of SET_BODY. */
14896 switch (GET_CODE (set_body))
14899 set_dest = SET_DEST (set_body);
14900 if (!set_dest || !REG_P (set_dest))
14904 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14905 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14913 /* Retrieve shift count of USE_BODY. */
14914 switch (GET_CODE (use_body))
14917 shift_rtx = XEXP (use_body, 1);
14920 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14921 if (ix86_dep_by_shift_count_body (set_body,
14922 XVECEXP (use_body, 0, i)))
14930 && (GET_CODE (shift_rtx) == ASHIFT
14931 || GET_CODE (shift_rtx) == LSHIFTRT
14932 || GET_CODE (shift_rtx) == ASHIFTRT
14933 || GET_CODE (shift_rtx) == ROTATE
14934 || GET_CODE (shift_rtx) == ROTATERT))
14936 rtx shift_count = XEXP (shift_rtx, 1);
14938 /* Return true if shift count is dest of SET_BODY. */
14939 if (REG_P (shift_count)
14940 && true_regnum (set_dest) == true_regnum (shift_count))
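/* Example of the dependence detected (hypothetical insns):
       set:  (set (reg:QI cl) (reg:QI al))
       use:  (set (reg:SI dx) (ashift:SI (reg:SI dx) (reg:QI cl)))
   where the set's destination is the use's shift count.  */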
14947 /* Return true if destination reg of SET_INSN is shift count of
14951 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14953 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14954 PATTERN (use_insn));
14957 /* Return TRUE or FALSE depending on whether the unary operator meets the
14958 appropriate constraints. */
14961 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14962 enum machine_mode mode ATTRIBUTE_UNUSED,
14963 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, the source and destination must match.  */
14966 if ((MEM_P (operands[0])
14967 || MEM_P (operands[1]))
14968 && ! rtx_equal_p (operands[0], operands[1]))
14973 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14974 are ok, keeping in mind the possible movddup alternative. */
14977 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14979 if (MEM_P (operands[0]))
14980 return rtx_equal_p (operands[0], operands[1 + high]);
14981 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14982 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14986 /* Post-reload splitter for converting an SF or DFmode value in an
14987 SSE register into an unsigned SImode. */
14990 ix86_split_convert_uns_si_sse (rtx operands[])
14992 enum machine_mode vecmode;
14993 rtx value, large, zero_or_two31, input, two31, x;
14995 large = operands[1];
14996 zero_or_two31 = operands[2];
14997 input = operands[3];
14998 two31 = operands[4];
14999 vecmode = GET_MODE (large);
15000 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
15002 /* Load up the value into the low element. We must ensure that the other
15003 elements are valid floats -- zero is the easiest such value. */
15006 if (vecmode == V4SFmode)
15007 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
15009 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15013 input = gen_rtx_REG (vecmode, REGNO (input));
15014 emit_move_insn (value, CONST0_RTX (vecmode));
15015 if (vecmode == V4SFmode)
15016 emit_insn (gen_sse_movss (value, value, input));
15018 emit_insn (gen_sse2_movsd (value, value, input));
15021 emit_move_insn (large, two31);
15022 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15024 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15025 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15027 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15028 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15030 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15031 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15033 large = gen_rtx_REG (V4SImode, REGNO (large));
15034 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15036 x = gen_rtx_REG (V4SImode, REGNO (value));
15037 if (vecmode == V4SFmode)
15038 emit_insn (gen_sse2_cvttps2dq (x, value));
15040 emit_insn (gen_sse2_cvttpd2dq (x, value));
15043 emit_insn (gen_xorv4si3 (value, value, large));
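/* Scalar sketch of the sequence above, one lane (illustrative only):  */
#if 0
static unsigned int
uns_si_from_double (double x)
{
  int big = x >= 0x1p31;			/* the LE compare mask */
  double adj = big ? x - 0x1p31 : x;		/* subtract 0 or 2**31 */
  unsigned int res = (unsigned int) (int) adj;	/* truncating convert */
  return res ^ (big ? 0x80000000u : 0u);	/* restore 2**31 via XOR */
}
#endif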
15046 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15047 Expects the 64-bit DImode to be supplied in a pair of integral
15048 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15049 -mfpmath=sse, !optimize_size only. */
15052 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15054 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15055 rtx int_xmm, fp_xmm;
15056 rtx biases, exponents;
15059 int_xmm = gen_reg_rtx (V4SImode);
15060 if (TARGET_INTER_UNIT_MOVES)
15061 emit_insn (gen_movdi_to_sse (int_xmm, input));
15062 else if (TARGET_SSE_SPLIT_REGS)
15064 emit_clobber (int_xmm);
15065 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15069 x = gen_reg_rtx (V2DImode);
15070 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15071 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15074 x = gen_rtx_CONST_VECTOR (V4SImode,
15075 gen_rtvec (4, GEN_INT (0x43300000UL),
15076 GEN_INT (0x45300000UL),
15077 const0_rtx, const0_rtx));
15078 exponents = validize_mem (force_const_mem (V4SImode, x));
15080 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15081 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15083 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15084 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15085 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15086 (0x1.0p84 + double(fp_value_hi_xmm)).
15087 Note these exponents differ by 32. */
15089 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15091 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15092 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15093 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15094 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15095 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15096 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15097 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15098 biases = validize_mem (force_const_mem (V2DFmode, biases));
15099 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15101 /* Add the upper and lower DFmode values together. */
15103 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15106 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15107 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15108 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15111 ix86_expand_vector_extract (false, target, fp_xmm, 0);
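/* Scalar sketch of the exponent-bias trick above (illustrative only):  */
#if 0
static double
uns_di_to_double (unsigned long long u)
{
  double lo = (double) (u & 0xffffffffULL);	/* (2**52 + lo) - 2**52 */
  double hi = (double) (u >> 32) * 0x1p32;	/* (2**84 + hi*2**32) - 2**84 */
  return hi + lo;				/* the final hadd/add */
}
#endif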
15114 /* Not used, but eases macroization of patterns. */
15116 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15117 rtx input ATTRIBUTE_UNUSED)
15119 gcc_unreachable ();
15122 /* Convert an unsigned SImode value into a DFmode. Only currently used
15123 for SSE, but applicable anywhere. */
15126 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15128 REAL_VALUE_TYPE TWO31r;
15131 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15132 NULL, 1, OPTAB_DIRECT);
15134 fp = gen_reg_rtx (DFmode);
15135 emit_insn (gen_floatsidf2 (fp, x));
15137 real_ldexp (&TWO31r, &dconst1, 31);
15138 x = const_double_from_real_value (TWO31r, DFmode);
15140 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15142 emit_move_insn (target, x);
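/* The identity used: for 0 <= u < 2**32,
       (double) u == (double) (int) (u - 0x80000000u) + 0x1.0p31,
   where the subtraction wraps modulo 2**32; the signed conversion is
   then exact and the final addition removes the bias.  */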
15145 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15146 32-bit mode; otherwise we have a direct convert instruction. */
15149 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15151 REAL_VALUE_TYPE TWO32r;
15152 rtx fp_lo, fp_hi, x;
15154 fp_lo = gen_reg_rtx (DFmode);
15155 fp_hi = gen_reg_rtx (DFmode);
15157 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15159 real_ldexp (&TWO32r, &dconst1, 32);
15160 x = const_double_from_real_value (TWO32r, DFmode);
15161 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15163 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15165 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15168 emit_move_insn (target, x);
15171 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15172 For x86_32, -mfpmath=sse, !optimize_size only. */
15174 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15176 REAL_VALUE_TYPE ONE16r;
15177 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15179 real_ldexp (&ONE16r, &dconst1, 16);
15180 x = const_double_from_real_value (ONE16r, SFmode);
15181 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15182 NULL, 0, OPTAB_DIRECT);
15183 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15184 NULL, 0, OPTAB_DIRECT);
15185 fp_hi = gen_reg_rtx (SFmode);
15186 fp_lo = gen_reg_rtx (SFmode);
15187 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15188 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15189 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15191 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15193 if (!rtx_equal_p (target, fp_hi))
15194 emit_move_insn (target, fp_hi);
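/* Illustrative sketch (not part of GCC; guarded out): scalar model of the
   16-bit split above.  Both halves convert exactly, hi * 2^16 is an exact
   power-of-two scaling, and the final add rounds once -- so the result
   matches a direct unsigned-to-float conversion.  */
#if 0
#include <stdint.h>

static float
u32_to_float_model (uint32_t x)
{
  float fp_hi = (float) (x >> 16);
  float fp_lo = (float) (x & 0xffff);
  return fp_hi * 0x1.0p16f + fp_lo;
}
#endif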
15197 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15198 then replicate the value for all elements of the vector register. */
15202 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15209 v = gen_rtvec (4, value, value, value, value);
15210 return gen_rtx_CONST_VECTOR (V4SImode, v);
15214 v = gen_rtvec (2, value, value);
15215 return gen_rtx_CONST_VECTOR (V2DImode, v);
15219 v = gen_rtvec (4, value, value, value, value);
15221 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15222 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15223 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15227 v = gen_rtvec (2, value, value);
15229 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15230 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15233 gcc_unreachable ();
15237 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15238 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15239 for an SSE register. If VECT is true, then replicate the mask for
15240 all elements of the vector register. If INVERT is true, then create
15241 a mask excluding the sign bit. */
15244 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15246 enum machine_mode vec_mode, imode;
15247 HOST_WIDE_INT hi, lo;
15252 /* Find the sign bit, sign extended to 2*HWI. */
15258 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15259 lo = 0x80000000, hi = lo < 0;
15265 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15266 if (HOST_BITS_PER_WIDE_INT >= 64)
15267 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15269 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15274 vec_mode = VOIDmode;
15275 if (HOST_BITS_PER_WIDE_INT >= 64)
15278 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15285 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15289 lo = ~lo, hi = ~hi;
15295 mask = immed_double_const (lo, hi, imode);
15297 vec = gen_rtvec (2, v, mask);
15298 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15299 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15306 gcc_unreachable ();
15310 lo = ~lo, hi = ~hi;
15312 /* Force this value into the low part of a fp vector constant. */
15313 mask = immed_double_const (lo, hi, imode);
15314 mask = gen_lowpart (mode, mask);
15316 if (vec_mode == VOIDmode)
15317 return force_reg (mode, mask);
15319 v = ix86_build_const_vector (mode, vect, mask);
15320 return force_reg (vec_mode, v);
15323 /* Generate code for floating point ABS or NEG. */
15326 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15329 rtx mask, set, use, clob, dst, src;
15330 bool use_sse = false;
15331 bool vector_mode = VECTOR_MODE_P (mode);
15332 enum machine_mode elt_mode = mode;
15336 elt_mode = GET_MODE_INNER (mode);
15339 else if (mode == TFmode)
15341 else if (TARGET_SSE_MATH)
15342 use_sse = SSE_FLOAT_MODE_P (mode);
15344 /* NEG and ABS performed with SSE use bitwise mask operations.
15345 Create the appropriate mask now. */
15347 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15356 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15357 set = gen_rtx_SET (VOIDmode, dst, set);
15362 set = gen_rtx_fmt_e (code, mode, src);
15363 set = gen_rtx_SET (VOIDmode, dst, set);
15366 use = gen_rtx_USE (VOIDmode, mask);
15367 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15368 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15369 gen_rtvec (3, set, use, clob)));
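/* Illustrative sketch (not part of GCC; guarded out): the scalar effect of
   the masks built above, assuming IEEE single precision.  NEG is an XOR
   with the sign bit; ABS is an AND with the inverted mask.  */
#if 0
#include <stdint.h>

static float
fneg_model (float x)
{
  union { uint32_t i; float f; } u;
  u.f = x;
  u.i ^= 0x80000000u;           /* flip the sign bit */
  return u.f;
}

static float
fabs_model (float x)
{
  union { uint32_t i; float f; } u;
  u.f = x;
  u.i &= ~0x80000000u;          /* clear the sign bit (inverted mask) */
  return u.f;
}
#endif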
15376 /* Expand a copysign operation. Special case operand 0 being a constant. */
15379 ix86_expand_copysign (rtx operands[])
15381 enum machine_mode mode;
15382 rtx dest, op0, op1, mask, nmask;
15384 dest = operands[0];
15388 mode = GET_MODE (dest);
15390 if (GET_CODE (op0) == CONST_DOUBLE)
15392 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15394 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15395 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15397 if (mode == SFmode || mode == DFmode)
15399 enum machine_mode vmode;
15401 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15403 if (op0 == CONST0_RTX (mode))
15404 op0 = CONST0_RTX (vmode);
15407 rtx v = ix86_build_const_vector (mode, false, op0);
15409 op0 = force_reg (vmode, v);
15412 else if (op0 != CONST0_RTX (mode))
15413 op0 = force_reg (mode, op0);
15415 mask = ix86_build_signbit_mask (mode, 0, 0);
15417 if (mode == SFmode)
15418 copysign_insn = gen_copysignsf3_const;
15419 else if (mode == DFmode)
15420 copysign_insn = gen_copysigndf3_const;
15422 copysign_insn = gen_copysigntf3_const;
15424 emit_insn (copysign_insn (dest, op0, op1, mask));
15428 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15430 nmask = ix86_build_signbit_mask (mode, 0, 1);
15431 mask = ix86_build_signbit_mask (mode, 0, 0);
15433 if (mode == SFmode)
15434 copysign_insn = gen_copysignsf3_var;
15435 else if (mode == DFmode)
15436 copysign_insn = gen_copysigndf3_var;
15438 copysign_insn = gen_copysigntf3_var;
15440 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
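/* Illustrative sketch (not part of GCC; guarded out): the bit-level recipe
   the MASK/NMASK pair implements, for IEEE doubles.  */
#if 0
#include <stdint.h>

static double
copysign_model (double x, double y)
{
  union { uint64_t i; double d; } ux, uy;
  ux.d = x;
  uy.d = y;
  /* (x & nmask) | (y & mask): magnitude of x, sign of y.  */
  ux.i = (ux.i & ~0x8000000000000000ull) | (uy.i & 0x8000000000000000ull);
  return ux.d;
}
#endif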
15444 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15445 be a constant, and so has already been expanded into a vector constant. */
15448 ix86_split_copysign_const (rtx operands[])
15450 enum machine_mode mode, vmode;
15451 rtx dest, op0, mask, x;
15453 dest = operands[0];
15455 mask = operands[3];
15457 mode = GET_MODE (dest);
15458 vmode = GET_MODE (mask);
15460 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15461 x = gen_rtx_AND (vmode, dest, mask);
15462 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15464 if (op0 != CONST0_RTX (vmode))
15466 x = gen_rtx_IOR (vmode, dest, op0);
15467 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15471 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15472 so we need two masks. */
15475 ix86_split_copysign_var (rtx operands[])
15477 enum machine_mode mode, vmode;
15478 rtx dest, scratch, op0, op1, mask, nmask, x;
15480 dest = operands[0];
15481 scratch = operands[1];
15484 nmask = operands[4];
15485 mask = operands[5];
15487 mode = GET_MODE (dest);
15488 vmode = GET_MODE (mask);
15490 if (rtx_equal_p (op0, op1))
15492 /* Shouldn't happen often (it's useless, obviously), but when it does
15493 we'd generate incorrect code if we continue below. */
15494 emit_move_insn (dest, op0);
15498 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15500 gcc_assert (REGNO (op1) == REGNO (scratch));
15502 x = gen_rtx_AND (vmode, scratch, mask);
15503 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15506 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15507 x = gen_rtx_NOT (vmode, dest);
15508 x = gen_rtx_AND (vmode, x, op0);
15509 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15513 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15515 x = gen_rtx_AND (vmode, scratch, mask);
15517 else /* alternative 2,4 */
15519 gcc_assert (REGNO (mask) == REGNO (scratch));
15520 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15521 x = gen_rtx_AND (vmode, scratch, op1);
15523 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15525 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15527 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15528 x = gen_rtx_AND (vmode, dest, nmask);
15530 else /* alternative 3,4 */
15532 gcc_assert (REGNO (nmask) == REGNO (dest));
15534 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15535 x = gen_rtx_AND (vmode, dest, op0);
15537 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15540 x = gen_rtx_IOR (vmode, dest, scratch);
15541 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15544 /* Return TRUE or FALSE depending on whether the first SET in INSN
15545 has source and destination with matching CC modes, and whether the
15546 CC mode is at least as constrained as REQ_MODE. */
15549 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15552 enum machine_mode set_mode;
15554 set = PATTERN (insn);
15555 if (GET_CODE (set) == PARALLEL)
15556 set = XVECEXP (set, 0, 0);
15557 gcc_assert (GET_CODE (set) == SET);
15558 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15560 set_mode = GET_MODE (SET_DEST (set));
15564 if (req_mode != CCNOmode
15565 && (req_mode != CCmode
15566 || XEXP (SET_SRC (set), 1) != const0_rtx))
15570 if (req_mode == CCGCmode)
15574 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15578 if (req_mode == CCZmode)
15589 gcc_unreachable ();
15592 return GET_MODE (SET_SRC (set)) == set_mode;
15595 /* Generate insn patterns to do an integer compare of OPERANDS. */
15598 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15600 enum machine_mode cmpmode;
15603 cmpmode = SELECT_CC_MODE (code, op0, op1);
15604 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15606 /* This is very simple, but making the interface the same as in the
15607 FP case makes the rest of the code easier. */
15608 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15609 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15611 /* Return the test that should be put into the flags user, i.e.
15612 the bcc, scc, or cmov instruction. */
15613 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15616 /* Figure out whether to use ordered or unordered fp comparisons.
15617 Return the appropriate mode to use. */
15620 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15622 /* ??? In order to make all comparisons reversible, we do all comparisons
15623 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15624 all forms of trapping and nontrapping comparisons, we can make inequality
15625 comparisons trapping again, since it results in better code when using
15626 FCOM based compares. */
15627 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15631 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15633 enum machine_mode mode = GET_MODE (op0);
15635 if (SCALAR_FLOAT_MODE_P (mode))
15637 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15638 return ix86_fp_compare_mode (code);
15643 /* Only zero flag is needed. */
15644 case EQ: /* ZF=0 */
15645 case NE: /* ZF!=0 */
15647 /* Codes needing carry flag. */
15648 case GEU: /* CF=0 */
15649 case LTU: /* CF=1 */
15650 /* Detect overflow checks. They need just the carry flag. */
15651 if (GET_CODE (op0) == PLUS
15652 && rtx_equal_p (op1, XEXP (op0, 0)))
15656 case GTU: /* CF=0 & ZF=0 */
15657 case LEU: /* CF=1 | ZF=1 */
15658 /* Detect overflow checks. They need just the carry flag. */
15659 if (GET_CODE (op0) == MINUS
15660 && rtx_equal_p (op1, XEXP (op0, 0)))
15664 /* Codes possibly doable only with the sign flag when
15665 comparing against zero. */
15666 case GE: /* SF=OF or SF=0 */
15667 case LT: /* SF<>OF or SF=1 */
15668 if (op1 == const0_rtx)
15671 /* For other cases the carry flag is not required. */
15673 /* Codes doable only with the sign flag when comparing
15674 against zero, but for which we lack a jump instruction,
15675 so we need to use relational tests against the overflow
15676 flag, which thus needs to be zero. */
15677 case GT: /* ZF=0 & SF=OF */
15678 case LE: /* ZF=1 | SF<>OF */
15679 if (op1 == const0_rtx)
15683 /* strcmp patterns do (use flags), and combine may ask us for a proper mode. */
15688 gcc_unreachable ();
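/* Illustrative sketch (not part of GCC; guarded out): the overflow-check
   idiom recognized above.  Compiling this yields an (ltu (plus a b) a)
   comparison, for which the carry flag alone suffices.  */
#if 0
static unsigned int
add_overflows (unsigned int a, unsigned int b)
{
  return a + b < a;     /* true iff the addition wrapped */
}
#endif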
15692 /* Return the fixed registers used for condition codes. */
15695 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15702 /* If two condition code modes are compatible, return a condition code
15703 mode which is compatible with both. Otherwise, return VOIDmode. */
15706 static enum machine_mode
15707 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15712 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15715 if ((m1 == CCGCmode && m2 == CCGOCmode)
15716 || (m1 == CCGOCmode && m2 == CCGCmode))
15722 gcc_unreachable ();
15752 /* These are only compatible with themselves, which we already know. */
15759 /* Return a comparison we can do that is equivalent to
15760 swap_condition (code), except possibly for orderedness.
15761 But never change orderedness if TARGET_IEEE_FP, returning
15762 UNKNOWN in that case if necessary. */
15764 static enum rtx_code
15765 ix86_fp_swap_condition (enum rtx_code code)
15769 case GT: /* GTU - CF=0 & ZF=0 */
15770 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15771 case GE: /* GEU - CF=0 */
15772 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15773 case UNLT: /* LTU - CF=1 */
15774 return TARGET_IEEE_FP ? UNKNOWN : GT;
15775 case UNLE: /* LEU - CF=1 | ZF=1 */
15776 return TARGET_IEEE_FP ? UNKNOWN : GE;
15778 return swap_condition (code);
15782 /* Return the cost of comparison CODE using the best strategy for performance.
15783 All following functions use the number of instructions as the cost metric.
15784 In the future this should be tweaked to compute bytes for optimize_size and
15785 take into account performance of various instructions on various CPUs. */
15788 ix86_fp_comparison_cost (enum rtx_code code)
15792 /* The cost of code using bit-twiddling on %ah. */
15809 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15813 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15816 gcc_unreachable ();
15819 switch (ix86_fp_comparison_strategy (code))
15821 case IX86_FPCMP_COMI:
15822 return arith_cost > 4 ? 3 : 2;
15823 case IX86_FPCMP_SAHF:
15824 return arith_cost > 4 ? 4 : 3;
15830 /* Return strategy to use for floating-point. We assume that fcomi is always
15831 preferable where available, since that is also true when looking at size
15832 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15834 enum ix86_fpcmp_strategy
15835 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15837 /* Do fcomi/sahf based test when profitable. */
15840 return IX86_FPCMP_COMI;
15842 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15843 return IX86_FPCMP_SAHF;
15845 return IX86_FPCMP_ARITH;
15848 /* Swap, force into registers, or otherwise massage the two operands
15849 to a fp comparison. The operands are updated in place; the new
15850 comparison code is returned. */
15852 static enum rtx_code
15853 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15855 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15856 rtx op0 = *pop0, op1 = *pop1;
15857 enum machine_mode op_mode = GET_MODE (op0);
15858 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15860 /* All of the unordered compare instructions only work on registers.
15861 The same is true of the fcomi compare instructions. The XFmode
15862 compare instructions require registers except when comparing
15863 against zero or when converting operand 1 from fixed point to floating point. */
15867 && (fpcmp_mode == CCFPUmode
15868 || (op_mode == XFmode
15869 && ! (standard_80387_constant_p (op0) == 1
15870 || standard_80387_constant_p (op1) == 1)
15871 && GET_CODE (op1) != FLOAT)
15872 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15874 op0 = force_reg (op_mode, op0);
15875 op1 = force_reg (op_mode, op1);
15879 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15880 things around if they appear profitable, otherwise force op0
15881 into a register. */
15883 if (standard_80387_constant_p (op0) == 0
15885 && ! (standard_80387_constant_p (op1) == 0
15888 enum rtx_code new_code = ix86_fp_swap_condition (code);
15889 if (new_code != UNKNOWN)
15892 tmp = op0, op0 = op1, op1 = tmp;
15898 op0 = force_reg (op_mode, op0);
15900 if (CONSTANT_P (op1))
15902 int tmp = standard_80387_constant_p (op1);
15904 op1 = validize_mem (force_const_mem (op_mode, op1));
15908 op1 = force_reg (op_mode, op1);
15911 op1 = force_reg (op_mode, op1);
15915 /* Try to rearrange the comparison to make it cheaper. */
15916 if (ix86_fp_comparison_cost (code)
15917 > ix86_fp_comparison_cost (swap_condition (code))
15918 && (REG_P (op1) || can_create_pseudo_p ()))
15921 tmp = op0, op0 = op1, op1 = tmp;
15922 code = swap_condition (code);
15924 op0 = force_reg (op_mode, op0);
15932 /* Convert comparison codes we use to represent FP comparison to integer
15933 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
15937 ix86_fp_compare_code_to_integer (enum rtx_code code)
15966 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15969 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15971 enum machine_mode fpcmp_mode, intcmp_mode;
15974 fpcmp_mode = ix86_fp_compare_mode (code);
15975 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15977 /* Do fcomi/sahf based test when profitable. */
15978 switch (ix86_fp_comparison_strategy (code))
15980 case IX86_FPCMP_COMI:
15981 intcmp_mode = fpcmp_mode;
15982 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15983 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15988 case IX86_FPCMP_SAHF:
15989 intcmp_mode = fpcmp_mode;
15990 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15991 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15995 scratch = gen_reg_rtx (HImode);
15996 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15997 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
16000 case IX86_FPCMP_ARITH:
16001 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
16002 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
16003 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
16005 scratch = gen_reg_rtx (HImode);
16006 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
16008 /* In the unordered case, we have to check C2 for NaNs, which
16009 doesn't happen to work out to anything nice combination-wise.
16010 So do some bit twiddling on the value we've got in AH to come
16011 up with an appropriate set of condition codes. */
16013 intcmp_mode = CCNOmode;
16018 if (code == GT || !TARGET_IEEE_FP)
16020 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16025 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16026 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16027 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16028 intcmp_mode = CCmode;
16034 if (code == LT && TARGET_IEEE_FP)
16036 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16037 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16038 intcmp_mode = CCmode;
16043 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16049 if (code == GE || !TARGET_IEEE_FP)
16051 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16056 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16057 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16063 if (code == LE && TARGET_IEEE_FP)
16065 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16066 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16067 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16068 intcmp_mode = CCmode;
16073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16079 if (code == EQ && TARGET_IEEE_FP)
16081 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16082 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16083 intcmp_mode = CCmode;
16088 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16094 if (code == NE && TARGET_IEEE_FP)
16096 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16097 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16103 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16109 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16113 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16118 gcc_unreachable ();
16126 /* Return the test that should be put into the flags user, i.e.
16127 the bcc, scc, or cmov instruction. */
16128 return gen_rtx_fmt_ee (code, VOIDmode,
16129 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16134 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16138 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16139 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16141 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16143 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16144 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16147 ret = ix86_expand_int_compare (code, op0, op1);
16153 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16157 switch (GET_MODE (op0))
16166 tmp = ix86_expand_compare (code, op0, op1);
16167 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16168 gen_rtx_LABEL_REF (VOIDmode, label),
16170 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16177 /* Expand DImode branch into multiple compare+branch. */
16179 rtx lo[2], hi[2], label2;
16180 enum rtx_code code1, code2, code3;
16181 enum machine_mode submode;
16183 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16185 tmp = op0, op0 = op1, op1 = tmp;
16186 code = swap_condition (code);
16188 if (GET_MODE (op0) == DImode)
16190 split_di (&op0, 1, lo+0, hi+0);
16191 split_di (&op1, 1, lo+1, hi+1);
16196 split_ti (&op0, 1, lo+0, hi+0);
16197 split_ti (&op1, 1, lo+1, hi+1);
16201 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16202 avoid two branches. This costs one extra insn, so disable when
16203 optimizing for size. */
16205 if ((code == EQ || code == NE)
16206 && (!optimize_insn_for_size_p ()
16207 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16212 if (hi[1] != const0_rtx)
16213 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16214 NULL_RTX, 0, OPTAB_WIDEN);
16217 if (lo[1] != const0_rtx)
16218 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16219 NULL_RTX, 0, OPTAB_WIDEN);
16221 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16222 NULL_RTX, 0, OPTAB_WIDEN);
16224 ix86_expand_branch (code, tmp, const0_rtx, label);
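/* Illustrative sketch (not part of GCC; guarded out): the branch-saving
   rewrite above in scalar C -- one OR of XORs, then a single test.  */
#if 0
#include <stdint.h>

static int
di_eq_model (uint32_t a_lo, uint32_t a_hi, uint32_t b_lo, uint32_t b_hi)
{
  return ((a_hi ^ b_hi) | (a_lo ^ b_lo)) == 0;
}
#endif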
16228 /* Otherwise, if we are doing a less-than or greater-than-or-equal
16229 comparison, op1 is a constant and the low word is zero, then we
16230 can just examine the high word. Similarly when the low word is -1
16231 for a less-than-or-equal or greater-than comparison. */
16233 if (CONST_INT_P (hi[1]))
16236 case LT: case LTU: case GE: case GEU:
16237 if (lo[1] == const0_rtx)
16239 ix86_expand_branch (code, hi[0], hi[1], label);
16243 case LE: case LEU: case GT: case GTU:
16244 if (lo[1] == constm1_rtx)
16246 ix86_expand_branch (code, hi[0], hi[1], label);
16254 /* Otherwise, we need two or three jumps. */
16256 label2 = gen_label_rtx ();
16259 code2 = swap_condition (code);
16260 code3 = unsigned_condition (code);
16264 case LT: case GT: case LTU: case GTU:
16267 case LE: code1 = LT; code2 = GT; break;
16268 case GE: code1 = GT; code2 = LT; break;
16269 case LEU: code1 = LTU; code2 = GTU; break;
16270 case GEU: code1 = GTU; code2 = LTU; break;
16272 case EQ: code1 = UNKNOWN; code2 = NE; break;
16273 case NE: code2 = UNKNOWN; break;
16276 gcc_unreachable ();
16281 * if (hi(a) < hi(b)) goto true;
16282 * if (hi(a) > hi(b)) goto false;
16283 * if (lo(a) < lo(b)) goto true;
16287 if (code1 != UNKNOWN)
16288 ix86_expand_branch (code1, hi[0], hi[1], label);
16289 if (code2 != UNKNOWN)
16290 ix86_expand_branch (code2, hi[0], hi[1], label2);
16292 ix86_expand_branch (code3, lo[0], lo[1], label);
16294 if (code2 != UNKNOWN)
16295 emit_label (label2);
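/* Illustrative sketch (not part of GCC; guarded out): the three-jump
   sequence above for a signed 64-bit a < b on a 32-bit target.  Note the
   low words always compare unsigned (code3 = unsigned_condition).  */
#if 0
#include <stdint.h>

static int
di_lt_model (int32_t a_hi, uint32_t a_lo, int32_t b_hi, uint32_t b_lo)
{
  if (a_hi < b_hi)      /* code1: signed compare on high words */
    return 1;
  if (a_hi > b_hi)      /* code2: goto false */
    return 0;
  return a_lo < b_lo;   /* code3: unsigned compare on low words */
}
#endif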
16300 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16305 /* Split branch based on floating point condition. */
16307 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16308 rtx target1, rtx target2, rtx tmp, rtx pushed)
16313 if (target2 != pc_rtx)
16316 code = reverse_condition_maybe_unordered (code);
16321 condition = ix86_expand_fp_compare (code, op1, op2,
16324 /* Remove pushed operand from stack. */
16326 ix86_free_from_memory (GET_MODE (pushed));
16328 i = emit_jump_insn (gen_rtx_SET
16330 gen_rtx_IF_THEN_ELSE (VOIDmode,
16331 condition, target1, target2)));
16332 if (split_branch_probability >= 0)
16333 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16337 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16341 gcc_assert (GET_MODE (dest) == QImode);
16343 ret = ix86_expand_compare (code, op0, op1);
16344 PUT_MODE (ret, QImode);
16345 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16348 /* Expand a comparison setting or clearing the carry flag. Return true
16349 when successful, and set *POP to the comparison for the operation. */
16351 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16353 enum machine_mode mode =
16354 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16356 /* Do not handle DImode compares that go through a special path. */
16357 if (mode == (TARGET_64BIT ? TImode : DImode))
16360 if (SCALAR_FLOAT_MODE_P (mode))
16362 rtx compare_op, compare_seq;
16364 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16366 /* Shortcut: the following common codes never translate
16367 into carry flag compares. */
16368 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16369 || code == ORDERED || code == UNORDERED)
16372 /* These comparisons require the zero flag; swap operands so they won't. */
16373 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16374 && !TARGET_IEEE_FP)
16379 code = swap_condition (code);
16382 /* Try to expand the comparison and verify that we end up with
16383 a carry-flag-based comparison. This fails only when we decide
16384 to expand the comparison using arithmetic, which is not a
16385 common scenario. */
16387 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16388 compare_seq = get_insns ();
16391 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16392 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16393 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16395 code = GET_CODE (compare_op);
16397 if (code != LTU && code != GEU)
16400 emit_insn (compare_seq);
16405 if (!INTEGRAL_MODE_P (mode))
16414 /* Convert a==0 into (unsigned)a<1. */
16417 if (op1 != const0_rtx)
16420 code = (code == EQ ? LTU : GEU);
16423 /* Convert a>b into b<a or a>=b+1. */
16426 if (CONST_INT_P (op1))
16428 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16429 /* Bail out on overflow. We still can swap operands, but that
16430 would force loading of the constant into a register. */
16431 if (op1 == const0_rtx
16432 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16434 code = (code == GTU ? GEU : LTU);
16441 code = (code == GTU ? LTU : GEU);
16445 /* Convert a>=0 into (unsigned)a<0x80000000. */
16448 if (mode == DImode || op1 != const0_rtx)
16450 op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16451 code = (code == LT ? GEU : LTU);
16455 if (mode == DImode || op1 != constm1_rtx)
16457 op1 = gen_int_mode ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16458 code = (code == LE ? GEU : LTU);
16464 /* Swapping operands may cause a constant to appear as the first operand. */
16465 if (!nonimmediate_operand (op0, VOIDmode))
16467 if (!can_create_pseudo_p ())
16469 op0 = force_reg (mode, op0);
16471 *pop = ix86_expand_compare (code, op0, op1);
16472 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
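/* Illustrative sketch (not part of GCC; guarded out): the integer rewrites
   performed above, each ending in an LTU/GEU test of the carry flag.  */
#if 0
#include <stdint.h>

static int eq0_model (uint32_t a) { return a < 1u; }           /* a == 0 */
static int gt7_model (uint32_t a) { return a >= 8u; }          /* a > 7  */
static int ge0_model (int32_t a)  { return (uint32_t) a < 0x80000000u; }
                                                               /* a >= 0 */
#endif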
16477 ix86_expand_int_movcc (rtx operands[])
16479 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16480 rtx compare_seq, compare_op;
16481 enum machine_mode mode = GET_MODE (operands[0]);
16482 bool sign_bit_compare_p = false;
16483 rtx op0 = XEXP (operands[1], 0);
16484 rtx op1 = XEXP (operands[1], 1);
16487 compare_op = ix86_expand_compare (code, op0, op1);
16488 compare_seq = get_insns ();
16491 compare_code = GET_CODE (compare_op);
16493 if ((op1 == const0_rtx && (code == GE || code == LT))
16494 || (op1 == constm1_rtx && (code == GT || code == LE)))
16495 sign_bit_compare_p = true;
16497 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16498 HImode insns, we'd be swallowed in word prefix ops. */
16500 if ((mode != HImode || TARGET_FAST_PREFIX)
16501 && (mode != (TARGET_64BIT ? TImode : DImode))
16502 && CONST_INT_P (operands[2])
16503 && CONST_INT_P (operands[3]))
16505 rtx out = operands[0];
16506 HOST_WIDE_INT ct = INTVAL (operands[2]);
16507 HOST_WIDE_INT cf = INTVAL (operands[3]);
16508 HOST_WIDE_INT diff;
16511 /* Sign bit compares are better done using shifts than by using sbb. */
16513 if (sign_bit_compare_p
16514 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
16516 /* Detect overlap between destination and compare sources. */
16519 if (!sign_bit_compare_p)
16522 bool fpcmp = false;
16524 compare_code = GET_CODE (compare_op);
16526 flags = XEXP (compare_op, 0);
16528 if (GET_MODE (flags) == CCFPmode
16529 || GET_MODE (flags) == CCFPUmode)
16533 = ix86_fp_compare_code_to_integer (compare_code);
16536 /* To simplify the rest of the code, restrict to the GEU case. */
16537 if (compare_code == LTU)
16539 HOST_WIDE_INT tmp = ct;
16542 compare_code = reverse_condition (compare_code);
16543 code = reverse_condition (code);
16548 PUT_CODE (compare_op,
16549 reverse_condition_maybe_unordered
16550 (GET_CODE (compare_op)));
16552 PUT_CODE (compare_op,
16553 reverse_condition (GET_CODE (compare_op)));
16557 if (reg_overlap_mentioned_p (out, op0)
16558 || reg_overlap_mentioned_p (out, op1))
16559 tmp = gen_reg_rtx (mode);
16561 if (mode == DImode)
16562 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16564 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16565 flags, compare_op));
16569 if (code == GT || code == GE)
16570 code = reverse_condition (code);
16573 HOST_WIDE_INT tmp = ct;
16578 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
16591 tmp = expand_simple_binop (mode, PLUS,
16593 copy_rtx (tmp), 1, OPTAB_DIRECT);
16604 tmp = expand_simple_binop (mode, IOR,
16606 copy_rtx (tmp), 1, OPTAB_DIRECT);
16608 else if (diff == -1 && ct)
16618 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16620 tmp = expand_simple_binop (mode, PLUS,
16621 copy_rtx (tmp), GEN_INT (cf),
16622 copy_rtx (tmp), 1, OPTAB_DIRECT);
16630 * andl cf - ct, dest
16640 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16643 tmp = expand_simple_binop (mode, AND,
16645 gen_int_mode (cf - ct, mode),
16646 copy_rtx (tmp), 1, OPTAB_DIRECT);
16648 tmp = expand_simple_binop (mode, PLUS,
16649 copy_rtx (tmp), GEN_INT (ct),
16650 copy_rtx (tmp), 1, OPTAB_DIRECT);
16653 if (!rtx_equal_p (tmp, out))
16654 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16661 enum machine_mode cmp_mode = GET_MODE (op0);
16664 tmp = ct, ct = cf, cf = tmp;
16667 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16669 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16671 /* We may be reversing an unordered compare to a normal compare, which
16672 is not valid in general (we may convert a non-trapping condition
16673 into a trapping one); however, on i386 we currently emit all
16674 comparisons unordered. */
16675 compare_code = reverse_condition_maybe_unordered (compare_code);
16676 code = reverse_condition_maybe_unordered (code);
16680 compare_code = reverse_condition (compare_code);
16681 code = reverse_condition (code);
16685 compare_code = UNKNOWN;
16686 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
16687 && CONST_INT_P (op1))
16689 if (op1 == const0_rtx
16690 && (code == LT || code == GE))
16691 compare_code = code;
16692 else if (op1 == constm1_rtx)
16696 else if (code == GT)
16701 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16702 if (compare_code != UNKNOWN
16703 && GET_MODE (op0) == GET_MODE (out)
16704 && (cf == -1 || ct == -1))
16706 /* If the lea code below could be used, only optimize
16707 if it results in a 2-insn sequence. */
16709 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16710 || diff == 3 || diff == 5 || diff == 9)
16711 || (compare_code == LT && ct == -1)
16712 || (compare_code == GE && cf == -1))
16715 * notl op1 (if necessary)
16723 code = reverse_condition (code);
16726 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16728 out = expand_simple_binop (mode, IOR,
16730 out, 1, OPTAB_DIRECT);
16731 if (out != operands[0])
16732 emit_move_insn (operands[0], out);
16739 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16740 || diff == 3 || diff == 5 || diff == 9)
16741 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16743 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16749 * lea cf(dest*(ct-cf)),dest
16753 * This also catches the degenerate setcc-only case.
16759 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16762 /* On x86_64 the lea instruction operates on Pmode, so we need
16763 to get the arithmetic done in the proper mode to match. */
16765 tmp = copy_rtx (out);
16769 out1 = copy_rtx (out);
16770 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16774 tmp = gen_rtx_PLUS (mode, tmp, out1);
16780 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16783 if (!rtx_equal_p (tmp, out))
16786 out = force_operand (tmp, copy_rtx (out));
16788 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16790 if (!rtx_equal_p (out, operands[0]))
16791 emit_move_insn (operands[0], copy_rtx (out));
16797 * General case: Jumpful:
16798 * xorl dest,dest cmpl op1, op2
16799 * cmpl op1, op2 movl ct, dest
16800 * setcc dest jcc 1f
16801 * decl dest movl cf, dest
16802 * andl (cf-ct),dest 1:
16805 * Size 20. Size 14.
16807 * This is reasonably steep, but branch mispredict costs are
16808 * high on modern cpus, so consider failing only if optimizing for size. */
16812 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16813 && BRANCH_COST (optimize_insn_for_speed_p (),
16818 enum machine_mode cmp_mode = GET_MODE (op0);
16823 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16825 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16827 /* We may be reversing an unordered compare to a normal compare,
16828 which is not valid in general (we may convert a non-trapping
16829 condition into a trapping one); however, on i386 we currently
16830 emit all comparisons unordered. */
16831 code = reverse_condition_maybe_unordered (code);
16835 code = reverse_condition (code);
16836 if (compare_code != UNKNOWN)
16837 compare_code = reverse_condition (compare_code);
16841 if (compare_code != UNKNOWN)
16843 /* notl op1 (if needed)
16848 For x < 0 (resp. x <= -1) there will be no notl,
16850 so if possible swap the constants to get rid of the complement.
16851 True/false will be -1/0 while code below (store flag
16852 followed by decrement) is 0/-1, so the constants need
16853 to be exchanged once more. */
16855 if (compare_code == GE || !cf)
16857 code = reverse_condition (code);
16862 HOST_WIDE_INT tmp = cf;
16867 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16871 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16873 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
16875 copy_rtx (out), 1, OPTAB_DIRECT);
16878 out = expand_simple_binop (mode, AND, copy_rtx (out),
16879 gen_int_mode (cf - ct, mode),
16880 copy_rtx (out), 1, OPTAB_DIRECT);
16882 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16883 copy_rtx (out), 1, OPTAB_DIRECT);
16884 if (!rtx_equal_p (out, operands[0]))
16885 emit_move_insn (operands[0], copy_rtx (out));
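/* Illustrative sketch (not part of GCC; guarded out): the branchless
   select built above.  The store-flag value is widened to a 0/-1 mask,
   then AND/PLUS pick between the two constants.  */
#if 0
#include <stdint.h>

static int32_t
movcc_model (int cond, int32_t ct, int32_t cf)
{
  int32_t mask = -(int32_t) (cond != 0);  /* 0 or -1, as setcc + decl builds */
  return (mask & (ct - cf)) + cf;         /* cond ? ct : cf, no branches */
}
#endif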
16891 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16893 /* Try a few more things with specific constants and a variable. */
16896 rtx var, orig_out, out, tmp;
16898 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16901 /* If one of the two operands is an interesting constant, load a
16902 constant with the above and mask it in with a logical operation. */
16904 if (CONST_INT_P (operands[2]))
16907 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16908 operands[3] = constm1_rtx, op = and_optab;
16909 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16910 operands[3] = const0_rtx, op = ior_optab;
16914 else if (CONST_INT_P (operands[3]))
16917 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16918 operands[2] = constm1_rtx, op = and_optab;
16919 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16920 operands[2] = const0_rtx, op = ior_optab;
16927 orig_out = operands[0];
16928 tmp = gen_reg_rtx (mode);
16931 /* Recurse to get the constant loaded. */
16932 if (ix86_expand_int_movcc (operands) == 0)
16935 /* Mask in the interesting variable. */
16936 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16938 if (!rtx_equal_p (out, orig_out))
16939 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16945 * For comparison with above,
16955 if (! nonimmediate_operand (operands[2], mode))
16956 operands[2] = force_reg (mode, operands[2]);
16957 if (! nonimmediate_operand (operands[3], mode))
16958 operands[3] = force_reg (mode, operands[3]);
16960 if (! register_operand (operands[2], VOIDmode)
16962 || ! register_operand (operands[3], VOIDmode)))
16963 operands[2] = force_reg (mode, operands[2]);
16966 && ! register_operand (operands[3], VOIDmode))
16967 operands[3] = force_reg (mode, operands[3]);
16969 emit_insn (compare_seq);
16970 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16971 gen_rtx_IF_THEN_ELSE (mode,
16972 compare_op, operands[2],
16977 /* Swap, force into registers, or otherwise massage the two operands
16978 to an sse comparison with a mask result. Thus we differ a bit from
16979 ix86_prepare_fp_compare_args which expects to produce a flags result.
16981 The DEST operand exists to help determine whether to commute commutative
16982 operators. The POP0/POP1 operands are updated in place. The new
16983 comparison code is returned, or UNKNOWN if not implementable. */
16985 static enum rtx_code
16986 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16987 rtx *pop0, rtx *pop1)
16995 /* We have no LTGT as an operator. We could implement it with
16996 NE & ORDERED, but this requires an extra temporary. It's
16997 not clear that it's worth it. */
17004 /* These are supported directly. */
17011 /* For commutative operators, try to canonicalize the destination
17012 operand to be first in the comparison - this helps reload to
17013 avoid extra moves. */
17014 if (!dest || !rtx_equal_p (dest, *pop1))
17022 /* These are not supported directly. Swap the comparison operands
17023 to transform into something that is supported. */
17027 code = swap_condition (code);
17031 gcc_unreachable ();
17037 /* Detect conditional moves that exactly match min/max operational
17038 semantics. Note that this is IEEE safe, as long as we don't
17039 interchange the operands.
17041 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17042 and TRUE if the operation is successful and instructions are emitted. */
17045 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17046 rtx cmp_op1, rtx if_true, rtx if_false)
17048 enum machine_mode mode;
17054 else if (code == UNGE)
17057 if_true = if_false;
17063 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17065 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17070 mode = GET_MODE (dest);
17072 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17073 but MODE may be a vector mode and thus not appropriate. */
17074 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17076 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17079 if_true = force_reg (mode, if_true);
17080 v = gen_rtvec (2, if_true, if_false);
17081 tmp = gen_rtx_UNSPEC (mode, v, u);
17085 code = is_min ? SMIN : SMAX;
17086 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17089 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
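/* Illustrative sketch (not part of GCC; guarded out): why operand order
   matters.  SSE minsd/minss compute "a < b ? a : b", returning the second
   operand when either operand is NaN and on +/-0 ties, which is why the
   UNSPEC_IEEE_MIN/MAX path above preserves the operand order.  */
#if 0
static double
minsd_model (double a, double b)
{
  return a < b ? a : b;   /* returns b on NaN and when a == b */
}
#endif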
17093 /* Expand an sse vector comparison. Return the register with the result. */
17096 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17097 rtx op_true, rtx op_false)
17099 enum machine_mode mode = GET_MODE (dest);
17102 cmp_op0 = force_reg (mode, cmp_op0);
17103 if (!nonimmediate_operand (cmp_op1, mode))
17104 cmp_op1 = force_reg (mode, cmp_op1);
17107 || reg_overlap_mentioned_p (dest, op_true)
17108 || reg_overlap_mentioned_p (dest, op_false))
17109 dest = gen_reg_rtx (mode);
17111 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17112 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17117 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17118 operations. This is used for both scalar and vector conditional moves. */
17121 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17123 enum machine_mode mode = GET_MODE (dest);
17126 if (op_false == CONST0_RTX (mode))
17128 op_true = force_reg (mode, op_true);
17129 x = gen_rtx_AND (mode, cmp, op_true);
17130 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17132 else if (op_true == CONST0_RTX (mode))
17134 op_false = force_reg (mode, op_false);
17135 x = gen_rtx_NOT (mode, cmp);
17136 x = gen_rtx_AND (mode, x, op_false);
17137 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17139 else if (TARGET_XOP)
17141 rtx pcmov = gen_rtx_SET (mode, dest,
17142 gen_rtx_IF_THEN_ELSE (mode, cmp,
17149 op_true = force_reg (mode, op_true);
17150 op_false = force_reg (mode, op_false);
17152 t2 = gen_reg_rtx (mode);
17154 t3 = gen_reg_rtx (mode);
17158 x = gen_rtx_AND (mode, op_true, cmp);
17159 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17161 x = gen_rtx_NOT (mode, cmp);
17162 x = gen_rtx_AND (mode, x, op_false);
17163 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17165 x = gen_rtx_IOR (mode, t3, t2);
17166 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
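/* Illustrative sketch (not part of GCC; guarded out): the general
   AND/ANDNOT/IOR blend above, per 32-bit lane.  CMP is assumed to be
   all-ones or all-zeros in each lane, as SSE compares produce; the XOP
   pcmov path above does this in a single instruction.  */
#if 0
#include <stdint.h>

static uint32_t
blend_model (uint32_t cmp, uint32_t op_true, uint32_t op_false)
{
  return (cmp & op_true) | (~cmp & op_false);
}
#endif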
17170 /* Expand a floating-point conditional move. Return true if successful. */
17173 ix86_expand_fp_movcc (rtx operands[])
17175 enum machine_mode mode = GET_MODE (operands[0]);
17176 enum rtx_code code = GET_CODE (operands[1]);
17177 rtx tmp, compare_op;
17178 rtx op0 = XEXP (operands[1], 0);
17179 rtx op1 = XEXP (operands[1], 1);
17181 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17183 enum machine_mode cmode;
17185 /* Since we've no cmove for sse registers, don't force bad register
17186 allocation just to gain access to it. Deny movcc when the
17187 comparison mode doesn't match the move mode. */
17188 cmode = GET_MODE (op0);
17189 if (cmode == VOIDmode)
17190 cmode = GET_MODE (op1);
17194 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17195 if (code == UNKNOWN)
17198 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17199 operands[2], operands[3]))
17202 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17203 operands[2], operands[3]);
17204 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17208 /* The floating point conditional move instructions don't directly
17209 support conditions resulting from a signed integer comparison. */
17211 compare_op = ix86_expand_compare (code, op0, op1);
17212 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17214 tmp = gen_reg_rtx (QImode);
17215 ix86_expand_setcc (tmp, code, op0, op1);
17217 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17220 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17221 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17222 operands[2], operands[3])));
17227 /* Expand a floating-point vector conditional move; a vcond operation
17228 rather than a movcc operation. */
17231 ix86_expand_fp_vcond (rtx operands[])
17233 enum rtx_code code = GET_CODE (operands[3]);
17236 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17237 &operands[4], &operands[5]);
17238 if (code == UNKNOWN)
17241 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17242 operands[5], operands[1], operands[2]))
17245 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17246 operands[1], operands[2]);
17247 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17251 /* Expand a signed/unsigned integral vector conditional move. */
17254 ix86_expand_int_vcond (rtx operands[])
17256 enum machine_mode mode = GET_MODE (operands[0]);
17257 enum rtx_code code = GET_CODE (operands[3]);
17258 bool negate = false;
17261 cop0 = operands[4];
17262 cop1 = operands[5];
17264 /* XOP supports all of the comparisons on all vector int types. */
17267 /* Canonicalize the comparison to EQ, GT, GTU. */
17278 code = reverse_condition (code);
17284 code = reverse_condition (code);
17290 code = swap_condition (code);
17291 x = cop0, cop0 = cop1, cop1 = x;
17295 gcc_unreachable ();
17298 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17299 if (mode == V2DImode)
17304 /* SSE4.1 supports EQ. */
17305 if (!TARGET_SSE4_1)
17311 /* SSE4.2 supports GT/GTU. */
17312 if (!TARGET_SSE4_2)
17317 gcc_unreachable ();
17321 /* Unsigned parallel compare is not supported by the hardware.
17322 Play some tricks to turn this into a signed comparison against 0. */
17326 cop0 = force_reg (mode, cop0);
17334 rtx (*gen_sub3) (rtx, rtx, rtx);
17336 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17338 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17340 gen_sub3 = (mode == V4SImode
17341 ? gen_subv4si3 : gen_subv2di3);
17342 t1 = gen_reg_rtx (mode);
17343 emit_insn (gen_sub3 (t1, cop0, mask));
17345 t2 = gen_reg_rtx (mode);
17346 emit_insn (gen_sub3 (t2, cop1, mask));
17356 /* Perform a parallel unsigned saturating subtraction. */
17357 x = gen_reg_rtx (mode);
17358 emit_insn (gen_rtx_SET (VOIDmode, x,
17359 gen_rtx_US_MINUS (mode, cop0, cop1)));
17362 cop1 = CONST0_RTX (mode);
17368 gcc_unreachable ();
17373 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17374 operands[1+negate], operands[2-negate]);
17376 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17377 operands[2-negate]);
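/* Illustrative sketch (not part of GCC; guarded out): the signed-bias
   trick above for one lane.  Subtracting 2^31 (the sign-bit mask) from
   both operands maps unsigned order onto signed order, which the
   hardware's signed pcmpgt can then test.  */
#if 0
#include <stdint.h>

static int
gtu_model (uint32_t a, uint32_t b)
{
  return (int32_t) (a - 0x80000000u) > (int32_t) (b - 0x80000000u);
}
#endif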
17381 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17382 true if we should do zero extension, else sign extension. HIGH_P is
17383 true if we want the N/2 high elements, else the low elements. */
17386 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17388 enum machine_mode imode = GET_MODE (operands[1]);
17389 rtx (*unpack)(rtx, rtx, rtx);
17396 unpack = gen_vec_interleave_highv16qi;
17398 unpack = gen_vec_interleave_lowv16qi;
17402 unpack = gen_vec_interleave_highv8hi;
17404 unpack = gen_vec_interleave_lowv8hi;
17408 unpack = gen_vec_interleave_highv4si;
17410 unpack = gen_vec_interleave_lowv4si;
17413 gcc_unreachable ();
17416 dest = gen_lowpart (imode, operands[0]);
17419 se = force_reg (imode, CONST0_RTX (imode));
17421 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17422 operands[1], pc_rtx, pc_rtx);
17424 emit_insn (unpack (dest, operands[1], se));
17427 /* This function performs the same task as ix86_expand_sse_unpack,
17428 but with SSE4.1 instructions. */
17431 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17433 enum machine_mode imode = GET_MODE (operands[1]);
17434 rtx (*unpack)(rtx, rtx);
17441 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17443 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17447 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17449 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17453 unpack = gen_sse4_1_zero_extendv2siv2di2;
17455 unpack = gen_sse4_1_sign_extendv2siv2di2;
17458 gcc_unreachable ();
17461 dest = operands[0];
17464 /* Shift the higher 8 bytes into the lower 8 bytes. */
17465 src = gen_reg_rtx (imode);
17466 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17467 gen_lowpart (V1TImode, operands[1]),
17473 emit_insn (unpack (dest, src));
17476 /* Expand conditional increment or decrement using adc/sbb instructions.
17477 The default case using setcc followed by the conditional move can be
17478 done by generic code. */
17480 ix86_expand_int_addcc (rtx operands[])
17482 enum rtx_code code = GET_CODE (operands[1]);
17484 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17486 rtx val = const0_rtx;
17487 bool fpcmp = false;
17488 enum machine_mode mode;
17489 rtx op0 = XEXP (operands[1], 0);
17490 rtx op1 = XEXP (operands[1], 1);
17492 if (operands[3] != const1_rtx
17493 && operands[3] != constm1_rtx)
17495 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17497 code = GET_CODE (compare_op);
17499 flags = XEXP (compare_op, 0);
17501 if (GET_MODE (flags) == CCFPmode
17502 || GET_MODE (flags) == CCFPUmode)
17505 code = ix86_fp_compare_code_to_integer (code);
17512 PUT_CODE (compare_op,
17513 reverse_condition_maybe_unordered
17514 (GET_CODE (compare_op)));
17516 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17519 mode = GET_MODE (operands[0]);
17521 /* Construct either adc or sbb insn. */
17522 if ((code == LTU) == (operands[3] == constm1_rtx))
17527 insn = gen_subqi3_carry;
17530 insn = gen_subhi3_carry;
17533 insn = gen_subsi3_carry;
17536 insn = gen_subdi3_carry;
17539 gcc_unreachable ();
17547 insn = gen_addqi3_carry;
17550 insn = gen_addhi3_carry;
17553 insn = gen_addsi3_carry;
17556 insn = gen_adddi3_carry;
17559 gcc_unreachable ();
17562 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
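/* Illustrative sketch (not part of GCC; guarded out): the conditional
   increment the adc path implements -- the compare sets the carry flag,
   then "adc $0" folds it into the addition.  */
#if 0
static unsigned int
addcc_model (unsigned int x, unsigned int a, unsigned int b)
{
  return x + (a < b);   /* cmp a, b ; adc $0, x  (carry == LTU) */
}
#endif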
17568 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17569 works for floating point parameters and non-offsettable memories.
17570 For pushes, it returns just stack offsets; the values will be saved
17571 in the right order. At most four parts are generated. */
17574 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17579 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17581 size = (GET_MODE_SIZE (mode) + 4) / 8;
17583 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17584 gcc_assert (size >= 2 && size <= 4);
17586 /* Optimize constant pool reference to immediates. This is used by fp
17587 moves, which force all constants to memory to allow combining. */
17588 if (MEM_P (operand) && MEM_READONLY_P (operand))
17590 rtx tmp = maybe_get_pool_constant (operand);
17595 if (MEM_P (operand) && !offsettable_memref_p (operand))
17597 /* The only non-offsettable memories we handle are pushes. */
17598 int ok = push_operand (operand, VOIDmode);
17602 operand = copy_rtx (operand);
17603 PUT_MODE (operand, Pmode);
17604 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17608 if (GET_CODE (operand) == CONST_VECTOR)
17610 enum machine_mode imode = int_mode_for_mode (mode);
17611 /* Caution: if we looked through a constant pool memory above,
17612 the operand may actually have a different mode now. That's
17613 ok, since we want to pun this all the way back to an integer. */
17614 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17615 gcc_assert (operand != NULL);
17621 if (mode == DImode)
17622 split_di (&operand, 1, &parts[0], &parts[1]);
17627 if (REG_P (operand))
17629 gcc_assert (reload_completed);
17630 for (i = 0; i < size; i++)
17631 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17633 else if (offsettable_memref_p (operand))
17635 operand = adjust_address (operand, SImode, 0);
17636 parts[0] = operand;
17637 for (i = 1; i < size; i++)
17638 parts[i] = adjust_address (operand, SImode, 4 * i);
17640 else if (GET_CODE (operand) == CONST_DOUBLE)
17645 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17649 real_to_target (l, &r, mode);
17650 parts[3] = gen_int_mode (l[3], SImode);
17651 parts[2] = gen_int_mode (l[2], SImode);
17654 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17655 parts[2] = gen_int_mode (l[2], SImode);
17658 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17661 gcc_unreachable ();
17663 parts[1] = gen_int_mode (l[1], SImode);
17664 parts[0] = gen_int_mode (l[0], SImode);
17667 gcc_unreachable ();
17672 if (mode == TImode)
17673 split_ti (&operand, 1, &parts[0], &parts[1]);
17674 if (mode == XFmode || mode == TFmode)
17676 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
17677 if (REG_P (operand))
17679 gcc_assert (reload_completed);
17680 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17681 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17683 else if (offsettable_memref_p (operand))
17685 operand = adjust_address (operand, DImode, 0);
17686 parts[0] = operand;
17687 parts[1] = adjust_address (operand, upper_mode, 8);
17689 else if (GET_CODE (operand) == CONST_DOUBLE)
17694 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17695 real_to_target (l, &r, mode);
17697 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17698 if (HOST_BITS_PER_WIDE_INT >= 64)
17701 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17702 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17705 parts[0] = immed_double_const (l[0], l[1], DImode);
17707 if (upper_mode == SImode)
17708 parts[1] = gen_int_mode (l[2], SImode);
17709 else if (HOST_BITS_PER_WIDE_INT >= 64)
17712 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17713 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17716 parts[1] = immed_double_const (l[2], l[3], DImode);
17719 gcc_unreachable ();
17726 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17727 Return false when normal moves are needed; true when all required
17728 insns have been emitted. Operands 2-5 contain the destination parts
17729 in the correct order; operands 6-9 contain the source parts. */
17732 ix86_split_long_move (rtx operands[])
17737 int collisions = 0;
17738 enum machine_mode mode = GET_MODE (operands[0]);
17739 bool collisionparts[4];
17741 /* The DFmode expanders may ask us to move double.
17742 For a 64-bit target this is a single move. By hiding the fact
17743 here we simplify the i386.md splitters. */
17744 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17746 /* Optimize constant pool reference to immediates. This is used by
17747 fp moves, which force all constants to memory to allow combining. */
17749 if (MEM_P (operands[1])
17750 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17751 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17752 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17753 if (push_operand (operands[0], VOIDmode))
17755 operands[0] = copy_rtx (operands[0]);
17756 PUT_MODE (operands[0], Pmode);
17759 operands[0] = gen_lowpart (DImode, operands[0]);
17760 operands[1] = gen_lowpart (DImode, operands[1]);
17761 emit_move_insn (operands[0], operands[1]);
17765 /* The only non-offsettable memory we handle is push. */
17766 if (push_operand (operands[0], VOIDmode))
17769 gcc_assert (!MEM_P (operands[0])
17770 || offsettable_memref_p (operands[0]));
17772 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17773 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17775 /* When emitting a push, watch for source operands on the stack. */
17776 if (push && MEM_P (operands[1])
17777 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17779 rtx src_base = XEXP (part[1][nparts - 1], 0);
17781 /* Compensate for the stack decrement by 4. */
17782 if (!TARGET_64BIT && nparts == 3
17783 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17784 src_base = plus_constant (src_base, 4);
17786 /* src_base refers to the stack pointer and is
17787 automatically decreased by the emitted pushes. */
17788 for (i = 0; i < nparts; i++)
17789 part[1][i] = change_address (part[1][i],
17790 GET_MODE (part[1][i]), src_base);
17793 /* We need to copy in the right order in case an address register
17794 of the source overlaps the destination. */
17795 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17799 for (i = 0; i < nparts; i++)
17802 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17803 if (collisionparts[i])
17807 /* Collision in the middle part can be handled by reordering. */
17808 if (collisions == 1 && nparts == 3 && collisionparts [1])
17810 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17811 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17813 else if (collisions == 1
17815 && (collisionparts [1] || collisionparts [2]))
17817 if (collisionparts [1])
17819 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17820 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17824 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17825 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17829 /* If there are more collisions, we can't handle them by reordering.
17830 Do an lea to the last part and use only one colliding move. */
17831 else if (collisions > 1)
17837 base = part[0][nparts - 1];
17839 /* Handle the case when the last part isn't valid for lea.
17840 Happens in 64-bit mode storing the 12-byte XFmode. */
17841 if (GET_MODE (base) != Pmode)
17842 base = gen_rtx_REG (Pmode, REGNO (base));
17844 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17845 part[1][0] = replace_equiv_address (part[1][0], base);
17846 for (i = 1; i < nparts; i++)
17848 tmp = plus_constant (base, UNITS_PER_WORD * i);
17849 part[1][i] = replace_equiv_address (part[1][i], tmp);
17860 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17861 emit_insn (gen_addsi3 (stack_pointer_rtx,
17862 stack_pointer_rtx, GEN_INT (-4)));
17863 emit_move_insn (part[0][2], part[1][2]);
17865 else if (nparts == 4)
17867 emit_move_insn (part[0][3], part[1][3]);
17868 emit_move_insn (part[0][2], part[1][2]);
17873 /* In 64-bit mode we don't have a 32-bit push available. If the operand is
17874 a register, that is fine: we just use the larger counterpart. We also
17875 retype the memory operand; this comes from an attempt to avoid a REX
17876 prefix when moving the second half of a TFmode value. */
17877 if (GET_MODE (part[1][1]) == SImode)
17879 switch (GET_CODE (part[1][1]))
17882 part[1][1] = adjust_address (part[1][1], DImode, 0);
17886 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17890 gcc_unreachable ();
17893 if (GET_MODE (part[1][0]) == SImode)
17894 part[1][0] = part[1][1];
17897 emit_move_insn (part[0][1], part[1][1]);
17898 emit_move_insn (part[0][0], part[1][0]);
17902 /* Choose correct order to not overwrite the source before it is copied. */
17903 if ((REG_P (part[0][0])
17904 && REG_P (part[1][1])
17905 && (REGNO (part[0][0]) == REGNO (part[1][1])
17907 && REGNO (part[0][0]) == REGNO (part[1][2]))
17909 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17911 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17913 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17915 operands[2 + i] = part[0][j];
17916 operands[6 + i] = part[1][j];
17921 for (i = 0; i < nparts; i++)
17923 operands[2 + i] = part[0][i];
17924 operands[6 + i] = part[1][i];
17928 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17929 if (optimize_insn_for_size_p ())
17931 for (j = 0; j < nparts - 1; j++)
17932 if (CONST_INT_P (operands[6 + j])
17933 && operands[6 + j] != const0_rtx
17934 && REG_P (operands[2 + j]))
17935 for (i = j; i < nparts - 1; i++)
17936 if (CONST_INT_P (operands[7 + i])
17937 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17938 operands[7 + i] = operands[2 + j];
17941 for (i = 0; i < nparts; i++)
17942 emit_move_insn (operands[2 + i], operands[6 + i]);
17947 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17948 left shift by a constant, either using a single shift or
17949 a sequence of add instructions. */
17952 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17956 emit_insn ((mode == DImode
17958 : gen_adddi3) (operand, operand, operand));
17960 else if (!optimize_insn_for_size_p ()
17961 && count * ix86_cost->add <= ix86_cost->shift_const)
17964 for (i = 0; i < count; i++)
17966 emit_insn ((mode == DImode
17968 : gen_adddi3) (operand, operand, operand));
17972 emit_insn ((mode == DImode
17974 : gen_ashldi3) (operand, operand, GEN_INT (count)));
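
/* Editor's illustration (a sketch, not part of GCC): the add-based
   expansion above exploits the identity x + x == x << 1, so a constant
   left shift by COUNT can be emitted as COUNT self-additions whenever
   COUNT * add-cost does not exceed the constant-shift cost.  */
#if 0
static unsigned int
shl2_via_adds (unsigned int x)
{
  x += x;	/* x <<= 1 */
  x += x;	/* x <<= 1; net effect is x << 2 */
  return x;
}
#endif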
17978 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17980 rtx low[2], high[2];
17982 const int single_width = mode == DImode ? 32 : 64;
17984 if (CONST_INT_P (operands[2]))
17986 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17987 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17989 if (count >= single_width)
17991 emit_move_insn (high[0], low[1]);
17992 emit_move_insn (low[0], const0_rtx);
17994 if (count > single_width)
17995 ix86_expand_ashl_const (high[0], count - single_width, mode);
17999 if (!rtx_equal_p (operands[0], operands[1]))
18000 emit_move_insn (operands[0], operands[1]);
18001 emit_insn ((mode == DImode
18003 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
18004 ix86_expand_ashl_const (low[0], count, mode);
18009 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18011 if (operands[1] == const1_rtx)
18013 /* Assuming we've chosen QImode-capable registers, 1 << N
18014 can be done with two 32/64-bit shifts, no branches, no cmoves. */
18015 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18017 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18019 ix86_expand_clear (low[0]);
18020 ix86_expand_clear (high[0]);
18021 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
18023 d = gen_lowpart (QImode, low[0]);
18024 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18025 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18026 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18028 d = gen_lowpart (QImode, high[0]);
18029 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18030 s = gen_rtx_NE (QImode, flags, const0_rtx);
18031 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18034 /* Otherwise, we can get the same results by manually performing
18035 a bit extract operation on bit 5/6, and then performing the two
18036 shifts. The two methods of getting 0/1 into low/high are exactly
18037 the same size. Avoiding the shift in the bit extract case helps
18038 pentium4 a bit; no one else seems to care much either way. */
18043 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18044 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
18046 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
18047 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18049 emit_insn ((mode == DImode
18051 : gen_lshrdi3) (high[0], high[0],
18052 GEN_INT (mode == DImode ? 5 : 6)));
18053 emit_insn ((mode == DImode
18055 : gen_anddi3) (high[0], high[0], const1_rtx));
18056 emit_move_insn (low[0], high[0]);
18057 emit_insn ((mode == DImode
18059 : gen_xordi3) (low[0], low[0], const1_rtx));
18062 emit_insn ((mode == DImode
18064 : gen_ashldi3) (low[0], low[0], operands[2]));
18065 emit_insn ((mode == DImode
18067 : gen_ashldi3) (high[0], high[0], operands[2]));
18071 if (operands[1] == constm1_rtx)
18073 /* For -1 << N, we can avoid the shld instruction, because we
18074 know that we're shifting 0...31/63 ones into a -1. */
18075 emit_move_insn (low[0], constm1_rtx);
18076 if (optimize_insn_for_size_p ())
18077 emit_move_insn (high[0], low[0]);
18079 emit_move_insn (high[0], constm1_rtx);
18083 if (!rtx_equal_p (operands[0], operands[1]))
18084 emit_move_insn (operands[0], operands[1]);
18086 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18087 emit_insn ((mode == DImode
18089 : gen_x86_64_shld) (high[0], low[0], operands[2]));
18092 emit_insn ((mode == DImode
18094 : gen_ashldi3) (low[0], low[0], operands[2]));
18096 if (TARGET_CMOVE && scratch)
18098 ix86_expand_clear (scratch);
18099 emit_insn ((mode == DImode
18100 ? gen_x86_shiftsi_adj_1
18101 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
18105 emit_insn ((mode == DImode
18106 ? gen_x86_shiftsi_adj_2
18107 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
18111 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18113 rtx low[2], high[2];
18115 const int single_width = mode == DImode ? 32 : 64;
18117 if (CONST_INT_P (operands[2]))
18119 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18120 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18122 if (count == single_width * 2 - 1)
18124 emit_move_insn (high[0], high[1]);
18125 emit_insn ((mode == DImode
18127 : gen_ashrdi3) (high[0], high[0],
18128 GEN_INT (single_width - 1)));
18129 emit_move_insn (low[0], high[0]);
18132 else if (count >= single_width)
18134 emit_move_insn (low[0], high[1]);
18135 emit_move_insn (high[0], low[0]);
18136 emit_insn ((mode == DImode
18138 : gen_ashrdi3) (high[0], high[0],
18139 GEN_INT (single_width - 1)));
18140 if (count > single_width)
18141 emit_insn ((mode == DImode
18143 : gen_ashrdi3) (low[0], low[0],
18144 GEN_INT (count - single_width)));
18148 if (!rtx_equal_p (operands[0], operands[1]))
18149 emit_move_insn (operands[0], operands[1]);
18150 emit_insn ((mode == DImode
18152 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18153 emit_insn ((mode == DImode
18155 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
18160 if (!rtx_equal_p (operands[0], operands[1]))
18161 emit_move_insn (operands[0], operands[1]);
18163 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18165 emit_insn ((mode == DImode
18167 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18168 emit_insn ((mode == DImode
18170 : gen_ashrdi3) (high[0], high[0], operands[2]));
18172 if (TARGET_CMOVE && scratch)
18174 emit_move_insn (scratch, high[0]);
18175 emit_insn ((mode == DImode
18177 : gen_ashrdi3) (scratch, scratch,
18178 GEN_INT (single_width - 1)));
18179 emit_insn ((mode == DImode
18180 ? gen_x86_shiftsi_adj_1
18181 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18185 emit_insn ((mode == DImode
18186 ? gen_x86_shiftsi_adj_3
18187 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
18192 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18194 rtx low[2], high[2];
18196 const int single_width = mode == DImode ? 32 : 64;
18198 if (CONST_INT_P (operands[2]))
18200 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18201 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18203 if (count >= single_width)
18205 emit_move_insn (low[0], high[1]);
18206 ix86_expand_clear (high[0]);
18208 if (count > single_width)
18209 emit_insn ((mode == DImode
18211 : gen_lshrdi3) (low[0], low[0],
18212 GEN_INT (count - single_width)));
18216 if (!rtx_equal_p (operands[0], operands[1]))
18217 emit_move_insn (operands[0], operands[1]);
18218 emit_insn ((mode == DImode
18220 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18221 emit_insn ((mode == DImode
18223 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18228 if (!rtx_equal_p (operands[0], operands[1]))
18229 emit_move_insn (operands[0], operands[1]);
18231 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18233 emit_insn ((mode == DImode
18235 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18236 emit_insn ((mode == DImode
18238 : gen_lshrdi3) (high[0], high[0], operands[2]));
18240 /* Heh. By reversing the arguments, we can reuse this pattern. */
18241 if (TARGET_CMOVE && scratch)
18243 ix86_expand_clear (scratch);
18244 emit_insn ((mode == DImode
18245 ? gen_x86_shiftsi_adj_1
18246 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18250 emit_insn ((mode == DImode
18251 ? gen_x86_shiftsi_adj_2
18252 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
18256 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18258 predict_jump (int prob)
18260 rtx insn = get_last_insn ();
18261 gcc_assert (JUMP_P (insn));
18262 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
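
/* Editor's note: PROB is expressed as a fraction of REG_BR_PROB_BASE,
   as the callers below illustrate; a hedged usage sketch:  */
#if 0
predict_jump (REG_BR_PROB_BASE * 90 / 100);	/* taken ~90% of the time */
predict_jump (REG_BR_PROB_BASE * 50 / 100);	/* even odds */
#endif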
18265 /* Helper function for the string operations below. Test whether VARIABLE
18266 is aligned to VALUE bytes; if so, jump to the returned label. */
18268 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18270 rtx label = gen_label_rtx ();
18271 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18272 if (GET_MODE (variable) == DImode)
18273 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18275 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18276 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18279 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18281 predict_jump (REG_BR_PROB_BASE * 90 / 100);
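
/* Editor's sketch of the usage pattern (as in the epilogues below; the
   emitted code between the call and the label runs only when the tested
   bits of VARIABLE are nonzero, i.e. the block is *not* aligned):  */
#if 0
rtx label = ix86_expand_aligntest (count, 4, true);
/* ... emit code handling the (count & 4) != 0 case here ... */
emit_label (label);
LABEL_NUSES (label) = 1;
#endif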
18285 /* Adjust COUNTER by the VALUE. */
18287 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18289 if (GET_MODE (countreg) == DImode)
18290 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
18292 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
18295 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
18297 ix86_zero_extend_to_Pmode (rtx exp)
18300 if (GET_MODE (exp) == VOIDmode)
18301 return force_reg (Pmode, exp);
18302 if (GET_MODE (exp) == Pmode)
18303 return copy_to_mode_reg (Pmode, exp);
18304 r = gen_reg_rtx (Pmode);
18305 emit_insn (gen_zero_extendsidi2 (r, exp));
18309 /* Divide COUNTREG by SCALE. */
18311 scale_counter (rtx countreg, int scale)
18317 if (CONST_INT_P (countreg))
18318 return GEN_INT (INTVAL (countreg) / scale);
18319 gcc_assert (REG_P (countreg));
18321 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18322 GEN_INT (exact_log2 (scale)),
18323 NULL, 1, OPTAB_DIRECT);
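
/* Editor's worked example: for a compile-time count the division folds
   away, e.g. scale_counter (GEN_INT (64), 4) yields GEN_INT (16); for a
   register count a logical shift right by exact_log2 (scale) is emitted,
   SCALE being a power of two for every mode size used here.  */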
18327 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18328 DImode for constant loop counts. */
18330 static enum machine_mode
18331 counter_mode (rtx count_exp)
18333 if (GET_MODE (count_exp) != VOIDmode)
18334 return GET_MODE (count_exp);
18335 if (!CONST_INT_P (count_exp))
18337 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
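
/* Editor's example: a constant count such as 0x1000 fits in 32 bits, so
   SImode is chosen even when TARGET_64BIT; only constants with bits set
   above bit 31 (or a genuinely DImode count register) yield DImode.  */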
18342 /* When SRCPTR is non-NULL, output a simple loop to move memory from
18343 SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the
18344 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
18345 the equivalent loop to set memory to VALUE (assumed to be in MODE).
18347 The size is rounded down to a whole number of chunks moved at once.
18348 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
18352 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18353 rtx destptr, rtx srcptr, rtx value,
18354 rtx count, enum machine_mode mode, int unroll,
18357 rtx out_label, top_label, iter, tmp;
18358 enum machine_mode iter_mode = counter_mode (count);
18359 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18360 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18366 top_label = gen_label_rtx ();
18367 out_label = gen_label_rtx ();
18368 iter = gen_reg_rtx (iter_mode);
18370 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18371 NULL, 1, OPTAB_DIRECT);
18372 /* Those two should combine. */
18373 if (piece_size == const1_rtx)
18375 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18377 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18379 emit_move_insn (iter, const0_rtx);
18381 emit_label (top_label);
18383 tmp = convert_modes (Pmode, iter_mode, iter, true);
18384 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18385 destmem = change_address (destmem, mode, x_addr);
18389 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18390 srcmem = change_address (srcmem, mode, y_addr);
18392 /* When unrolling for chips that reorder memory reads and writes,
18393 we can save registers by using a single temporary.
18394 Also, using 4 temporaries is overkill in 32-bit mode. */
18395 if (!TARGET_64BIT && 0)
18397 for (i = 0; i < unroll; i++)
18402 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18404 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18406 emit_move_insn (destmem, srcmem);
18412 gcc_assert (unroll <= 4);
18413 for (i = 0; i < unroll; i++)
18415 tmpreg[i] = gen_reg_rtx (mode);
18419 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18421 emit_move_insn (tmpreg[i], srcmem);
18423 for (i = 0; i < unroll; i++)
18428 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18430 emit_move_insn (destmem, tmpreg[i]);
18435 for (i = 0; i < unroll; i++)
18439 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18440 emit_move_insn (destmem, value);
18443 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18444 true, OPTAB_LIB_WIDEN);
18446 emit_move_insn (iter, tmp);
18448 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18450 if (expected_size != -1)
18452 expected_size /= GET_MODE_SIZE (mode) * unroll;
18453 if (expected_size == 0)
18455 else if (expected_size > REG_BR_PROB_BASE)
18456 predict_jump (REG_BR_PROB_BASE - 1);
18458 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18461 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18462 iter = ix86_zero_extend_to_Pmode (iter);
18463 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18464 true, OPTAB_LIB_WIDEN);
18465 if (tmp != destptr)
18466 emit_move_insn (destptr, tmp);
18469 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18470 true, OPTAB_LIB_WIDEN);
18472 emit_move_insn (srcptr, tmp);
18474 emit_label (out_label);
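
/* Editor's sketch (hypothetical C rendering of the RTL shape emitted
   above, for the copy case; size, iter, dest, src and the "chunk" type
   are placeholders, S stands for GET_MODE_SIZE (mode); the actual RTL
   buffers all UNROLL chunks through temporaries before storing):  */
#if 0
size = count & ~(S * unroll - 1);
for (iter = 0; iter < size; iter += S * unroll)
  for (i = 0; i < unroll; i++)
    ((chunk *) (dest + iter))[i] = ((chunk *) (src + iter))[i];
dest += size;
src += size;
#endif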
18477 /* Output a "rep; mov" instruction.
18478 Arguments have the same meaning as for the previous function. */
18480 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18481 rtx destptr, rtx srcptr,
18483 enum machine_mode mode)
18489 /* If the size is known, it is shorter to use rep movs. */
18490 if (mode == QImode && CONST_INT_P (count)
18491 && !(INTVAL (count) & 3))
18494 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18495 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18496 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18497 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18498 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18499 if (mode != QImode)
18501 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18502 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18503 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18504 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18505 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18506 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18510 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18511 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18513 if (CONST_INT_P (count))
18515 count = GEN_INT (INTVAL (count)
18516 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18517 destmem = shallow_copy_rtx (destmem);
18518 srcmem = shallow_copy_rtx (srcmem);
18519 set_mem_size (destmem, count);
18520 set_mem_size (srcmem, count);
18524 if (MEM_SIZE (destmem))
18525 set_mem_size (destmem, NULL_RTX);
18526 if (MEM_SIZE (srcmem))
18527 set_mem_size (srcmem, NULL_RTX);
18529 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
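
/* Editor's note: for MODE == SImode the sequence above boils down to the
   classic pattern (sketch, byte count in %ecx and a multiple of 4):
       shrl	$2, %ecx
       rep movsl
   DESTEXP/SRCEXP describe the final %edi/%esi values so the rep insn
   carries correct dataflow information.  */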
18533 /* Output a "rep; stos" instruction.
18534 Arguments have the same meaning as for the previous function. */
18536 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18537 rtx count, enum machine_mode mode,
18543 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18544 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18545 value = force_reg (mode, gen_lowpart (mode, value));
18546 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18547 if (mode != QImode)
18549 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18550 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18551 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18554 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18555 if (orig_value == const0_rtx && CONST_INT_P (count))
18557 count = GEN_INT (INTVAL (count)
18558 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18559 destmem = shallow_copy_rtx (destmem);
18560 set_mem_size (destmem, count);
18562 else if (MEM_SIZE (destmem))
18563 set_mem_size (destmem, NULL_RTX);
18564 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
18568 emit_strmov (rtx destmem, rtx srcmem,
18569 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18571 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18572 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18573 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18576 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
18578 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18579 rtx destptr, rtx srcptr, rtx count, int max_size)
18582 if (CONST_INT_P (count))
18584 HOST_WIDE_INT countval = INTVAL (count);
18587 if ((countval & 0x10) && max_size > 16)
18591 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18592 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18595 gcc_unreachable ();
18598 if ((countval & 0x08) && max_size > 8)
18601 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18604 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18605 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18609 if ((countval & 0x04) && max_size > 4)
18611 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18614 if ((countval & 0x02) && max_size > 2)
18616 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18619 if ((countval & 0x01) && max_size > 1)
18621 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18628 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18629 count, 1, OPTAB_DIRECT);
18630 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18631 count, QImode, 1, 4);
18635 /* When there are stringops, we can cheaply increase dest and src pointers.
18636 Otherwise we save code size by maintaining an offset (zero is readily
18637 available from the preceding rep operation) and using x86 addressing modes. */
18639 if (TARGET_SINGLE_STRINGOP)
18643 rtx label = ix86_expand_aligntest (count, 4, true);
18644 src = change_address (srcmem, SImode, srcptr);
18645 dest = change_address (destmem, SImode, destptr);
18646 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18647 emit_label (label);
18648 LABEL_NUSES (label) = 1;
18652 rtx label = ix86_expand_aligntest (count, 2, true);
18653 src = change_address (srcmem, HImode, srcptr);
18654 dest = change_address (destmem, HImode, destptr);
18655 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18656 emit_label (label);
18657 LABEL_NUSES (label) = 1;
18661 rtx label = ix86_expand_aligntest (count, 1, true);
18662 src = change_address (srcmem, QImode, srcptr);
18663 dest = change_address (destmem, QImode, destptr);
18664 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18665 emit_label (label);
18666 LABEL_NUSES (label) = 1;
18671 rtx offset = force_reg (Pmode, const0_rtx);
18676 rtx label = ix86_expand_aligntest (count, 4, true);
18677 src = change_address (srcmem, SImode, srcptr);
18678 dest = change_address (destmem, SImode, destptr);
18679 emit_move_insn (dest, src);
18680 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18681 true, OPTAB_LIB_WIDEN);
18683 emit_move_insn (offset, tmp);
18684 emit_label (label);
18685 LABEL_NUSES (label) = 1;
18689 rtx label = ix86_expand_aligntest (count, 2, true);
18690 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18691 src = change_address (srcmem, HImode, tmp);
18692 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18693 dest = change_address (destmem, HImode, tmp);
18694 emit_move_insn (dest, src);
18695 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18696 true, OPTAB_LIB_WIDEN);
18698 emit_move_insn (offset, tmp);
18699 emit_label (label);
18700 LABEL_NUSES (label) = 1;
18704 rtx label = ix86_expand_aligntest (count, 1, true);
18705 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18706 src = change_address (srcmem, QImode, tmp);
18707 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18708 dest = change_address (destmem, QImode, tmp);
18709 emit_move_insn (dest, src);
18710 emit_label (label);
18711 LABEL_NUSES (label) = 1;
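
/* Editor's worked example: with a constant remainder of 13 bytes and
   max_size 16, the constant branch above emits an 8-byte move (two
   4-byte moves on 32-bit targets), then a 4-byte and a 1-byte move,
   since 13 = 8 + 4 + 1; no loop is generated.  */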
18716 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18718 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18719 rtx count, int max_size)
18722 expand_simple_binop (counter_mode (count), AND, count,
18723 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18724 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18725 gen_lowpart (QImode, value), count, QImode,
18729 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18731 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18735 if (CONST_INT_P (count))
18737 HOST_WIDE_INT countval = INTVAL (count);
18740 if ((countval & 0x10) && max_size > 16)
18744 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18745 emit_insn (gen_strset (destptr, dest, value));
18746 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18747 emit_insn (gen_strset (destptr, dest, value));
18750 gcc_unreachable ();
18753 if ((countval & 0x08) && max_size > 8)
18757 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18758 emit_insn (gen_strset (destptr, dest, value));
18762 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18763 emit_insn (gen_strset (destptr, dest, value));
18764 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18765 emit_insn (gen_strset (destptr, dest, value));
18769 if ((countval & 0x04) && max_size > 4)
18771 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18772 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18775 if ((countval & 0x02) && max_size > 2)
18777 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18778 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18781 if ((countval & 0x01) && max_size > 1)
18783 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18784 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18791 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18796 rtx label = ix86_expand_aligntest (count, 16, true);
18799 dest = change_address (destmem, DImode, destptr);
18800 emit_insn (gen_strset (destptr, dest, value));
18801 emit_insn (gen_strset (destptr, dest, value));
18805 dest = change_address (destmem, SImode, destptr);
18806 emit_insn (gen_strset (destptr, dest, value));
18807 emit_insn (gen_strset (destptr, dest, value));
18808 emit_insn (gen_strset (destptr, dest, value));
18809 emit_insn (gen_strset (destptr, dest, value));
18811 emit_label (label);
18812 LABEL_NUSES (label) = 1;
18816 rtx label = ix86_expand_aligntest (count, 8, true);
18819 dest = change_address (destmem, DImode, destptr);
18820 emit_insn (gen_strset (destptr, dest, value));
18824 dest = change_address (destmem, SImode, destptr);
18825 emit_insn (gen_strset (destptr, dest, value));
18826 emit_insn (gen_strset (destptr, dest, value));
18828 emit_label (label);
18829 LABEL_NUSES (label) = 1;
18833 rtx label = ix86_expand_aligntest (count, 4, true);
18834 dest = change_address (destmem, SImode, destptr);
18835 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18836 emit_label (label);
18837 LABEL_NUSES (label) = 1;
18841 rtx label = ix86_expand_aligntest (count, 2, true);
18842 dest = change_address (destmem, HImode, destptr);
18843 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18844 emit_label (label);
18845 LABEL_NUSES (label) = 1;
18849 rtx label = ix86_expand_aligntest (count, 1, true);
18850 dest = change_address (destmem, QImode, destptr);
18851 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18852 emit_label (label);
18853 LABEL_NUSES (label) = 1;
18857 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
18858 to DESIRED_ALIGNMENT. */
18860 expand_movmem_prologue (rtx destmem, rtx srcmem,
18861 rtx destptr, rtx srcptr, rtx count,
18862 int align, int desired_alignment)
18864 if (align <= 1 && desired_alignment > 1)
18866 rtx label = ix86_expand_aligntest (destptr, 1, false);
18867 srcmem = change_address (srcmem, QImode, srcptr);
18868 destmem = change_address (destmem, QImode, destptr);
18869 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18870 ix86_adjust_counter (count, 1);
18871 emit_label (label);
18872 LABEL_NUSES (label) = 1;
18874 if (align <= 2 && desired_alignment > 2)
18876 rtx label = ix86_expand_aligntest (destptr, 2, false);
18877 srcmem = change_address (srcmem, HImode, srcptr);
18878 destmem = change_address (destmem, HImode, destptr);
18879 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18880 ix86_adjust_counter (count, 2);
18881 emit_label (label);
18882 LABEL_NUSES (label) = 1;
18884 if (align <= 4 && desired_alignment > 4)
18886 rtx label = ix86_expand_aligntest (destptr, 4, false);
18887 srcmem = change_address (srcmem, SImode, srcptr);
18888 destmem = change_address (destmem, SImode, destptr);
18889 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18890 ix86_adjust_counter (count, 4);
18891 emit_label (label);
18892 LABEL_NUSES (label) = 1;
18894 gcc_assert (desired_alignment <= 8);
18897 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
18898 ALIGN_BYTES is how many bytes need to be copied. */
18900 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18901 int desired_align, int align_bytes)
18904 rtx src_size, dst_size;
18906 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18907 if (src_align_bytes >= 0)
18908 src_align_bytes = desired_align - src_align_bytes;
18909 src_size = MEM_SIZE (src);
18910 dst_size = MEM_SIZE (dst);
18911 if (align_bytes & 1)
18913 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18914 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18916 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18918 if (align_bytes & 2)
18920 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18921 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18922 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18923 set_mem_align (dst, 2 * BITS_PER_UNIT);
18924 if (src_align_bytes >= 0
18925 && (src_align_bytes & 1) == (align_bytes & 1)
18926 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18927 set_mem_align (src, 2 * BITS_PER_UNIT);
18929 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18931 if (align_bytes & 4)
18933 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18934 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18935 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18936 set_mem_align (dst, 4 * BITS_PER_UNIT);
18937 if (src_align_bytes >= 0)
18939 unsigned int src_align = 0;
18940 if ((src_align_bytes & 3) == (align_bytes & 3))
18942 else if ((src_align_bytes & 1) == (align_bytes & 1))
18944 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18945 set_mem_align (src, src_align * BITS_PER_UNIT);
18948 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18950 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18951 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18952 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18953 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18954 if (src_align_bytes >= 0)
18956 unsigned int src_align = 0;
18957 if ((src_align_bytes & 7) == (align_bytes & 7))
18959 else if ((src_align_bytes & 3) == (align_bytes & 3))
18961 else if ((src_align_bytes & 1) == (align_bytes & 1))
18963 if (src_align > (unsigned int) desired_align)
18964 src_align = desired_align;
18965 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18966 set_mem_align (src, src_align * BITS_PER_UNIT);
18969 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18971 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
18976 /* Store enough at DEST to align DEST, known to be aligned by ALIGN,
18977 to DESIRED_ALIGNMENT. */
18979 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18980 int align, int desired_alignment)
18982 if (align <= 1 && desired_alignment > 1)
18984 rtx label = ix86_expand_aligntest (destptr, 1, false);
18985 destmem = change_address (destmem, QImode, destptr);
18986 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18987 ix86_adjust_counter (count, 1);
18988 emit_label (label);
18989 LABEL_NUSES (label) = 1;
18991 if (align <= 2 && desired_alignment > 2)
18993 rtx label = ix86_expand_aligntest (destptr, 2, false);
18994 destmem = change_address (destmem, HImode, destptr);
18995 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18996 ix86_adjust_counter (count, 2);
18997 emit_label (label);
18998 LABEL_NUSES (label) = 1;
19000 if (align <= 4 && desired_alignment > 4)
19002 rtx label = ix86_expand_aligntest (destptr, 4, false);
19003 destmem = change_address (destmem, SImode, destptr);
19004 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
19005 ix86_adjust_counter (count, 4);
19006 emit_label (label);
19007 LABEL_NUSES (label) = 1;
19009 gcc_assert (desired_alignment <= 8);
19012 /* Store enough at DST to align DST, known to be aligned by ALIGN,
19013 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
19015 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19016 int desired_align, int align_bytes)
19019 rtx dst_size = MEM_SIZE (dst);
19020 if (align_bytes & 1)
19022 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19024 emit_insn (gen_strset (destreg, dst,
19025 gen_lowpart (QImode, value)));
19027 if (align_bytes & 2)
19029 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19030 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19031 set_mem_align (dst, 2 * BITS_PER_UNIT);
19033 emit_insn (gen_strset (destreg, dst,
19034 gen_lowpart (HImode, value)));
19036 if (align_bytes & 4)
19038 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19039 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19040 set_mem_align (dst, 4 * BITS_PER_UNIT);
19042 emit_insn (gen_strset (destreg, dst,
19043 gen_lowpart (SImode, value)));
19045 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19046 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19047 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19049 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19053 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19054 static enum stringop_alg
19055 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19056 int *dynamic_check)
19058 const struct stringop_algs * algs;
19059 bool optimize_for_speed;
19060 /* Algorithms using the rep prefix want at least edi and ecx;
19061 additionally, memset wants eax and memcpy wants esi. Don't
19062 consider such algorithms if the user has appropriated those
19063 registers for their own purposes. */
19064 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19066 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19068 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19069 || (alg != rep_prefix_1_byte \
19070 && alg != rep_prefix_4_byte \
19071 && alg != rep_prefix_8_byte))
19072 const struct processor_costs *cost;
19074 /* Even if the string operation call is cold, we still might spend a lot
19075 of time processing large blocks. */
19076 if (optimize_function_for_size_p (cfun)
19077 || (optimize_insn_for_size_p ()
19078 && expected_size != -1 && expected_size < 256))
19079 optimize_for_speed = false;
19081 optimize_for_speed = true;
19083 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19085 *dynamic_check = -1;
19087 algs = &cost->memset[TARGET_64BIT != 0];
19089 algs = &cost->memcpy[TARGET_64BIT != 0];
19090 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19091 return stringop_alg;
19092 /* rep; movq or rep; movl is the smallest variant. */
19093 else if (!optimize_for_speed)
19095 if (!count || (count & 3))
19096 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19098 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19100 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
19102 else if (expected_size != -1 && expected_size < 4)
19103 return loop_1_byte;
19104 else if (expected_size != -1)
19107 enum stringop_alg alg = libcall;
19108 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19110 /* We get here if the algorithms that were not libcall-based
19111 were rep-prefix based and we are unable to use rep prefixes
19112 based on global register usage. Break out of the loop and
19113 use the heuristic below. */
19114 if (algs->size[i].max == 0)
19116 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19118 enum stringop_alg candidate = algs->size[i].alg;
19120 if (candidate != libcall && ALG_USABLE_P (candidate))
19122 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19123 last non-libcall inline algorithm. */
19124 if (TARGET_INLINE_ALL_STRINGOPS)
19126 /* When the current size is best copied by a libcall
19127 but we are still forced to inline, run the heuristic below
19128 that will pick code for medium-sized blocks. */
19129 if (alg != libcall)
19133 else if (ALG_USABLE_P (candidate))
19137 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19139 /* When asked to inline the call anyway, try to pick a meaningful choice.
19140 We look for the maximal size of block that is faster to copy by hand, and
19141 take blocks of at most that size, guessing that the average size will
19142 be roughly half of the block.
19144 If this turns out to be bad, we might simply specify the preferred
19145 choice in ix86_costs. */
19146 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19147 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19150 enum stringop_alg alg;
19152 bool any_alg_usable_p = true;
19154 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19156 enum stringop_alg candidate = algs->size[i].alg;
19157 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19159 if (candidate != libcall && candidate
19160 && ALG_USABLE_P (candidate))
19161 max = algs->size[i].max;
19163 /* If there aren't any usable algorithms, then recursing on
19164 smaller sizes isn't going to find anything. Just return the
19165 simple byte-at-a-time copy loop. */
19166 if (!any_alg_usable_p)
19168 /* Pick something reasonable. */
19169 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19170 *dynamic_check = 128;
19171 return loop_1_byte;
19175 alg = decide_alg (count, max / 2, memset, dynamic_check);
19176 gcc_assert (*dynamic_check == -1);
19177 gcc_assert (alg != libcall);
19178 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19179 *dynamic_check = max;
19182 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19183 #undef ALG_USABLE_P
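
/* Editor's illustration with hypothetical cost-table entries: given
     {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}
   an expected_size of 100 selects loop, 1000 selects rep_prefix_4_byte,
   and anything beyond 8192 falls back to libcall (unless
   -minline-all-stringops forces an inline variant via the code above).  */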
19186 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19187 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19189 decide_alignment (int align,
19190 enum stringop_alg alg,
19193 int desired_align = 0;
19197 gcc_unreachable ();
19199 case unrolled_loop:
19200 desired_align = GET_MODE_SIZE (Pmode);
19202 case rep_prefix_8_byte:
19205 case rep_prefix_4_byte:
19206 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
19207 copying a whole cache line at once. */
19208 if (TARGET_PENTIUMPRO)
19213 case rep_prefix_1_byte:
19214 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
19215 copying a whole cache line at once. */
19216 if (TARGET_PENTIUMPRO)
19230 if (desired_align < align)
19231 desired_align = align;
19232 if (expected_size != -1 && expected_size < 4)
19233 desired_align = align;
19234 return desired_align;
19237 /* Return the smallest power of 2 greater than VAL. */
19239 smallest_pow2_greater_than (int val)
19247 /* Expand string move (memcpy) operation. Use i386 string operations when
19248 profitable. expand_setmem contains similar code. The code depends upon
19249 architecture, block size and alignment, but always has the same overall structure:
19252 1) Prologue guard: Conditional that jumps up to epilogues for small
19253 blocks that can be handled by epilogue alone. This is faster but
19254 also needed for correctness, since the prologue assumes the block is larger
19255 than the desired alignment.
19257 Optional dynamic check for size and libcall for large
19258 blocks is emitted here too, with -minline-stringops-dynamically.
19260 2) Prologue: copy first few bytes in order to get destination aligned
19261 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19262 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19263 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
19265 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19266 with specified algorithm.
19268 4) Epilogue: code copying tail of the block that is too small to be
19269 handled by main body (or up to size guarded by prologue guard). */
19272 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19273 rtx expected_align_exp, rtx expected_size_exp)
19279 rtx jump_around_label = NULL;
19280 HOST_WIDE_INT align = 1;
19281 unsigned HOST_WIDE_INT count = 0;
19282 HOST_WIDE_INT expected_size = -1;
19283 int size_needed = 0, epilogue_size_needed;
19284 int desired_align = 0, align_bytes = 0;
19285 enum stringop_alg alg;
19287 bool need_zero_guard = false;
19289 if (CONST_INT_P (align_exp))
19290 align = INTVAL (align_exp);
19291 /* i386 can do misaligned access at a reasonably increased cost. */
19292 if (CONST_INT_P (expected_align_exp)
19293 && INTVAL (expected_align_exp) > align)
19294 align = INTVAL (expected_align_exp);
19295 /* ALIGN is the minimum of destination and source alignment, but we care here
19296 just about destination alignment. */
19297 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19298 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19300 if (CONST_INT_P (count_exp))
19301 count = expected_size = INTVAL (count_exp);
19302 if (CONST_INT_P (expected_size_exp) && count == 0)
19303 expected_size = INTVAL (expected_size_exp);
19305 /* Make sure we don't need to care about overflow later on. */
19306 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19309 /* Step 0: Decide on preferred algorithm, desired alignment and
19310 size of chunks to be copied by main loop. */
19312 alg = decide_alg (count, expected_size, false, &dynamic_check);
19313 desired_align = decide_alignment (align, alg, expected_size);
19315 if (!TARGET_ALIGN_STRINGOPS)
19316 align = desired_align;
19318 if (alg == libcall)
19320 gcc_assert (alg != no_stringop);
19322 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19323 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19324 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19329 gcc_unreachable ();
19331 need_zero_guard = true;
19332 size_needed = GET_MODE_SIZE (Pmode);
19334 case unrolled_loop:
19335 need_zero_guard = true;
19336 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19338 case rep_prefix_8_byte:
19341 case rep_prefix_4_byte:
19344 case rep_prefix_1_byte:
19348 need_zero_guard = true;
19353 epilogue_size_needed = size_needed;
19355 /* Step 1: Prologue guard. */
19357 /* Alignment code needs count to be in register. */
19358 if (CONST_INT_P (count_exp) && desired_align > align)
19360 if (INTVAL (count_exp) > desired_align
19361 && INTVAL (count_exp) > size_needed)
19364 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19365 if (align_bytes <= 0)
19368 align_bytes = desired_align - align_bytes;
19370 if (align_bytes == 0)
19371 count_exp = force_reg (counter_mode (count_exp), count_exp);
19373 gcc_assert (desired_align >= 1 && align >= 1);
19375 /* Ensure that alignment prologue won't copy past end of block. */
19376 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19378 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19379 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19380 Make sure it is a power of 2. */
19381 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19385 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19387 /* If main algorithm works on QImode, no epilogue is needed.
19388 For small sizes just don't align anything. */
19389 if (size_needed == 1)
19390 desired_align = align;
19397 label = gen_label_rtx ();
19398 emit_cmp_and_jump_insns (count_exp,
19399 GEN_INT (epilogue_size_needed),
19400 LTU, 0, counter_mode (count_exp), 1, label);
19401 if (expected_size == -1 || expected_size < epilogue_size_needed)
19402 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19404 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19408 /* Emit code to decide at runtime whether a library call or inline code
should be used. */
19410 if (dynamic_check != -1)
19412 if (CONST_INT_P (count_exp))
19414 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19416 emit_block_move_via_libcall (dst, src, count_exp, false);
19417 count_exp = const0_rtx;
19423 rtx hot_label = gen_label_rtx ();
19424 jump_around_label = gen_label_rtx ();
19425 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19426 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19427 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19428 emit_block_move_via_libcall (dst, src, count_exp, false);
19429 emit_jump (jump_around_label);
19430 emit_label (hot_label);
19434 /* Step 2: Alignment prologue. */
19436 if (desired_align > align)
19438 if (align_bytes == 0)
19440 /* Except for the first move in the epilogue, we no longer know
19441 the constant offset in the aliasing info. It doesn't seem worth
19442 the pain to maintain it for the first move, so throw away the info early. */
19444 src = change_address (src, BLKmode, srcreg);
19445 dst = change_address (dst, BLKmode, destreg);
19446 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19451 /* If we know how many bytes need to be stored before dst is
19452 sufficiently aligned, maintain aliasing info accurately. */
19453 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19454 desired_align, align_bytes);
19455 count_exp = plus_constant (count_exp, -align_bytes);
19456 count -= align_bytes;
19458 if (need_zero_guard
19459 && (count < (unsigned HOST_WIDE_INT) size_needed
19460 || (align_bytes == 0
19461 && count < ((unsigned HOST_WIDE_INT) size_needed
19462 + desired_align - align))))
19464 /* It is possible that we copied enough so the main loop will not execute. */
19466 gcc_assert (size_needed > 1);
19467 if (label == NULL_RTX)
19468 label = gen_label_rtx ();
19469 emit_cmp_and_jump_insns (count_exp,
19470 GEN_INT (size_needed),
19471 LTU, 0, counter_mode (count_exp), 1, label);
19472 if (expected_size == -1
19473 || expected_size < (desired_align - align) / 2 + size_needed)
19474 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19476 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19479 if (label && size_needed == 1)
19481 emit_label (label);
19482 LABEL_NUSES (label) = 1;
19484 epilogue_size_needed = 1;
19486 else if (label == NULL_RTX)
19487 epilogue_size_needed = size_needed;
19489 /* Step 3: Main loop. */
19495 gcc_unreachable ();
19497 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19498 count_exp, QImode, 1, expected_size);
19501 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19502 count_exp, Pmode, 1, expected_size);
19504 case unrolled_loop:
19505 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
19506 registers for 4 temporaries anyway. */
19507 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19508 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19511 case rep_prefix_8_byte:
19512 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19515 case rep_prefix_4_byte:
19516 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19519 case rep_prefix_1_byte:
19520 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19524 /* Properly adjust the offsets of the src and dest memory for aliasing. */
19525 if (CONST_INT_P (count_exp))
19527 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19528 (count / size_needed) * size_needed);
19529 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19530 (count / size_needed) * size_needed);
19534 src = change_address (src, BLKmode, srcreg);
19535 dst = change_address (dst, BLKmode, destreg);
19538 /* Step 4: Epilogue to copy the remaining bytes. */
19542 /* When the main loop is done, COUNT_EXP might hold the original count,
19543 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19544 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19545 bytes. Compensate if needed. */
19547 if (size_needed < epilogue_size_needed)
19550 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19551 GEN_INT (size_needed - 1), count_exp, 1,
19553 if (tmp != count_exp)
19554 emit_move_insn (count_exp, tmp);
19556 emit_label (label);
19557 LABEL_NUSES (label) = 1;
19560 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19561 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19562 epilogue_size_needed);
19563 if (jump_around_label)
19564 emit_label (jump_around_label);
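
/* Editor's sketch of the code shape produced above (hypothetical C
   rendering of the four steps; the helper names are placeholders):  */
#if 0
if (count < epilogue_size_needed)
  goto epilogue;				/* 1: prologue guard */
while (dest & (desired_align - 1))
  copy_byte_and_decrement_count ();		/* 2: alignment prologue */
main_loop_or_rep_prefix ();			/* 3: SIZE_NEEDED chunks */
epilogue:
copy_tail (count & (epilogue_size_needed - 1));	/* 4: epilogue */
#endif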
19568 /* Helper function for memset. For QImode value 0xXY produce
19569 0xXYXYXYXY of the width specified by MODE. This is essentially
19570 a * 0x10101010, but we can do slightly better than
19571 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
19574 promote_duplicated_reg (enum machine_mode mode, rtx val)
19576 enum machine_mode valmode = GET_MODE (val);
19578 int nops = mode == DImode ? 3 : 2;
19580 gcc_assert (mode == SImode || mode == DImode);
19581 if (val == const0_rtx)
19582 return copy_to_mode_reg (mode, const0_rtx);
19583 if (CONST_INT_P (val))
19585 HOST_WIDE_INT v = INTVAL (val) & 255;
19589 if (mode == DImode)
19590 v |= (v << 16) << 16;
19591 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19594 if (valmode == VOIDmode)
19596 if (valmode != QImode)
19597 val = gen_lowpart (QImode, val);
19598 if (mode == QImode)
19600 if (!TARGET_PARTIAL_REG_STALL)
19602 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19603 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19604 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19605 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19607 rtx reg = convert_modes (mode, QImode, val, true);
19608 tmp = promote_duplicated_reg (mode, const1_rtx);
19609 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19614 rtx reg = convert_modes (mode, QImode, val, true);
19616 if (!TARGET_PARTIAL_REG_STALL)
19617 if (mode == SImode)
19618 emit_insn (gen_movsi_insv_1 (reg, reg));
19620 emit_insn (gen_movdi_insv_1 (reg, reg));
19623 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19624 NULL, 1, OPTAB_DIRECT);
19626 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19628 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19629 NULL, 1, OPTAB_DIRECT);
19630 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19631 if (mode == SImode)
19633 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19634 NULL, 1, OPTAB_DIRECT);
19635 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
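
/* Editor's illustration (plain C, assuming a 32-bit unsigned int): the
   shift-and-or sequence emitted above computes the same value as the
   0x01010101 multiply:  */
#if 0
static unsigned int
duplicate_byte (unsigned char b)
{
  unsigned int v = b;	/* 0x000000XY */
  v |= v << 8;		/* 0x0000XYXY */
  v |= v << 16;		/* 0xXYXYXYXY */
  return v;
}
#endif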
19640 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
19641 will be needed by the main loop copying SIZE_NEEDED chunks and by the
19642 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
19644 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19649 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19650 promoted_val = promote_duplicated_reg (DImode, val);
19651 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19652 promoted_val = promote_duplicated_reg (SImode, val);
19653 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19654 promoted_val = promote_duplicated_reg (HImode, val);
19656 promoted_val = val;
19658 return promoted_val;
19661 /* Expand string set operation (memset). Use i386 string operations when
19662 profitable. See the ix86_expand_movmem comment for an explanation of the
19663 individual steps performed. */
19665 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19666 rtx expected_align_exp, rtx expected_size_exp)
19671 rtx jump_around_label = NULL;
19672 HOST_WIDE_INT align = 1;
19673 unsigned HOST_WIDE_INT count = 0;
19674 HOST_WIDE_INT expected_size = -1;
19675 int size_needed = 0, epilogue_size_needed;
19676 int desired_align = 0, align_bytes = 0;
19677 enum stringop_alg alg;
19678 rtx promoted_val = NULL;
19679 bool force_loopy_epilogue = false;
19681 bool need_zero_guard = false;
19683 if (CONST_INT_P (align_exp))
19684 align = INTVAL (align_exp);
19685 /* i386 can do misaligned access at a reasonably increased cost. */
19686 if (CONST_INT_P (expected_align_exp)
19687 && INTVAL (expected_align_exp) > align)
19688 align = INTVAL (expected_align_exp);
19689 if (CONST_INT_P (count_exp))
19690 count = expected_size = INTVAL (count_exp);
19691 if (CONST_INT_P (expected_size_exp) && count == 0)
19692 expected_size = INTVAL (expected_size_exp);
19694 /* Make sure we don't need to care about overflow later on. */
19695 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19698 /* Step 0: Decide on preferred algorithm, desired alignment and
19699 size of chunks to be copied by main loop. */
19701 alg = decide_alg (count, expected_size, true, &dynamic_check);
19702 desired_align = decide_alignment (align, alg, expected_size);
19704 if (!TARGET_ALIGN_STRINGOPS)
19705 align = desired_align;
19707 if (alg == libcall)
19709 gcc_assert (alg != no_stringop);
19711 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19712 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19717 gcc_unreachable ();
19719 need_zero_guard = true;
19720 size_needed = GET_MODE_SIZE (Pmode);
19722 case unrolled_loop:
19723 need_zero_guard = true;
19724 size_needed = GET_MODE_SIZE (Pmode) * 4;
19726 case rep_prefix_8_byte:
19729 case rep_prefix_4_byte:
19732 case rep_prefix_1_byte:
19736 need_zero_guard = true;
19740 epilogue_size_needed = size_needed;
19742 /* Step 1: Prologue guard. */
19744 /* Alignment code needs count to be in register. */
19745 if (CONST_INT_P (count_exp) && desired_align > align)
19747 if (INTVAL (count_exp) > desired_align
19748 && INTVAL (count_exp) > size_needed)
19751 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19752 if (align_bytes <= 0)
19755 align_bytes = desired_align - align_bytes;
19757 if (align_bytes == 0)
19759 enum machine_mode mode = SImode;
19760 if (TARGET_64BIT && (count & ~0xffffffff))
19762 count_exp = force_reg (mode, count_exp);
19765 /* Do the cheap promotion to allow better CSE across the
19766 main loop and epilogue (i.e. one load of the big constant in
19767 front of all the code). */
19768 if (CONST_INT_P (val_exp))
19769 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19770 desired_align, align);
19771 /* Ensure that alignment prologue won't copy past end of block. */
19772 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19774 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19775 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19776 Make sure it is a power of 2. */
19777 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19779 /* To improve performance on small blocks, we jump around the VAL-promoting
19780 code. This means that if the promoted VAL is not constant,
19781 we might not use it in the epilogue and have to use a byte loop instead. */
19783 if (epilogue_size_needed > 2 && !promoted_val)
19784 force_loopy_epilogue = true;
19787 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19789 /* If main algorithm works on QImode, no epilogue is needed.
19790 For small sizes just don't align anything. */
19791 if (size_needed == 1)
19792 desired_align = align;
19799 label = gen_label_rtx ();
19800 emit_cmp_and_jump_insns (count_exp,
19801 GEN_INT (epilogue_size_needed),
19802 LTU, 0, counter_mode (count_exp), 1, label);
19803 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19804 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19806 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19809 if (dynamic_check != -1)
19811 rtx hot_label = gen_label_rtx ();
19812 jump_around_label = gen_label_rtx ();
19813 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19814 LEU, 0, counter_mode (count_exp), 1, hot_label);
19815 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19816 set_storage_via_libcall (dst, count_exp, val_exp, false);
19817 emit_jump (jump_around_label);
19818 emit_label (hot_label);
19821 /* Step 2: Alignment prologue. */
19823 /* Do the expensive promotion once we have branched off the small blocks. */
19825 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19826 desired_align, align);
19827 gcc_assert (desired_align >= 1 && align >= 1);
19829 if (desired_align > align)
19831 if (align_bytes == 0)
19833 /* Except for the first move in the epilogue, we no longer know
19834 the constant offset in the aliasing info. It doesn't seem worth
19835 the pain to maintain it for the first move, so throw away the info early. */
19837 dst = change_address (dst, BLKmode, destreg);
19838 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19843 /* If we know how many bytes need to be stored before dst is
19844 sufficiently aligned, maintain aliasing info accurately. */
19845 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19846 desired_align, align_bytes);
19847 count_exp = plus_constant (count_exp, -align_bytes);
19848 count -= align_bytes;
19850 if (need_zero_guard
19851 && (count < (unsigned HOST_WIDE_INT) size_needed
19852 || (align_bytes == 0
19853 && count < ((unsigned HOST_WIDE_INT) size_needed
19854 + desired_align - align))))
19856 /* It is possible that we copied enough so the main loop will not execute. */
19858 gcc_assert (size_needed > 1);
19859 if (label == NULL_RTX)
19860 label = gen_label_rtx ();
19861 emit_cmp_and_jump_insns (count_exp,
19862 GEN_INT (size_needed),
19863 LTU, 0, counter_mode (count_exp), 1, label);
19864 if (expected_size == -1
19865 || expected_size < (desired_align - align) / 2 + size_needed)
19866 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19868 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19871 if (label && size_needed == 1)
19873 emit_label (label);
19874 LABEL_NUSES (label) = 1;
19876 promoted_val = val_exp;
19877 epilogue_size_needed = 1;
19879 else if (label == NULL_RTX)
19880 epilogue_size_needed = size_needed;
19882 /* Step 3: Main loop. */
19888 gcc_unreachable ();
19890 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19891 count_exp, QImode, 1, expected_size);
19894 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19895 count_exp, Pmode, 1, expected_size);
19897 case unrolled_loop:
19898 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19899 count_exp, Pmode, 4, expected_size);
19901 case rep_prefix_8_byte:
19902 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19905 case rep_prefix_4_byte:
19906 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19909 case rep_prefix_1_byte:
19910 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19914 /* Properly adjust the offset of the destination memory for aliasing.  */
19915 if (CONST_INT_P (count_exp))
19916 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19917 (count / size_needed) * size_needed);
19919 dst = change_address (dst, BLKmode, destreg);
19921 /* Step 4: Epilogue to copy the remaining bytes. */
19925 /* When the main loop is done, COUNT_EXP might hold the original count,
19926 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19927 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19928 bytes.  Compensate if needed.  */
19930 if (size_needed < epilogue_size_needed)
19933 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19934 GEN_INT (size_needed - 1), count_exp, 1,
19936 if (tmp != count_exp)
19937 emit_move_insn (count_exp, tmp);
19939 emit_label (label);
19940 LABEL_NUSES (label) = 1;
19943 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19945 if (force_loopy_epilogue)
19946 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19947 epilogue_size_needed);
19949 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19950 epilogue_size_needed);
19952 if (jump_around_label)
19953 emit_label (jump_around_label);
19957 /* Expand the appropriate insns for doing strlen if not just doing
19958 repnz; scasb
19960 out = result, initialized with the start address
19961 align_rtx = alignment of the address.
19962 scratch = scratch register, initialized with the start address when
19963 not aligned, otherwise undefined
19965 This is just the body.  It needs the initializations mentioned above and
19966 some address computing at the end.  These things are done in i386.md.  */
19969 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19973 rtx align_2_label = NULL_RTX;
19974 rtx align_3_label = NULL_RTX;
19975 rtx align_4_label = gen_label_rtx ();
19976 rtx end_0_label = gen_label_rtx ();
19978 rtx tmpreg = gen_reg_rtx (SImode);
19979 rtx scratch = gen_reg_rtx (SImode);
19983 if (CONST_INT_P (align_rtx))
19984 align = INTVAL (align_rtx);
19986 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19988 /* Is there a known alignment and is it less than 4? */
19991 rtx scratch1 = gen_reg_rtx (Pmode);
19992 emit_move_insn (scratch1, out);
19993 /* Is there a known alignment and is it not 2? */
19996 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19997 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19999 /* Leave just the 3 lower bits. */
20000 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
20001 NULL_RTX, 0, OPTAB_WIDEN);
20003 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20004 Pmode, 1, align_4_label);
20005 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
20006 Pmode, 1, align_2_label);
20007 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
20008 Pmode, 1, align_3_label);
20012 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20013 check whether it is aligned to a 4-byte boundary.  */
20015 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20016 NULL_RTX, 0, OPTAB_WIDEN);
20018 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20019 Pmode, 1, align_4_label);
20022 mem = change_address (src, QImode, out);
20024 /* Now compare the bytes. */
20026 /* Compare the first n unaligned bytes on a byte-per-byte basis.  */
20027 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20028 QImode, 1, end_0_label);
20030 /* Increment the address. */
20031 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20033 /* Not needed with an alignment of 2 */
20036 emit_label (align_2_label);
20038 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20041 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20043 emit_label (align_3_label);
20046 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20049 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20052 /* Generate loop to check 4 bytes at a time.  It is not a good idea to
20053 align this loop.  It only makes programs huge and does not help
20054 them run faster.  */
20055 emit_label (align_4_label);
20057 mem = change_address (src, SImode, out);
20058 emit_move_insn (scratch, mem);
20059 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20061 /* This formula yields a nonzero result iff one of the bytes is zero.
20062 This saves three branches inside the loop and many cycles.  */
20064 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20065 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20066 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20067 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20068 gen_int_mode (0x80808080, SImode)));
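/* A minimal C sketch of the test built above (added; hypothetical
   helper, not emitted code):

     static int word_has_zero_byte (unsigned int x)
     {
       return ((x - 0x01010101U) & ~x & 0x80808080U) != 0;
     }

   For x = 0x61006261 the subtraction borrows through the zero byte,
   giving 0x5fff6160; ANDing with ~x = 0x9eff9d9e and the mask leaves
   only bit 23 set, flagging the zero byte with no false positives.  */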
20069 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20074 rtx reg = gen_reg_rtx (SImode);
20075 rtx reg2 = gen_reg_rtx (Pmode);
20076 emit_move_insn (reg, tmpreg);
20077 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20079 /* If zero is not in the first two bytes, move two bytes forward. */
20080 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20081 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20082 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20083 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20084 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20087 /* Emit lea manually to avoid clobbering of flags. */
20088 emit_insn (gen_rtx_SET (SImode, reg2,
20089 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20091 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20092 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20093 emit_insn (gen_rtx_SET (VOIDmode, out,
20094 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20100 rtx end_2_label = gen_label_rtx ();
20101 /* Is zero in the first two bytes? */
20103 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20104 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20105 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20106 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20107 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20109 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20110 JUMP_LABEL (tmp) = end_2_label;
20112 /* Not in the first two. Move two bytes forward. */
20113 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20114 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20116 emit_label (end_2_label);
20120 /* Avoid branch in fixing the byte. */
20121 tmpreg = gen_lowpart (QImode, tmpreg);
20122 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20123 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20124 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20125 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
20127 emit_label (end_0_label);
20130 /* Expand strlen. */
20133 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20135 rtx addr, scratch1, scratch2, scratch3, scratch4;
20137 /* The generic case of strlen expander is long.  Avoid expanding it
20138 unless TARGET_INLINE_ALL_STRINGOPS.  */
20140 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20141 && !TARGET_INLINE_ALL_STRINGOPS
20142 && !optimize_insn_for_size_p ()
20143 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20146 addr = force_reg (Pmode, XEXP (src, 0));
20147 scratch1 = gen_reg_rtx (Pmode);
20149 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20150 && !optimize_insn_for_size_p ())
20152 /* Well it seems that some optimizer does not combine a call like
20153 foo(strlen(bar), strlen(bar));
20154 when the move and the subtraction are done here.  It does calculate
20155 the length just once when these instructions are done inside of
20156 output_strlen_unroll().  But I think since &bar[strlen(bar)] is
20157 often used and I use one fewer register for the lifetime of
20158 output_strlen_unroll() this is better.  */
20160 emit_move_insn (out, addr);
20162 ix86_expand_strlensi_unroll_1 (out, src, align);
20164 /* strlensi_unroll_1 returns the address of the zero at the end of
20165 the string, like memchr(), so compute the length by subtracting
20166 the start address. */
20167 emit_insn (ix86_gen_sub3 (out, out, addr));
20173 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20174 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20177 scratch2 = gen_reg_rtx (Pmode);
20178 scratch3 = gen_reg_rtx (Pmode);
20179 scratch4 = force_reg (Pmode, constm1_rtx);
20181 emit_move_insn (scratch3, addr);
20182 eoschar = force_reg (QImode, eoschar);
20184 src = replace_equiv_address_nv (src, scratch3);
20186 /* If .md starts supporting :P, this can be done in .md. */
20187 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20188 scratch4), UNSPEC_SCAS);
20189 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20190 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20191 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
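/* For reference (added), the sequence built here is the classic
   repnz scasb inline strlen; roughly, under the usual register
   assignment:

     movl  $-1, %ecx        # scratch4: unlimited count
     repnz scasb            # scan (%edi) for the terminator
     notl  %ecx             # ~%ecx == strlen + 1
     leal  -1(%ecx), %eax   # out = strlen

   since scas decrements the count once per byte scanned, including
   the terminator itself.  */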
20196 /* For a given symbol (function), construct code to compute the address
20197 of its PLT entry in the large x86-64 PIC model.  */
20199 construct_plt_address (rtx symbol)
20201 rtx tmp = gen_reg_rtx (Pmode);
20202 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20204 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20205 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20207 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20208 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
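/* Conceptually (added note; register names are illustrative) this emits

     movabs $symbol@PLTOFF, %tmp
     addq   %pic_base, %tmp

   i.e. tmp = GOT base + symbol@PLTOFF, the address of SYMBOL's PLT
   entry, since @PLTOFF relocations are offsets from the GOT in the
   large PIC model.  */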
20213 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20215 rtx pop, int sibcall)
20217 rtx use = NULL, call;
20219 if (pop == const0_rtx)
20221 gcc_assert (!TARGET_64BIT || !pop);
20223 if (TARGET_MACHO && !TARGET_64BIT)
20226 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20227 fnaddr = machopic_indirect_call_target (fnaddr);
20232 /* Static functions and indirect calls don't need the pic register. */
20233 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20234 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20235 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20236 use_reg (&use, pic_offset_table_rtx);
20239 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20241 rtx al = gen_rtx_REG (QImode, AX_REG);
20242 emit_move_insn (al, callarg2);
20243 use_reg (&use, al);
20246 if (ix86_cmodel == CM_LARGE_PIC
20248 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20249 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20250 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20252 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20253 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20255 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20256 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20259 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20261 call = gen_rtx_SET (VOIDmode, retval, call);
20264 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20265 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20266 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20269 && ix86_cfun_abi () == MS_ABI
20270 && (!callarg2 || INTVAL (callarg2) != -2))
20272 /* We need to represent that SI and DI registers are clobbered
20273 by SYSV calls.  */
20274 static int clobbered_registers[] = {
20275 XMM6_REG, XMM7_REG, XMM8_REG,
20276 XMM9_REG, XMM10_REG, XMM11_REG,
20277 XMM12_REG, XMM13_REG, XMM14_REG,
20278 XMM15_REG, SI_REG, DI_REG
20281 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20282 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20283 UNSPEC_MS_TO_SYSV_CALL);
20287 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20288 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20291 (SSE_REGNO_P (clobbered_registers[i])
20293 clobbered_registers[i]));
20295 call = gen_rtx_PARALLEL (VOIDmode,
20296 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20300 call = emit_call_insn (call);
20302 CALL_INSN_FUNCTION_USAGE (call) = use;
20306 /* Clear stack slot assignments remembered from previous functions.
20307 This is called from INIT_EXPANDERS once before RTL is emitted for each
20308 function.  */
20310 static struct machine_function *
20311 ix86_init_machine_status (void)
20313 struct machine_function *f;
20315 f = ggc_alloc_cleared_machine_function ();
20316 f->use_fast_prologue_epilogue_nregs = -1;
20317 f->tls_descriptor_call_expanded_p = 0;
20318 f->call_abi = ix86_abi;
20323 /* Return a MEM corresponding to a stack slot with mode MODE.
20324 Allocate a new slot if necessary.
20326 The RTL for a function can have several slots available: N is
20327 which slot to use. */
20330 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20332 struct stack_local_entry *s;
20334 gcc_assert (n < MAX_386_STACK_LOCALS);
20336 /* Virtual slot is valid only before vregs are instantiated. */
20337 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20339 for (s = ix86_stack_locals; s; s = s->next)
20340 if (s->mode == mode && s->n == n)
20341 return copy_rtx (s->rtl);
20343 s = ggc_alloc_stack_local_entry ();
20346 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20348 s->next = ix86_stack_locals;
20349 ix86_stack_locals = s;
20353 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20355 static GTY(()) rtx ix86_tls_symbol;
20357 ix86_tls_get_addr (void)
20360 if (!ix86_tls_symbol)
20362 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20363 (TARGET_ANY_GNU_TLS
20365 ? "___tls_get_addr"
20366 : "__tls_get_addr");
20369 return ix86_tls_symbol;
20372 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20374 static GTY(()) rtx ix86_tls_module_base_symbol;
20376 ix86_tls_module_base (void)
20379 if (!ix86_tls_module_base_symbol)
20381 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20382 "_TLS_MODULE_BASE_");
20383 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20384 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20387 return ix86_tls_module_base_symbol;
20390 /* Calculate the length of the memory address in the instruction
20391 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20394 memory_address_length (rtx addr)
20396 struct ix86_address parts;
20397 rtx base, index, disp;
20401 if (GET_CODE (addr) == PRE_DEC
20402 || GET_CODE (addr) == POST_INC
20403 || GET_CODE (addr) == PRE_MODIFY
20404 || GET_CODE (addr) == POST_MODIFY)
20407 ok = ix86_decompose_address (addr, &parts);
20410 if (parts.base && GET_CODE (parts.base) == SUBREG)
20411 parts.base = SUBREG_REG (parts.base);
20412 if (parts.index && GET_CODE (parts.index) == SUBREG)
20413 parts.index = SUBREG_REG (parts.index);
20416 index = parts.index;
20420 /* Rule of thumb:
20421 - esp as the base always wants an index,
20422 - ebp as the base always wants a displacement,
20423 - r12 as the base always wants an index,
20424 - r13 as the base always wants a displacement.  */
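/* For example (added): register-indirect (%esp) must be encoded with
   a SIB byte (modrm 0x04, SIB 0x24 for a reg field of 0), and (%ebp)
   needs a zero disp8 (modrm 0x45, disp 0x00), so each costs one byte
   more than (%eax); r12 and r13 behave the same modulo the REX
   prefix.  */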
20426 /* Register Indirect. */
20427 if (base && !index && !disp)
20429 /* esp (for its index) and ebp (for its displacement) need
20430 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
20431 mode.  */
20433 && (addr == arg_pointer_rtx
20434 || addr == frame_pointer_rtx
20435 || REGNO (addr) == SP_REG
20436 || REGNO (addr) == BP_REG
20437 || REGNO (addr) == R12_REG
20438 || REGNO (addr) == R13_REG))
20442 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
20443 is not disp32, but disp32(%rip), so for plain disp32 a
20444 SIB byte is needed, unless print_operand_address
20445 optimizes it into disp32(%rip) or (%rip) is implied
20446 by UNSPEC.  */
20447 else if (disp && !base && !index)
20454 if (GET_CODE (disp) == CONST)
20455 symbol = XEXP (disp, 0);
20456 if (GET_CODE (symbol) == PLUS
20457 && CONST_INT_P (XEXP (symbol, 1)))
20458 symbol = XEXP (symbol, 0);
20460 if (GET_CODE (symbol) != LABEL_REF
20461 && (GET_CODE (symbol) != SYMBOL_REF
20462 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20463 && (GET_CODE (symbol) != UNSPEC
20464 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20465 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20472 /* Find the length of the displacement constant. */
20475 if (base && satisfies_constraint_K (disp))
20480 /* ebp always wants a displacement. Similarly r13. */
20481 else if (base && REG_P (base)
20482 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20485 /* An index requires the two-byte modrm form.... */
20487 /* ...like esp (or r12), which always wants an index. */
20488 || base == arg_pointer_rtx
20489 || base == frame_pointer_rtx
20490 || (base && REG_P (base)
20491 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20508 /* Compute default value for "length_immediate" attribute.  When SHORTFORM
20509 is set, expect that the insn has an 8-bit immediate alternative.  */
20511 ix86_attr_length_immediate_default (rtx insn, int shortform)
20515 extract_insn_cached (insn);
20516 for (i = recog_data.n_operands - 1; i >= 0; --i)
20517 if (CONSTANT_P (recog_data.operand[i]))
20519 enum attr_mode mode = get_attr_mode (insn);
20522 if (shortform && CONST_INT_P (recog_data.operand[i]))
20524 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20531 ival = trunc_int_for_mode (ival, HImode);
20534 ival = trunc_int_for_mode (ival, SImode);
20539 if (IN_RANGE (ival, -128, 127))
20556 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
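/* E.g. (added) "addl $100, %eax" can use the short imm8 form (opcode
   0x83) since 100 fits in [-128, 127], while "addl $1000, %eax" needs
   the imm32 form (opcode 0x81).  In 64-bit mode "addq $0x12345678, %rax"
   still carries only four immediate bytes, sign-extended at run time;
   only mov accepts a full 64-bit immediate.  */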
20561 fatal_insn ("unknown insn mode", insn);
20566 /* Compute default value for "length_address" attribute. */
20568 ix86_attr_length_address_default (rtx insn)
20572 if (get_attr_type (insn) == TYPE_LEA)
20574 rtx set = PATTERN (insn), addr;
20576 if (GET_CODE (set) == PARALLEL)
20577 set = XVECEXP (set, 0, 0);
20579 gcc_assert (GET_CODE (set) == SET);
20581 addr = SET_SRC (set);
20582 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20584 if (GET_CODE (addr) == ZERO_EXTEND)
20585 addr = XEXP (addr, 0);
20586 if (GET_CODE (addr) == SUBREG)
20587 addr = SUBREG_REG (addr);
20590 return memory_address_length (addr);
20593 extract_insn_cached (insn);
20594 for (i = recog_data.n_operands - 1; i >= 0; --i)
20595 if (MEM_P (recog_data.operand[i]))
20597 constrain_operands_cached (reload_completed);
20598 if (which_alternative != -1)
20600 const char *constraints = recog_data.constraints[i];
20601 int alt = which_alternative;
20603 while (*constraints == '=' || *constraints == '+')
20606 while (*constraints++ != ',')
20608 /* Skip ignored operands. */
20609 if (*constraints == 'X')
20612 return memory_address_length (XEXP (recog_data.operand[i], 0));
20617 /* Compute default value for "length_vex" attribute.  It includes
20618 the 2- or 3-byte VEX prefix and 1 opcode byte.  */
20621 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20626 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
20627 requires the 3-byte VEX prefix.  */
20628 if (!has_0f_opcode || has_vex_w)
20631 /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
20635 extract_insn_cached (insn);
20637 for (i = recog_data.n_operands - 1; i >= 0; --i)
20638 if (REG_P (recog_data.operand[i]))
20640 /* The REX.W bit requires the 3-byte VEX prefix.  */
20641 if (GET_MODE (recog_data.operand[i]) == DImode
20642 && GENERAL_REG_P (recog_data.operand[i]))
20647 /* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
20648 if (MEM_P (recog_data.operand[i])
20649 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
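/* E.g. (added) "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte (C5)
   prefix, while "vaddps %xmm9, %xmm2, %xmm3" needs the 3-byte (C4)
   form: the rm-field register xmm9 requires the inverted B bit, which
   only the 3-byte prefix carries.  */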
20656 /* Return the maximum number of instructions a cpu can issue. */
20659 ix86_issue_rate (void)
20663 case PROCESSOR_PENTIUM:
20664 case PROCESSOR_ATOM:
20668 case PROCESSOR_PENTIUMPRO:
20669 case PROCESSOR_PENTIUM4:
20670 case PROCESSOR_ATHLON:
20672 case PROCESSOR_AMDFAM10:
20673 case PROCESSOR_NOCONA:
20674 case PROCESSOR_GENERIC32:
20675 case PROCESSOR_GENERIC64:
20676 case PROCESSOR_BDVER1:
20679 case PROCESSOR_CORE2:
20687 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
20688 flags set by DEP_INSN and no other register set by DEP_INSN.  */
20691 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20695 /* Simplify the test for uninteresting insns. */
20696 if (insn_type != TYPE_SETCC
20697 && insn_type != TYPE_ICMOV
20698 && insn_type != TYPE_FCMOV
20699 && insn_type != TYPE_IBR)
20702 if ((set = single_set (dep_insn)) != 0)
20704 set = SET_DEST (set);
20707 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20708 && XVECLEN (PATTERN (dep_insn), 0) == 2
20709 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20710 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20712 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20713 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20718 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20721 /* This test is true if the dependent insn reads the flags but
20722 not any other potentially set register. */
20723 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20726 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20732 /* Return true iff USE_INSN has a memory address with operands set by
20733 SET_INSN.  */
20736 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20739 extract_insn_cached (use_insn);
20740 for (i = recog_data.n_operands - 1; i >= 0; --i)
20741 if (MEM_P (recog_data.operand[i]))
20743 rtx addr = XEXP (recog_data.operand[i], 0);
20744 return modified_in_p (addr, set_insn) != 0;
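/* E.g. (added) on the original Pentium the pair

     addl $4, %ebx
     movl (%ebx), %eax

   hits an address-generation interlock: the load's address depends on
   the immediately preceding ALU result, costing an extra cycle, which
   ix86_adjust_cost models with the help of this predicate.  */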
20750 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20752 enum attr_type insn_type, dep_insn_type;
20753 enum attr_memory memory;
20755 int dep_insn_code_number;
20757 /* Anti and output dependencies have zero cost on all CPUs. */
20758 if (REG_NOTE_KIND (link) != 0)
20761 dep_insn_code_number = recog_memoized (dep_insn);
20763 /* If we can't recognize the insns, we can't really do anything. */
20764 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20767 insn_type = get_attr_type (insn);
20768 dep_insn_type = get_attr_type (dep_insn);
20772 case PROCESSOR_PENTIUM:
20773 /* Address Generation Interlock adds a cycle of latency. */
20774 if (insn_type == TYPE_LEA)
20776 rtx addr = PATTERN (insn);
20778 if (GET_CODE (addr) == PARALLEL)
20779 addr = XVECEXP (addr, 0, 0);
20781 gcc_assert (GET_CODE (addr) == SET);
20783 addr = SET_SRC (addr);
20784 if (modified_in_p (addr, dep_insn))
20787 else if (ix86_agi_dependent (dep_insn, insn))
20790 /* ??? Compares pair with jump/setcc. */
20791 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20794 /* Floating point stores require the value to be ready one cycle earlier.  */
20795 if (insn_type == TYPE_FMOV
20796 && get_attr_memory (insn) == MEMORY_STORE
20797 && !ix86_agi_dependent (dep_insn, insn))
20801 case PROCESSOR_PENTIUMPRO:
20802 memory = get_attr_memory (insn);
20804 /* INT->FP conversion is expensive. */
20805 if (get_attr_fp_int_src (dep_insn))
20808 /* There is one extra cycle of latency between an FP op and a store.  */
20809 if (insn_type == TYPE_FMOV
20810 && (set = single_set (dep_insn)) != NULL_RTX
20811 && (set2 = single_set (insn)) != NULL_RTX
20812 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20813 && MEM_P (SET_DEST (set2)))
20816 /* Show the ability of the reorder buffer to hide the latency of a load
20817 by executing it in parallel with the previous instruction, when the
20818 previous instruction is not needed to compute the address.  */
20819 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20820 && !ix86_agi_dependent (dep_insn, insn))
20822 /* Claim moves take one cycle, as the core can issue one load
20823 at a time and the next load can start a cycle later.  */
20824 if (dep_insn_type == TYPE_IMOV
20825 || dep_insn_type == TYPE_FMOV)
20833 memory = get_attr_memory (insn);
20835 /* The esp dependency is resolved before the instruction is really
20836 finished.  */
20838 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20841 /* INT->FP conversion is expensive. */
20842 if (get_attr_fp_int_src (dep_insn))
20845 /* Show the ability of the reorder buffer to hide the latency of a load
20846 by executing it in parallel with the previous instruction, when the
20847 previous instruction is not needed to compute the address.  */
20848 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20849 && !ix86_agi_dependent (dep_insn, insn))
20851 /* Claim moves take one cycle, as the core can issue one load
20852 at a time and the next load can start a cycle later.  */
20853 if (dep_insn_type == TYPE_IMOV
20854 || dep_insn_type == TYPE_FMOV)
20863 case PROCESSOR_ATHLON:
20865 case PROCESSOR_AMDFAM10:
20866 case PROCESSOR_BDVER1:
20867 case PROCESSOR_ATOM:
20868 case PROCESSOR_GENERIC32:
20869 case PROCESSOR_GENERIC64:
20870 memory = get_attr_memory (insn);
20872 /* Show the ability of the reorder buffer to hide the latency of a load
20873 by executing it in parallel with the previous instruction, when the
20874 previous instruction is not needed to compute the address.  */
20875 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20876 && !ix86_agi_dependent (dep_insn, insn))
20878 enum attr_unit unit = get_attr_unit (insn);
20881 /* Because of the difference between the lengths of the integer and
20882 floating unit pipeline preparation stages, the memory operands
20883 for floating point are cheaper.
20885 ??? For Athlon the difference is most probably 2.  */
20886 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20889 loadcost = TARGET_ATHLON ? 2 : 0;
20891 if (cost >= loadcost)
20904 /* How many alternative schedules to try.  This should be as wide as the
20905 scheduling freedom in the DFA, but no wider.  Making this value too
20906 large results in extra work for the scheduler.  */
20909 ia32_multipass_dfa_lookahead (void)
20913 case PROCESSOR_PENTIUM:
20916 case PROCESSOR_PENTIUMPRO:
20926 /* Compute the alignment given to a constant that is being placed in memory.
20927 EXP is the constant and ALIGN is the alignment that the object would
20928 ordinarily have.
20929 The value of this function is used instead of that alignment to align
20930 the object.  */
20933 ix86_constant_alignment (tree exp, int align)
20935 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20936 || TREE_CODE (exp) == INTEGER_CST)
20938 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20940 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20943 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20944 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20945 return BITS_PER_WORD;
20950 /* Compute the alignment for a static variable.
20951 TYPE is the data type, and ALIGN is the alignment that
20952 the object would ordinarily have. The value of this function is used
20953 instead of that alignment to align the object. */
20956 ix86_data_alignment (tree type, int align)
20958 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20960 if (AGGREGATE_TYPE_P (type)
20961 && TYPE_SIZE (type)
20962 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20963 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20964 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20965 && align < max_align)
20968 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20969 to a 16-byte boundary.  */
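/* E.g. (added) a file-scope "char buf[24];" is bumped to 16-byte
   alignment by this rule, so vectorized loops can use aligned 16-byte
   SSE accesses on it.  */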
20972 if (AGGREGATE_TYPE_P (type)
20973 && TYPE_SIZE (type)
20974 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20975 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20976 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20980 if (TREE_CODE (type) == ARRAY_TYPE)
20982 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20984 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20987 else if (TREE_CODE (type) == COMPLEX_TYPE)
20990 if (TYPE_MODE (type) == DCmode && align < 64)
20992 if ((TYPE_MODE (type) == XCmode
20993 || TYPE_MODE (type) == TCmode) && align < 128)
20996 else if ((TREE_CODE (type) == RECORD_TYPE
20997 || TREE_CODE (type) == UNION_TYPE
20998 || TREE_CODE (type) == QUAL_UNION_TYPE)
20999 && TYPE_FIELDS (type))
21001 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21003 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21006 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21007 || TREE_CODE (type) == INTEGER_TYPE)
21009 if (TYPE_MODE (type) == DFmode && align < 64)
21011 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21018 /* Compute the alignment for a local variable or a stack slot. EXP is
21019 the data type or decl itself, MODE is the widest mode available and
21020 ALIGN is the alignment that the object would ordinarily have. The
21021 value of this macro is used instead of that alignment to align the
21022 object.  */
21025 ix86_local_alignment (tree exp, enum machine_mode mode,
21026 unsigned int align)
21030 if (exp && DECL_P (exp))
21032 type = TREE_TYPE (exp);
21041 /* Don't do dynamic stack realignment for long long objects with
21042 -mpreferred-stack-boundary=2. */
21045 && ix86_preferred_stack_boundary < 64
21046 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21047 && (!type || !TYPE_USER_ALIGN (type))
21048 && (!decl || !DECL_USER_ALIGN (decl)))
21051 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
21052 register in MODE.  We will return the largest alignment of XF
21053 and DF.  */
21056 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21057 align = GET_MODE_ALIGNMENT (DFmode);
21061 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
21062 to a 16-byte boundary.  Exact wording is:
21064 An array uses the same alignment as its elements, except that a local or
21065 global array variable of length at least 16 bytes or
21066 a C99 variable-length array variable always has alignment of at least 16 bytes.
21068 This was added to allow use of aligned SSE instructions on arrays.  This
21069 rule is meant for static storage (where the compiler cannot do the analysis
21070 by itself).  We follow it for automatic variables only when convenient.
21071 We fully control everything in the function being compiled, and functions
21072 from other units cannot rely on the alignment.
21074 Exclude the va_list type.  It is the common case of a local array where
21075 we cannot benefit from the alignment.  */
21076 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21079 if (AGGREGATE_TYPE_P (type)
21080 && (TYPE_MAIN_VARIANT (type)
21081 != TYPE_MAIN_VARIANT (va_list_type_node))
21082 && TYPE_SIZE (type)
21083 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21084 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21085 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21088 if (TREE_CODE (type) == ARRAY_TYPE)
21090 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21092 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21095 else if (TREE_CODE (type) == COMPLEX_TYPE)
21097 if (TYPE_MODE (type) == DCmode && align < 64)
21099 if ((TYPE_MODE (type) == XCmode
21100 || TYPE_MODE (type) == TCmode) && align < 128)
21103 else if ((TREE_CODE (type) == RECORD_TYPE
21104 || TREE_CODE (type) == UNION_TYPE
21105 || TREE_CODE (type) == QUAL_UNION_TYPE)
21106 && TYPE_FIELDS (type))
21108 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21110 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21113 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21114 || TREE_CODE (type) == INTEGER_TYPE)
21117 if (TYPE_MODE (type) == DFmode && align < 64)
21119 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21125 /* Compute the minimum required alignment for dynamic stack realignment
21126 purposes for a local variable, parameter or a stack slot. EXP is
21127 the data type or decl itself, MODE is its mode and ALIGN is the
21128 alignment that the object would ordinarily have. */
21131 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21132 unsigned int align)
21136 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21139 if (exp && DECL_P (exp))
21141 type = TREE_TYPE (exp);
21150 /* Don't do dynamic stack realignment for long long objects with
21151 -mpreferred-stack-boundary=2. */
21152 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21153 && (!type || !TYPE_USER_ALIGN (type))
21154 && (!decl || !DECL_USER_ALIGN (decl)))
21160 /* Find a location for the static chain incoming to a nested function.
21161 This is a register, unless all free registers are used by arguments. */
21164 ix86_static_chain (const_tree fndecl, bool incoming_p)
21168 if (!DECL_STATIC_CHAIN (fndecl))
21173 /* We always use R10 in 64-bit mode. */
21179 /* By default in 32-bit mode we use ECX to pass the static chain. */
21182 fntype = TREE_TYPE (fndecl);
21183 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21185 /* Fastcall functions use ecx/edx for arguments, which leaves
21186 us with EAX for the static chain. */
21189 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21191 /* Thiscall functions use ecx for arguments, which leaves
21192 us with EAX for the static chain. */
21195 else if (ix86_function_regparm (fntype, fndecl) == 3)
21197 /* For regparm 3, we have no free call-clobbered registers in
21198 which to store the static chain. In order to implement this,
21199 we have the trampoline push the static chain to the stack.
21200 However, we can't push a value below the return address when
21201 we call the nested function directly, so we have to use an
21202 alternate entry point. For this we use ESI, and have the
21203 alternate entry point push ESI, so that things appear the
21204 same once we're executing the nested function. */
21207 if (fndecl == current_function_decl)
21208 ix86_static_chain_on_stack = true;
21209 return gen_frame_mem (SImode,
21210 plus_constant (arg_pointer_rtx, -8));
21216 return gen_rtx_REG (Pmode, regno);
21219 /* Emit RTL insns to initialize the variable parts of a trampoline.
21220 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21221 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21222 to be passed to the target function. */
21225 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21229 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21236 /* Depending on the static chain location, either load a register
21237 with a constant, or push the constant to the stack. All of the
21238 instructions are the same size. */
21239 chain = ix86_static_chain (fndecl, true);
21242 if (REGNO (chain) == CX_REG)
21244 else if (REGNO (chain) == AX_REG)
21247 gcc_unreachable ();
21252 mem = adjust_address (m_tramp, QImode, 0);
21253 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21255 mem = adjust_address (m_tramp, SImode, 1);
21256 emit_move_insn (mem, chain_value);
21258 /* Compute offset from the end of the jmp to the target function.
21259 In the case in which the trampoline stores the static chain on
21260 the stack, we need to skip the first insn which pushes the
21261 (call-saved) register static chain; this push is 1 byte. */
21262 disp = expand_binop (SImode, sub_optab, fnaddr,
21263 plus_constant (XEXP (m_tramp, 0),
21264 MEM_P (chain) ? 9 : 10),
21265 NULL_RTX, 1, OPTAB_DIRECT);
21267 mem = adjust_address (m_tramp, QImode, 5);
21268 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21270 mem = adjust_address (m_tramp, SImode, 6);
21271 emit_move_insn (mem, disp);
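/* The resulting 32-bit trampoline therefore reads, for the
   register-chain case (added sketch, bytes in memory order):

     b9 <chain32>   movl $chain_value, %ecx   (b8 for %eax)
     e9 <rel32>     jmp  fnaddr

   with the stack-chain variant using a push immediate in place of
   the mov.  */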
21277 /* Load the function address to r11.  Try to load the address using
21278 the shorter movl instead of movabs.  We may want to support
21279 movq for kernel mode, but the kernel does not use trampolines at
21280 the moment.  */
21281 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21283 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21285 mem = adjust_address (m_tramp, HImode, offset);
21286 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21288 mem = adjust_address (m_tramp, SImode, offset + 2);
21289 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21294 mem = adjust_address (m_tramp, HImode, offset);
21295 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21297 mem = adjust_address (m_tramp, DImode, offset + 2);
21298 emit_move_insn (mem, fnaddr);
21302 /* Load static chain using movabs to r10. */
21303 mem = adjust_address (m_tramp, HImode, offset);
21304 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21306 mem = adjust_address (m_tramp, DImode, offset + 2);
21307 emit_move_insn (mem, chain_value);
21310 /* Jump to r11; the last (unused) byte is a nop, only there to
21311 pad the write out to a single 32-bit store. */
21312 mem = adjust_address (m_tramp, SImode, offset);
21313 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
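/* In memory the 64-bit trampoline thus reads (added sketch; a
   "movl $imm32, %r11d" form replaces the first movabs when fnaddr is
   a zero-extendable 32-bit value):

     49 bb <fnaddr64>   movabs $fnaddr, %r11
     49 ba <chain64>    movabs $chain_value, %r10
     49 ff e3           jmp    *%r11
     90                 nop    (pads the final 32-bit store)
  */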
21316 gcc_assert (offset <= TRAMPOLINE_SIZE);
21319 #ifdef ENABLE_EXECUTE_STACK
21320 #ifdef CHECK_EXECUTE_STACK_ENABLED
21321 if (CHECK_EXECUTE_STACK_ENABLED)
21323 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21324 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21328 /* The following file contains several enumerations and data structures
21329 built from the definitions in i386-builtin-types.def. */
21331 #include "i386-builtin-types.inc"
21333 /* Table for the ix86 builtin non-function types. */
21334 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21336 /* Retrieve an element from the above table, building some of
21337 the types lazily. */
21340 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21342 unsigned int index;
21345 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21347 type = ix86_builtin_type_tab[(int) tcode];
21351 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21352 if (tcode <= IX86_BT_LAST_VECT)
21354 enum machine_mode mode;
21356 index = tcode - IX86_BT_LAST_PRIM - 1;
21357 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21358 mode = ix86_builtin_type_vect_mode[index];
21360 type = build_vector_type_for_mode (itype, mode);
21366 index = tcode - IX86_BT_LAST_VECT - 1;
21367 if (tcode <= IX86_BT_LAST_PTR)
21368 quals = TYPE_UNQUALIFIED;
21370 quals = TYPE_QUAL_CONST;
21372 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21373 if (quals != TYPE_UNQUALIFIED)
21374 itype = build_qualified_type (itype, quals);
21376 type = build_pointer_type (itype);
21379 ix86_builtin_type_tab[(int) tcode] = type;
21383 /* Table for the ix86 builtin function types. */
21384 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21386 /* Retrieve an element from the above table, building some of
21387 the types lazily. */
21390 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21394 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21396 type = ix86_builtin_func_type_tab[(int) tcode];
21400 if (tcode <= IX86_BT_LAST_FUNC)
21402 unsigned start = ix86_builtin_func_start[(int) tcode];
21403 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21404 tree rtype, atype, args = void_list_node;
21407 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21408 for (i = after - 1; i > start; --i)
21410 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21411 args = tree_cons (NULL, atype, args);
21414 type = build_function_type (rtype, args);
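/* E.g. (added) for a code such as V4SF_FTYPE_V4SF_V4SF the loop above
   walks the table slice backwards, building the TREE_LIST
   (V4SF, (V4SF, void_list_node)) and then the function type
   "V4SF (V4SF, V4SF)".  */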
21418 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21419 enum ix86_builtin_func_type icode;
21421 icode = ix86_builtin_func_alias_base[index];
21422 type = ix86_get_builtin_func_type (icode);
21425 ix86_builtin_func_type_tab[(int) tcode] = type;
21430 /* Codes for all the SSE/MMX builtins. */
21433 IX86_BUILTIN_ADDPS,
21434 IX86_BUILTIN_ADDSS,
21435 IX86_BUILTIN_DIVPS,
21436 IX86_BUILTIN_DIVSS,
21437 IX86_BUILTIN_MULPS,
21438 IX86_BUILTIN_MULSS,
21439 IX86_BUILTIN_SUBPS,
21440 IX86_BUILTIN_SUBSS,
21442 IX86_BUILTIN_CMPEQPS,
21443 IX86_BUILTIN_CMPLTPS,
21444 IX86_BUILTIN_CMPLEPS,
21445 IX86_BUILTIN_CMPGTPS,
21446 IX86_BUILTIN_CMPGEPS,
21447 IX86_BUILTIN_CMPNEQPS,
21448 IX86_BUILTIN_CMPNLTPS,
21449 IX86_BUILTIN_CMPNLEPS,
21450 IX86_BUILTIN_CMPNGTPS,
21451 IX86_BUILTIN_CMPNGEPS,
21452 IX86_BUILTIN_CMPORDPS,
21453 IX86_BUILTIN_CMPUNORDPS,
21454 IX86_BUILTIN_CMPEQSS,
21455 IX86_BUILTIN_CMPLTSS,
21456 IX86_BUILTIN_CMPLESS,
21457 IX86_BUILTIN_CMPNEQSS,
21458 IX86_BUILTIN_CMPNLTSS,
21459 IX86_BUILTIN_CMPNLESS,
21460 IX86_BUILTIN_CMPNGTSS,
21461 IX86_BUILTIN_CMPNGESS,
21462 IX86_BUILTIN_CMPORDSS,
21463 IX86_BUILTIN_CMPUNORDSS,
21465 IX86_BUILTIN_COMIEQSS,
21466 IX86_BUILTIN_COMILTSS,
21467 IX86_BUILTIN_COMILESS,
21468 IX86_BUILTIN_COMIGTSS,
21469 IX86_BUILTIN_COMIGESS,
21470 IX86_BUILTIN_COMINEQSS,
21471 IX86_BUILTIN_UCOMIEQSS,
21472 IX86_BUILTIN_UCOMILTSS,
21473 IX86_BUILTIN_UCOMILESS,
21474 IX86_BUILTIN_UCOMIGTSS,
21475 IX86_BUILTIN_UCOMIGESS,
21476 IX86_BUILTIN_UCOMINEQSS,
21478 IX86_BUILTIN_CVTPI2PS,
21479 IX86_BUILTIN_CVTPS2PI,
21480 IX86_BUILTIN_CVTSI2SS,
21481 IX86_BUILTIN_CVTSI642SS,
21482 IX86_BUILTIN_CVTSS2SI,
21483 IX86_BUILTIN_CVTSS2SI64,
21484 IX86_BUILTIN_CVTTPS2PI,
21485 IX86_BUILTIN_CVTTSS2SI,
21486 IX86_BUILTIN_CVTTSS2SI64,
21488 IX86_BUILTIN_MAXPS,
21489 IX86_BUILTIN_MAXSS,
21490 IX86_BUILTIN_MINPS,
21491 IX86_BUILTIN_MINSS,
21493 IX86_BUILTIN_LOADUPS,
21494 IX86_BUILTIN_STOREUPS,
21495 IX86_BUILTIN_MOVSS,
21497 IX86_BUILTIN_MOVHLPS,
21498 IX86_BUILTIN_MOVLHPS,
21499 IX86_BUILTIN_LOADHPS,
21500 IX86_BUILTIN_LOADLPS,
21501 IX86_BUILTIN_STOREHPS,
21502 IX86_BUILTIN_STORELPS,
21504 IX86_BUILTIN_MASKMOVQ,
21505 IX86_BUILTIN_MOVMSKPS,
21506 IX86_BUILTIN_PMOVMSKB,
21508 IX86_BUILTIN_MOVNTPS,
21509 IX86_BUILTIN_MOVNTQ,
21511 IX86_BUILTIN_LOADDQU,
21512 IX86_BUILTIN_STOREDQU,
21514 IX86_BUILTIN_PACKSSWB,
21515 IX86_BUILTIN_PACKSSDW,
21516 IX86_BUILTIN_PACKUSWB,
21518 IX86_BUILTIN_PADDB,
21519 IX86_BUILTIN_PADDW,
21520 IX86_BUILTIN_PADDD,
21521 IX86_BUILTIN_PADDQ,
21522 IX86_BUILTIN_PADDSB,
21523 IX86_BUILTIN_PADDSW,
21524 IX86_BUILTIN_PADDUSB,
21525 IX86_BUILTIN_PADDUSW,
21526 IX86_BUILTIN_PSUBB,
21527 IX86_BUILTIN_PSUBW,
21528 IX86_BUILTIN_PSUBD,
21529 IX86_BUILTIN_PSUBQ,
21530 IX86_BUILTIN_PSUBSB,
21531 IX86_BUILTIN_PSUBSW,
21532 IX86_BUILTIN_PSUBUSB,
21533 IX86_BUILTIN_PSUBUSW,
21536 IX86_BUILTIN_PANDN,
21540 IX86_BUILTIN_PAVGB,
21541 IX86_BUILTIN_PAVGW,
21543 IX86_BUILTIN_PCMPEQB,
21544 IX86_BUILTIN_PCMPEQW,
21545 IX86_BUILTIN_PCMPEQD,
21546 IX86_BUILTIN_PCMPGTB,
21547 IX86_BUILTIN_PCMPGTW,
21548 IX86_BUILTIN_PCMPGTD,
21550 IX86_BUILTIN_PMADDWD,
21552 IX86_BUILTIN_PMAXSW,
21553 IX86_BUILTIN_PMAXUB,
21554 IX86_BUILTIN_PMINSW,
21555 IX86_BUILTIN_PMINUB,
21557 IX86_BUILTIN_PMULHUW,
21558 IX86_BUILTIN_PMULHW,
21559 IX86_BUILTIN_PMULLW,
21561 IX86_BUILTIN_PSADBW,
21562 IX86_BUILTIN_PSHUFW,
21564 IX86_BUILTIN_PSLLW,
21565 IX86_BUILTIN_PSLLD,
21566 IX86_BUILTIN_PSLLQ,
21567 IX86_BUILTIN_PSRAW,
21568 IX86_BUILTIN_PSRAD,
21569 IX86_BUILTIN_PSRLW,
21570 IX86_BUILTIN_PSRLD,
21571 IX86_BUILTIN_PSRLQ,
21572 IX86_BUILTIN_PSLLWI,
21573 IX86_BUILTIN_PSLLDI,
21574 IX86_BUILTIN_PSLLQI,
21575 IX86_BUILTIN_PSRAWI,
21576 IX86_BUILTIN_PSRADI,
21577 IX86_BUILTIN_PSRLWI,
21578 IX86_BUILTIN_PSRLDI,
21579 IX86_BUILTIN_PSRLQI,
21581 IX86_BUILTIN_PUNPCKHBW,
21582 IX86_BUILTIN_PUNPCKHWD,
21583 IX86_BUILTIN_PUNPCKHDQ,
21584 IX86_BUILTIN_PUNPCKLBW,
21585 IX86_BUILTIN_PUNPCKLWD,
21586 IX86_BUILTIN_PUNPCKLDQ,
21588 IX86_BUILTIN_SHUFPS,
21590 IX86_BUILTIN_RCPPS,
21591 IX86_BUILTIN_RCPSS,
21592 IX86_BUILTIN_RSQRTPS,
21593 IX86_BUILTIN_RSQRTPS_NR,
21594 IX86_BUILTIN_RSQRTSS,
21595 IX86_BUILTIN_RSQRTF,
21596 IX86_BUILTIN_SQRTPS,
21597 IX86_BUILTIN_SQRTPS_NR,
21598 IX86_BUILTIN_SQRTSS,
21600 IX86_BUILTIN_UNPCKHPS,
21601 IX86_BUILTIN_UNPCKLPS,
21603 IX86_BUILTIN_ANDPS,
21604 IX86_BUILTIN_ANDNPS,
21606 IX86_BUILTIN_XORPS,
21609 IX86_BUILTIN_LDMXCSR,
21610 IX86_BUILTIN_STMXCSR,
21611 IX86_BUILTIN_SFENCE,
21613 /* 3DNow! Original */
21614 IX86_BUILTIN_FEMMS,
21615 IX86_BUILTIN_PAVGUSB,
21616 IX86_BUILTIN_PF2ID,
21617 IX86_BUILTIN_PFACC,
21618 IX86_BUILTIN_PFADD,
21619 IX86_BUILTIN_PFCMPEQ,
21620 IX86_BUILTIN_PFCMPGE,
21621 IX86_BUILTIN_PFCMPGT,
21622 IX86_BUILTIN_PFMAX,
21623 IX86_BUILTIN_PFMIN,
21624 IX86_BUILTIN_PFMUL,
21625 IX86_BUILTIN_PFRCP,
21626 IX86_BUILTIN_PFRCPIT1,
21627 IX86_BUILTIN_PFRCPIT2,
21628 IX86_BUILTIN_PFRSQIT1,
21629 IX86_BUILTIN_PFRSQRT,
21630 IX86_BUILTIN_PFSUB,
21631 IX86_BUILTIN_PFSUBR,
21632 IX86_BUILTIN_PI2FD,
21633 IX86_BUILTIN_PMULHRW,
21635 /* 3DNow! Athlon Extensions */
21636 IX86_BUILTIN_PF2IW,
21637 IX86_BUILTIN_PFNACC,
21638 IX86_BUILTIN_PFPNACC,
21639 IX86_BUILTIN_PI2FW,
21640 IX86_BUILTIN_PSWAPDSI,
21641 IX86_BUILTIN_PSWAPDSF,
21644 IX86_BUILTIN_ADDPD,
21645 IX86_BUILTIN_ADDSD,
21646 IX86_BUILTIN_DIVPD,
21647 IX86_BUILTIN_DIVSD,
21648 IX86_BUILTIN_MULPD,
21649 IX86_BUILTIN_MULSD,
21650 IX86_BUILTIN_SUBPD,
21651 IX86_BUILTIN_SUBSD,
21653 IX86_BUILTIN_CMPEQPD,
21654 IX86_BUILTIN_CMPLTPD,
21655 IX86_BUILTIN_CMPLEPD,
21656 IX86_BUILTIN_CMPGTPD,
21657 IX86_BUILTIN_CMPGEPD,
21658 IX86_BUILTIN_CMPNEQPD,
21659 IX86_BUILTIN_CMPNLTPD,
21660 IX86_BUILTIN_CMPNLEPD,
21661 IX86_BUILTIN_CMPNGTPD,
21662 IX86_BUILTIN_CMPNGEPD,
21663 IX86_BUILTIN_CMPORDPD,
21664 IX86_BUILTIN_CMPUNORDPD,
21665 IX86_BUILTIN_CMPEQSD,
21666 IX86_BUILTIN_CMPLTSD,
21667 IX86_BUILTIN_CMPLESD,
21668 IX86_BUILTIN_CMPNEQSD,
21669 IX86_BUILTIN_CMPNLTSD,
21670 IX86_BUILTIN_CMPNLESD,
21671 IX86_BUILTIN_CMPORDSD,
21672 IX86_BUILTIN_CMPUNORDSD,
21674 IX86_BUILTIN_COMIEQSD,
21675 IX86_BUILTIN_COMILTSD,
21676 IX86_BUILTIN_COMILESD,
21677 IX86_BUILTIN_COMIGTSD,
21678 IX86_BUILTIN_COMIGESD,
21679 IX86_BUILTIN_COMINEQSD,
21680 IX86_BUILTIN_UCOMIEQSD,
21681 IX86_BUILTIN_UCOMILTSD,
21682 IX86_BUILTIN_UCOMILESD,
21683 IX86_BUILTIN_UCOMIGTSD,
21684 IX86_BUILTIN_UCOMIGESD,
21685 IX86_BUILTIN_UCOMINEQSD,
21687 IX86_BUILTIN_MAXPD,
21688 IX86_BUILTIN_MAXSD,
21689 IX86_BUILTIN_MINPD,
21690 IX86_BUILTIN_MINSD,
21692 IX86_BUILTIN_ANDPD,
21693 IX86_BUILTIN_ANDNPD,
21695 IX86_BUILTIN_XORPD,
21697 IX86_BUILTIN_SQRTPD,
21698 IX86_BUILTIN_SQRTSD,
21700 IX86_BUILTIN_UNPCKHPD,
21701 IX86_BUILTIN_UNPCKLPD,
21703 IX86_BUILTIN_SHUFPD,
21705 IX86_BUILTIN_LOADUPD,
21706 IX86_BUILTIN_STOREUPD,
21707 IX86_BUILTIN_MOVSD,
21709 IX86_BUILTIN_LOADHPD,
21710 IX86_BUILTIN_LOADLPD,
21712 IX86_BUILTIN_CVTDQ2PD,
21713 IX86_BUILTIN_CVTDQ2PS,
21715 IX86_BUILTIN_CVTPD2DQ,
21716 IX86_BUILTIN_CVTPD2PI,
21717 IX86_BUILTIN_CVTPD2PS,
21718 IX86_BUILTIN_CVTTPD2DQ,
21719 IX86_BUILTIN_CVTTPD2PI,
21721 IX86_BUILTIN_CVTPI2PD,
21722 IX86_BUILTIN_CVTSI2SD,
21723 IX86_BUILTIN_CVTSI642SD,
21725 IX86_BUILTIN_CVTSD2SI,
21726 IX86_BUILTIN_CVTSD2SI64,
21727 IX86_BUILTIN_CVTSD2SS,
21728 IX86_BUILTIN_CVTSS2SD,
21729 IX86_BUILTIN_CVTTSD2SI,
21730 IX86_BUILTIN_CVTTSD2SI64,
21732 IX86_BUILTIN_CVTPS2DQ,
21733 IX86_BUILTIN_CVTPS2PD,
21734 IX86_BUILTIN_CVTTPS2DQ,
21736 IX86_BUILTIN_MOVNTI,
21737 IX86_BUILTIN_MOVNTPD,
21738 IX86_BUILTIN_MOVNTDQ,
21740 IX86_BUILTIN_MOVQ128,
21743 IX86_BUILTIN_MASKMOVDQU,
21744 IX86_BUILTIN_MOVMSKPD,
21745 IX86_BUILTIN_PMOVMSKB128,
21747 IX86_BUILTIN_PACKSSWB128,
21748 IX86_BUILTIN_PACKSSDW128,
21749 IX86_BUILTIN_PACKUSWB128,
21751 IX86_BUILTIN_PADDB128,
21752 IX86_BUILTIN_PADDW128,
21753 IX86_BUILTIN_PADDD128,
21754 IX86_BUILTIN_PADDQ128,
21755 IX86_BUILTIN_PADDSB128,
21756 IX86_BUILTIN_PADDSW128,
21757 IX86_BUILTIN_PADDUSB128,
21758 IX86_BUILTIN_PADDUSW128,
21759 IX86_BUILTIN_PSUBB128,
21760 IX86_BUILTIN_PSUBW128,
21761 IX86_BUILTIN_PSUBD128,
21762 IX86_BUILTIN_PSUBQ128,
21763 IX86_BUILTIN_PSUBSB128,
21764 IX86_BUILTIN_PSUBSW128,
21765 IX86_BUILTIN_PSUBUSB128,
21766 IX86_BUILTIN_PSUBUSW128,
21768 IX86_BUILTIN_PAND128,
21769 IX86_BUILTIN_PANDN128,
21770 IX86_BUILTIN_POR128,
21771 IX86_BUILTIN_PXOR128,
21773 IX86_BUILTIN_PAVGB128,
21774 IX86_BUILTIN_PAVGW128,
21776 IX86_BUILTIN_PCMPEQB128,
21777 IX86_BUILTIN_PCMPEQW128,
21778 IX86_BUILTIN_PCMPEQD128,
21779 IX86_BUILTIN_PCMPGTB128,
21780 IX86_BUILTIN_PCMPGTW128,
21781 IX86_BUILTIN_PCMPGTD128,
21783 IX86_BUILTIN_PMADDWD128,
21785 IX86_BUILTIN_PMAXSW128,
21786 IX86_BUILTIN_PMAXUB128,
21787 IX86_BUILTIN_PMINSW128,
21788 IX86_BUILTIN_PMINUB128,
21790 IX86_BUILTIN_PMULUDQ,
21791 IX86_BUILTIN_PMULUDQ128,
21792 IX86_BUILTIN_PMULHUW128,
21793 IX86_BUILTIN_PMULHW128,
21794 IX86_BUILTIN_PMULLW128,
21796 IX86_BUILTIN_PSADBW128,
21797 IX86_BUILTIN_PSHUFHW,
21798 IX86_BUILTIN_PSHUFLW,
21799 IX86_BUILTIN_PSHUFD,
21801 IX86_BUILTIN_PSLLDQI128,
21802 IX86_BUILTIN_PSLLWI128,
21803 IX86_BUILTIN_PSLLDI128,
21804 IX86_BUILTIN_PSLLQI128,
21805 IX86_BUILTIN_PSRAWI128,
21806 IX86_BUILTIN_PSRADI128,
21807 IX86_BUILTIN_PSRLDQI128,
21808 IX86_BUILTIN_PSRLWI128,
21809 IX86_BUILTIN_PSRLDI128,
21810 IX86_BUILTIN_PSRLQI128,
21812 IX86_BUILTIN_PSLLDQ128,
21813 IX86_BUILTIN_PSLLW128,
21814 IX86_BUILTIN_PSLLD128,
21815 IX86_BUILTIN_PSLLQ128,
21816 IX86_BUILTIN_PSRAW128,
21817 IX86_BUILTIN_PSRAD128,
21818 IX86_BUILTIN_PSRLW128,
21819 IX86_BUILTIN_PSRLD128,
21820 IX86_BUILTIN_PSRLQ128,
21822 IX86_BUILTIN_PUNPCKHBW128,
21823 IX86_BUILTIN_PUNPCKHWD128,
21824 IX86_BUILTIN_PUNPCKHDQ128,
21825 IX86_BUILTIN_PUNPCKHQDQ128,
21826 IX86_BUILTIN_PUNPCKLBW128,
21827 IX86_BUILTIN_PUNPCKLWD128,
21828 IX86_BUILTIN_PUNPCKLDQ128,
21829 IX86_BUILTIN_PUNPCKLQDQ128,
21831 IX86_BUILTIN_CLFLUSH,
21832 IX86_BUILTIN_MFENCE,
21833 IX86_BUILTIN_LFENCE,
21835 IX86_BUILTIN_BSRSI,
21836 IX86_BUILTIN_BSRDI,
21837 IX86_BUILTIN_RDPMC,
21838 IX86_BUILTIN_RDTSC,
21839 IX86_BUILTIN_RDTSCP,
21840 IX86_BUILTIN_ROLQI,
21841 IX86_BUILTIN_ROLHI,
21842 IX86_BUILTIN_RORQI,
21843 IX86_BUILTIN_RORHI,
21846 IX86_BUILTIN_ADDSUBPS,
21847 IX86_BUILTIN_HADDPS,
21848 IX86_BUILTIN_HSUBPS,
21849 IX86_BUILTIN_MOVSHDUP,
21850 IX86_BUILTIN_MOVSLDUP,
21851 IX86_BUILTIN_ADDSUBPD,
21852 IX86_BUILTIN_HADDPD,
21853 IX86_BUILTIN_HSUBPD,
21854 IX86_BUILTIN_LDDQU,
21856 IX86_BUILTIN_MONITOR,
21857 IX86_BUILTIN_MWAIT,
21860 IX86_BUILTIN_PHADDW,
21861 IX86_BUILTIN_PHADDD,
21862 IX86_BUILTIN_PHADDSW,
21863 IX86_BUILTIN_PHSUBW,
21864 IX86_BUILTIN_PHSUBD,
21865 IX86_BUILTIN_PHSUBSW,
21866 IX86_BUILTIN_PMADDUBSW,
21867 IX86_BUILTIN_PMULHRSW,
21868 IX86_BUILTIN_PSHUFB,
21869 IX86_BUILTIN_PSIGNB,
21870 IX86_BUILTIN_PSIGNW,
21871 IX86_BUILTIN_PSIGND,
21872 IX86_BUILTIN_PALIGNR,
21873 IX86_BUILTIN_PABSB,
21874 IX86_BUILTIN_PABSW,
21875 IX86_BUILTIN_PABSD,
21877 IX86_BUILTIN_PHADDW128,
21878 IX86_BUILTIN_PHADDD128,
21879 IX86_BUILTIN_PHADDSW128,
21880 IX86_BUILTIN_PHSUBW128,
21881 IX86_BUILTIN_PHSUBD128,
21882 IX86_BUILTIN_PHSUBSW128,
21883 IX86_BUILTIN_PMADDUBSW128,
21884 IX86_BUILTIN_PMULHRSW128,
21885 IX86_BUILTIN_PSHUFB128,
21886 IX86_BUILTIN_PSIGNB128,
21887 IX86_BUILTIN_PSIGNW128,
21888 IX86_BUILTIN_PSIGND128,
21889 IX86_BUILTIN_PALIGNR128,
21890 IX86_BUILTIN_PABSB128,
21891 IX86_BUILTIN_PABSW128,
21892 IX86_BUILTIN_PABSD128,
21894 /* AMDFAM10 - SSE4A New Instructions. */
21895 IX86_BUILTIN_MOVNTSD,
21896 IX86_BUILTIN_MOVNTSS,
21897 IX86_BUILTIN_EXTRQI,
21898 IX86_BUILTIN_EXTRQ,
21899 IX86_BUILTIN_INSERTQI,
21900 IX86_BUILTIN_INSERTQ,
21903 IX86_BUILTIN_BLENDPD,
21904 IX86_BUILTIN_BLENDPS,
21905 IX86_BUILTIN_BLENDVPD,
21906 IX86_BUILTIN_BLENDVPS,
21907 IX86_BUILTIN_PBLENDVB128,
21908 IX86_BUILTIN_PBLENDW128,
21913 IX86_BUILTIN_INSERTPS128,
21915 IX86_BUILTIN_MOVNTDQA,
21916 IX86_BUILTIN_MPSADBW128,
21917 IX86_BUILTIN_PACKUSDW128,
21918 IX86_BUILTIN_PCMPEQQ,
21919 IX86_BUILTIN_PHMINPOSUW128,
21921 IX86_BUILTIN_PMAXSB128,
21922 IX86_BUILTIN_PMAXSD128,
21923 IX86_BUILTIN_PMAXUD128,
21924 IX86_BUILTIN_PMAXUW128,
21926 IX86_BUILTIN_PMINSB128,
21927 IX86_BUILTIN_PMINSD128,
21928 IX86_BUILTIN_PMINUD128,
21929 IX86_BUILTIN_PMINUW128,
21931 IX86_BUILTIN_PMOVSXBW128,
21932 IX86_BUILTIN_PMOVSXBD128,
21933 IX86_BUILTIN_PMOVSXBQ128,
21934 IX86_BUILTIN_PMOVSXWD128,
21935 IX86_BUILTIN_PMOVSXWQ128,
21936 IX86_BUILTIN_PMOVSXDQ128,
21938 IX86_BUILTIN_PMOVZXBW128,
21939 IX86_BUILTIN_PMOVZXBD128,
21940 IX86_BUILTIN_PMOVZXBQ128,
21941 IX86_BUILTIN_PMOVZXWD128,
21942 IX86_BUILTIN_PMOVZXWQ128,
21943 IX86_BUILTIN_PMOVZXDQ128,
21945 IX86_BUILTIN_PMULDQ128,
21946 IX86_BUILTIN_PMULLD128,
21948 IX86_BUILTIN_ROUNDPD,
21949 IX86_BUILTIN_ROUNDPS,
21950 IX86_BUILTIN_ROUNDSD,
21951 IX86_BUILTIN_ROUNDSS,
21953 IX86_BUILTIN_PTESTZ,
21954 IX86_BUILTIN_PTESTC,
21955 IX86_BUILTIN_PTESTNZC,
21957 IX86_BUILTIN_VEC_INIT_V2SI,
21958 IX86_BUILTIN_VEC_INIT_V4HI,
21959 IX86_BUILTIN_VEC_INIT_V8QI,
21960 IX86_BUILTIN_VEC_EXT_V2DF,
21961 IX86_BUILTIN_VEC_EXT_V2DI,
21962 IX86_BUILTIN_VEC_EXT_V4SF,
21963 IX86_BUILTIN_VEC_EXT_V4SI,
21964 IX86_BUILTIN_VEC_EXT_V8HI,
21965 IX86_BUILTIN_VEC_EXT_V2SI,
21966 IX86_BUILTIN_VEC_EXT_V4HI,
21967 IX86_BUILTIN_VEC_EXT_V16QI,
21968 IX86_BUILTIN_VEC_SET_V2DI,
21969 IX86_BUILTIN_VEC_SET_V4SF,
21970 IX86_BUILTIN_VEC_SET_V4SI,
21971 IX86_BUILTIN_VEC_SET_V8HI,
21972 IX86_BUILTIN_VEC_SET_V4HI,
21973 IX86_BUILTIN_VEC_SET_V16QI,
21975 IX86_BUILTIN_VEC_PACK_SFIX,
21978 IX86_BUILTIN_CRC32QI,
21979 IX86_BUILTIN_CRC32HI,
21980 IX86_BUILTIN_CRC32SI,
21981 IX86_BUILTIN_CRC32DI,
21983 IX86_BUILTIN_PCMPESTRI128,
21984 IX86_BUILTIN_PCMPESTRM128,
21985 IX86_BUILTIN_PCMPESTRA128,
21986 IX86_BUILTIN_PCMPESTRC128,
21987 IX86_BUILTIN_PCMPESTRO128,
21988 IX86_BUILTIN_PCMPESTRS128,
21989 IX86_BUILTIN_PCMPESTRZ128,
21990 IX86_BUILTIN_PCMPISTRI128,
21991 IX86_BUILTIN_PCMPISTRM128,
21992 IX86_BUILTIN_PCMPISTRA128,
21993 IX86_BUILTIN_PCMPISTRC128,
21994 IX86_BUILTIN_PCMPISTRO128,
21995 IX86_BUILTIN_PCMPISTRS128,
21996 IX86_BUILTIN_PCMPISTRZ128,
21998 IX86_BUILTIN_PCMPGTQ,
22000 /* AES instructions */
22001 IX86_BUILTIN_AESENC128,
22002 IX86_BUILTIN_AESENCLAST128,
22003 IX86_BUILTIN_AESDEC128,
22004 IX86_BUILTIN_AESDECLAST128,
22005 IX86_BUILTIN_AESIMC128,
22006 IX86_BUILTIN_AESKEYGENASSIST128,
22008 /* PCLMUL instruction */
22009 IX86_BUILTIN_PCLMULQDQ128,
  IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,
  /* TFmode support builtins. */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins. */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,

  IX86_BUILTIN_CVTUDQ2PS,
  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions. */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions. */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,
  IX86_BUILTIN_CLZS,

  /* FSGSBASE instructions. */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions. */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions. */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls. */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];

/* Table of all of the builtin functions that are possible with different ISAs
   but are waiting to be built until a function is declared to use that
   ISA. */
struct builtin_isa {
  const char *name;                  /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;                           /* isa_flags this builtin is defined for */
  bool const_p;                      /* true if the declaration is constant */
  bool set_and_not_built_p;          /* true while the declaration is deferred */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];

/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope. */
static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
             enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
          || (mask & ix86_isa_flags) != 0
          || (lang_hooks.builtin_function
              == lang_hooks.builtin_function_ext_scope))
        {
          /* Declare the builtin now.  */
          tree type = ix86_get_builtin_func_type (tcode);
          decl = add_builtin_function (name, type, code, BUILT_IN_MD,
                                       NULL, NULL_TREE);
          ix86_builtins[(int) code] = decl;
          ix86_builtins_isa[(int) code].set_and_not_built_p = false;
        }
      else
        {
          /* Defer; ix86_add_new_builtins declares it once the ISA is on.  */
          ix86_builtins[(int) code] = NULL_TREE;
          ix86_builtins_isa[(int) code].tcode = tcode;
          ix86_builtins_isa[(int) code].name = name;
          ix86_builtins_isa[(int) code].const_p = false;
          ix86_builtins_isa[(int) code].set_and_not_built_p = true;
        }
    }

  return decl;
}
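
/* Illustrative usage sketch (added for exposition, not part of the
   original file): roughly how target initialization code is expected to
   invoke def_builtin.  The mask, name, type and code below are taken
   from the AVX entries in the tables later in this file; the real call
   sites live elsewhere in i386.c.  */
#if 0
static void
example_def_builtin_usage (void)
{
  /* Declared immediately if AVX is in ix86_isa_flags; otherwise the
     record is parked in ix86_builtins_isa until the ISA is enabled.  */
  def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper",
               VOID_FTYPE_VOID, IX86_BUILTIN_VZEROUPPER);
}
#endif
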
/* Like def_builtin, but also marks the function decl "const". */
static inline tree
def_builtin_const (int mask, const char *name,
                   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;
  return decl;
}
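
/* Illustrative sketch (added for exposition): a pure arithmetic builtin
   registered through def_builtin_const, so the "const" marking lets the
   optimizers CSE repeated calls.  The arguments mirror the SSE addps
   entry in bdesc_args below.  */
#if 0
static void
example_def_builtin_const_usage (void)
{
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     V4SF_FTYPE_V4SF_V4SF, IX86_BUILTIN_ADDPS);
}
#endif
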
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them. */
static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
          && ix86_builtins_isa[i].set_and_not_built_p)
        {
          tree decl, type;

          /* Don't define the builtin again. */
          ix86_builtins_isa[i].set_and_not_built_p = false;

          type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
          decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
                                                 type, i, BUILT_IN_MD, NULL,
                                                 NULL_TREE);

          ix86_builtins[i] = decl;
          if (ix86_builtins_isa[i].const_p)
            TREE_READONLY (decl) = 1;
        }
    }
}
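
/* Illustrative sketch (an assumption about the caller, not shown in this
   excerpt): option processing for function-specific ISAs, e.g.
   __attribute__((target("avx"))), is expected to call
   ix86_add_new_builtins with the updated isa_flags so that deferred
   declarations recorded by def_builtin become visible.  */
#if 0
static void
example_enable_isa (int new_isa_flags)
{
  /* Hypothetical helper: declares every builtin whose ISA bits are now
     satisfied and that is still marked set_and_not_built_p.  */
  ix86_add_new_builtins (new_isa_flags);
}
#endif
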
/* Bits for builtin_description.flag. */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it. */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
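
/* Illustrative sketch (added for exposition): how BUILTIN_DESC_SWAP_OPERANDS
   is meant to be consumed.  "a > b" has no direct compare pattern, so a
   table entry pairs the LT comparison with the swap flag and the expander
   exchanges the operands before emitting the insn.  The names below
   (d, op0, op1) are assumptions about the expander's locals.  */
#if 0
if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
  {
    rtx tmp = op0;
    op0 = op1;
    op1 = tmp;
  }
#endif
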
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtins with variable number of arguments. */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};

/* Builtins with variable number of arguments. */
static const struct builtin_description bdesc_args[] =
{
22677 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
22678 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
22679 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
22680 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22681 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22682 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22683 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22686 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22687 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22688 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22689 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22690 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22691 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22693 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22694 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22695 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22696 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22697 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22698 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22699 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22700 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22702 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22703 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22705 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22706 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22707 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22708 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22710 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22711 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22712 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22713 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22714 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22715 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22717 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22718 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22719 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22720 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22721 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
22722 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
22724 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22725 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
22726 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22728 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
22730 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22731 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22732 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22733 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22734 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22735 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22737 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22738 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22739 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22740 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22741 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22742 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22744 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22745 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22746 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22747 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22750 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22751 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22752 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22753 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22755 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22756 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22757 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22758 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22759 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22760 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22761 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22762 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22763 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22764 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22765 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22766 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22767 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22768 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22769 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22772 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22773 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22774 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22775 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22776 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22777 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
22781 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22782 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22783 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22784 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22785 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22786 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22787 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22788 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22789 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22790 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22791 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22793 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22795 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22796 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22797 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22802 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22811 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22812 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
22815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22816 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22817 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22818 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22819 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22820 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22821 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22822 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22823 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22824 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22825 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22827 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22828 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22829 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22830 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22832 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22833 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22834 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22835 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22837 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22839 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22840 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22841 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22842 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22843 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22845 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
22846 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
22847 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
22849 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
22851 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22852 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22853 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22855 /* SSE MMX or 3Dnow!A */
22856 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22857 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22858 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22860 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22861 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22862 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22863 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22865 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
22866 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
22868 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22873 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22874 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22875 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22876 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22877 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22878 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22879 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
22880 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
22881 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
22882 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
22883 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
22884 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
22886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
22887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
22888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
22889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
22890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22891 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22893 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
22896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22897 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22899 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
22901 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22902 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
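
  /* Note on the cmp* entries above: there are no GT/GE rtx codes in the
     table.  The _SWAP function types swap the two operands before
     expansion, so e.g. cmpgtpd (a, b) is emitted as LT on (b, a), and
     the negated forms cmpngtpd/cmpngepd likewise reuse UNGE/UNGT with
     swapped operands.  */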

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
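
  /* The shift entries above come in three forms: the *_INT_CONVERT ones
     (pslldqi128/psrldqi128) reinterpret the V2DI operand as one 128-bit
     integer and shift it as a whole, the *_SI_COUNT ones take a scalar
     shift count, and the *_V*I_COUNT ones take the count from a second
     vector operand, mirroring the immediate and register encodings of
     the hardware instructions.  */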

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
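
  /* A zero name field, as in the two entries above and the AES/PCLMUL
     entries below, means no "__builtin_ia32_" function is created from
     the table entry itself; such builtins are presumably registered by
     hand elsewhere, and the entry only supplies the insn code and
     signature used at expansion time.  */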

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
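
  /* For the ptest entries the comparison code encodes which flag the
     expander tests after the PTEST instruction: EQ for ZF (ptestz),
     LTU for CF (ptestc) and GTU for "ZF and CF both clear" (ptestnzc).
     The AVX vtest and ptest256 entries further down appear to use the
     same convention.  */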

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
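
/* Each entry in the table above becomes a "__builtin_ia32_*" function
   that the intrinsic headers wrap.  As an illustrative sketch only (see
   avxintrin.h for the real definition), _mm256_add_pd is implemented
   roughly as

     extern __inline __m256d
     __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
     _mm256_add_pd (__m256d __A, __m256d __B)
     {
       return (__m256d) __builtin_ia32_addpd256 ((__v4df)__A, (__v4df)__B);
     }

   which expands through the CODE_FOR_addv4df3 pattern registered
   above.  */
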
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
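
/* The MULTI_ARG_* aliases above only abbreviate the V*_FTYPE_*
   enumerators: the digit is the operand count, the mode letters follow
   the usual QI/HI/SI/DI/SF/DF naming with a trailing 2 marking the
   256-bit variants, and suffixes such as _IMM, _CMP and _TF describe
   the extra operand.  For example, MULTI_ARG_3_HI_SI expands to
   V8HI_FTYPE_V8HI_V8HI_V4SI: a V8HI result computed from two V8HI
   operands and one V4SI operand.  */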

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
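
  /* In the FMA4 entries above, insn codes containing "vm" are the
     scalar ss/sd forms, which (in the usual "vector merge" naming of
     the SSE patterns) operate on the low element only; the others are
     the packed 128-bit and 256-bit forms.  */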

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23548 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23549 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23550 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23551 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23553 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23554 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23555 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23556 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
23557 };
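/* Each entry above binds one XOP builtin name to its insn pattern
   (CODE_FOR_*), its IX86_BUILTIN_* code, an optional comparison code,
   and a MULTI_ARG_* layout that ix86_expand_multi_arg_builtin below
   uses to pick the operand count and modes.  As a user-level sketch
   (hypothetical variables, not part of this file), compiling with -mxop:

     __v8hi a, b, c;
     __v8hi r = __builtin_ia32_vpmacssww (a, b, c);

   routes through CODE_FOR_xop_pmacssww with the MULTI_ARG_3_HI layout.  */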
23560 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
23561 in the current target ISA to allow the user to compile particular modules
23562 with different target specific options that differ from the command line
23563 options.  */
23564 static void
23565 ix86_init_mmx_sse_builtins (void)
23566 {
23567 const struct builtin_description * d;
23568 enum ix86_builtin_func_type ftype;
23569 size_t i;
23571 /* Add all special builtins with variable number of operands. */
23572 for (i = 0, d = bdesc_special_args;
23573 i < ARRAY_SIZE (bdesc_special_args);
23574 i++, d++)
23575 {
23576 if (d->name == 0)
23577 continue;
23579 ftype = (enum ix86_builtin_func_type) d->flag;
23580 def_builtin (d->mask, d->name, ftype, d->code);
23581 }
23583 /* Add all builtins with variable number of operands. */
23584 for (i = 0, d = bdesc_args;
23585 i < ARRAY_SIZE (bdesc_args);
23586 i++, d++)
23587 {
23588 if (d->name == 0)
23589 continue;
23591 ftype = (enum ix86_builtin_func_type) d->flag;
23592 def_builtin_const (d->mask, d->name, ftype, d->code);
23593 }
23595 /* pcmpestr[im] insns. */
23596 for (i = 0, d = bdesc_pcmpestr;
23597 i < ARRAY_SIZE (bdesc_pcmpestr);
23598 i++, d++)
23599 {
23600 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23601 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
23602 else
23603 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
23604 def_builtin_const (d->mask, d->name, ftype, d->code);
23605 }
23607 /* pcmpistr[im] insns. */
23608 for (i = 0, d = bdesc_pcmpistr;
23609 i < ARRAY_SIZE (bdesc_pcmpistr);
23610 i++, d++)
23611 {
23612 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23613 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
23614 else
23615 ftype = INT_FTYPE_V16QI_V16QI_INT;
23616 def_builtin_const (d->mask, d->name, ftype, d->code);
23617 }
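/* Note: the loops above use def_builtin_const for builtins with no side
   effects; def_builtin_const marks the decl TREE_READONLY so the middle
   end may CSE repeated calls, while the special-args loop keeps plain
   def_builtin for builtins that touch memory or machine state.  */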
23619 /* comi/ucomi insns. */
23620 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23621 {
23622 if (d->mask == OPTION_MASK_ISA_SSE2)
23623 ftype = INT_FTYPE_V2DF_V2DF;
23624 else
23625 ftype = INT_FTYPE_V4SF_V4SF;
23626 def_builtin_const (d->mask, d->name, ftype, d->code);
23627 }
23629 /* SSE */
23630 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
23631 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
23632 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
23633 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
23635 /* SSE or 3DNow!A */
23636 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23637 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
23638 IX86_BUILTIN_MASKMOVQ);
23641 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
23642 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
23644 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
23645 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
23646 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
23647 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
23650 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
23651 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
23652 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
23653 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
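/* A user-level sketch of the two SSE3 builtins just defined
   (hypothetical usage, not part of this file):

     __builtin_ia32_monitor (addr, 0, 0);   arm MONITOR on an address
     __builtin_ia32_mwait (0, 0);           MWAIT until a store occurs

   matching the VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED and
   VOID_FTYPE_UNSIGNED_UNSIGNED signatures above.  */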
23656 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
23657 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
23658 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
23659 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
23660 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
23661 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
23662 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
23663 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
23664 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
23665 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
23666 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
23667 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
23670 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
23671 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
23673 /* MMX access to the vec_init patterns. */
23674 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
23675 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
23677 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
23678 V4HI_FTYPE_HI_HI_HI_HI,
23679 IX86_BUILTIN_VEC_INIT_V4HI);
23681 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
23682 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
23683 IX86_BUILTIN_VEC_INIT_V8QI);
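/* E.g. (hypothetical usage): __builtin_ia32_vec_init_v2si (1, 2)
   builds a __v2si value per V2SI_FTYPE_INT_INT without requiring MMX
   vector syntax in the source.  */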
23685 /* Access to the vec_extract patterns. */
23686 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
23687 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
23688 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
23689 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
23690 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
23691 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
23692 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
23693 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
23694 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
23695 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
23697 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23698 "__builtin_ia32_vec_ext_v4hi",
23699 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
23701 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
23702 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
23704 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
23705 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
23707 /* Access to the vec_set patterns. */
23708 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
23709 "__builtin_ia32_vec_set_v2di",
23710 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
23712 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
23713 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
23715 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
23716 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
23718 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
23719 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
23721 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23722 "__builtin_ia32_vec_set_v4hi",
23723 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
23725 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
23726 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
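/* E.g. (hypothetical usage): __builtin_ia32_vec_set_v8hi (v, x, 3)
   yields a copy of V8HI vector v with element 3 replaced by x; the
   index must be an integer constant, which get_element_number below
   enforces when these builtins are expanded.  */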
23728 /* Add FMA4 and XOP multi-arg builtin instructions.  */
23729 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23730 {
23731 if (d->name == 0)
23732 continue;
23734 ftype = (enum ix86_builtin_func_type) d->flag;
23735 def_builtin_const (d->mask, d->name, ftype, d->code);
23736 }
23737 }
23739 /* Internal method for ix86_init_builtins.  */
23741 static void
23742 ix86_init_builtins_va_builtins_abi (void)
23743 {
23744 tree ms_va_ref, sysv_va_ref;
23745 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23746 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23747 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23748 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23752 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23753 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23754 ms_va_ref = build_reference_type (ms_va_list_type_node);
23755 sysv_va_ref =
23756 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23758 fnvoid_va_end_ms =
23759 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23760 fnvoid_va_start_ms =
23761 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23762 fnvoid_va_end_sysv =
23763 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23764 fnvoid_va_start_sysv =
23765 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23766 NULL_TREE);
23767 fnvoid_va_copy_ms =
23768 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23769 NULL_TREE);
23770 fnvoid_va_copy_sysv =
23771 build_function_type_list (void_type_node, sysv_va_ref,
23772 sysv_va_ref, NULL_TREE);
23774 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23775 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23776 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23777 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23778 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23779 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23780 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23781 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23782 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23783 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23784 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23785 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23786 }
23788 static void
23789 ix86_init_builtin_types (void)
23790 {
23791 tree float128_type_node, float80_type_node;
23793 /* The __float80 type. */
23794 float80_type_node = long_double_type_node;
23795 if (TYPE_MODE (float80_type_node) != XFmode)
23796 {
23797 /* The __float80 type.  */
23798 float80_type_node = make_node (REAL_TYPE);
23800 TYPE_PRECISION (float80_type_node) = 80;
23801 layout_type (float80_type_node);
23802 }
23803 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
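/* When long double is already the 80-bit XFmode type, __float80 simply
   aliases it; otherwise a distinct 80-bit REAL_TYPE is laid out above so
   __float80 is always available.  */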
23805 /* The __float128 type. */
23806 float128_type_node = make_node (REAL_TYPE);
23807 TYPE_PRECISION (float128_type_node) = 128;
23808 layout_type (float128_type_node);
23809 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
23811 /* This macro is built by i386-builtin-types.awk. */
23812 DEFINE_BUILTIN_PRIMITIVE_TYPES;
23813 }
23815 static void
23816 ix86_init_builtins (void)
23817 {
23818 tree t;
23820 ix86_init_builtin_types ();
23822 /* TFmode support builtins. */
23823 def_builtin_const (0, "__builtin_infq",
23824 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
23825 def_builtin_const (0, "__builtin_huge_valq",
23826 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
23828 /* We will expand them to a normal call if SSE2 isn't available, since
23829 they are used by libgcc.  */
23830 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
23831 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
23832 BUILT_IN_MD, "__fabstf2", NULL_TREE);
23833 TREE_READONLY (t) = 1;
23834 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
23836 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
23837 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
23838 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
23839 TREE_READONLY (t) = 1;
23840 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
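/* Setting TREE_READONLY marks __builtin_fabsq and __builtin_copysignq as
   const functions, letting the middle end CSE and hoist calls; the
   __fabstf2/__copysigntf3 names are the libgcc fallbacks used when the
   calls are not expanded inline.  */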
23842 ix86_init_mmx_sse_builtins ();
23844 if (TARGET_64BIT)
23845 ix86_init_builtins_va_builtins_abi ();
23846 }
23848 /* Return the ix86 builtin for CODE.  */
23850 static tree
23851 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23852 {
23853 if (code >= IX86_BUILTIN_MAX)
23854 return error_mark_node;
23856 return ix86_builtins[code];
23857 }
23859 /* Errors in the source file can cause expand_expr to return const0_rtx
23860 where we expect a vector.  To avoid crashing, use one of the vector
23861 clear instructions.  */
23862 static rtx
23863 safe_vector_operand (rtx x, enum machine_mode mode)
23864 {
23865 if (x == const0_rtx)
23866 x = CONST0_RTX (mode);
23867 return x;
23868 }
23870 /* Subroutine of ix86_expand_builtin to take care of binop insns.  */
23872 static rtx
23873 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23874 {
23875 rtx pat;
23876 tree arg0 = CALL_EXPR_ARG (exp, 0);
23877 tree arg1 = CALL_EXPR_ARG (exp, 1);
23878 rtx op0 = expand_normal (arg0);
23879 rtx op1 = expand_normal (arg1);
23880 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23881 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23882 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23884 if (VECTOR_MODE_P (mode0))
23885 op0 = safe_vector_operand (op0, mode0);
23886 if (VECTOR_MODE_P (mode1))
23887 op1 = safe_vector_operand (op1, mode1);
23889 if (optimize || !target
23890 || GET_MODE (target) != tmode
23891 || !insn_data[icode].operand[0].predicate (target, tmode))
23892 target = gen_reg_rtx (tmode);
23894 if (GET_MODE (op1) == SImode && mode1 == TImode)
23895 {
23896 rtx x = gen_reg_rtx (V4SImode);
23897 emit_insn (gen_sse2_loadd (x, op1));
23898 op1 = gen_lowpart (TImode, x);
23899 }
23901 if (!insn_data[icode].operand[1].predicate (op0, mode0))
23902 op0 = copy_to_mode_reg (mode0, op0);
23903 if (!insn_data[icode].operand[2].predicate (op1, mode1))
23904 op1 = copy_to_mode_reg (mode1, op1);
23906 pat = GEN_FCN (icode) (target, op0, op1);
23907 if (! pat)
23908 return 0;
23910 emit_insn (pat);
23911 return target;
23912 }
23915 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */
23917 static rtx
23918 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23919 enum ix86_builtin_func_type m_type,
23920 enum rtx_code sub_code)
23921 {
23922 rtx pat;
23923 unsigned int i, nargs;
23925 bool comparison_p = false;
23926 bool tf_p = false;
23927 bool last_arg_constant = false;
23928 int num_memory = 0;
23929 struct {
23930 rtx op;
23931 enum machine_mode mode;
23932 } args[4];
23934 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23936 switch (m_type)
23937 {
23938 case MULTI_ARG_4_DF2_DI_I:
23939 case MULTI_ARG_4_DF2_DI_I1:
23940 case MULTI_ARG_4_SF2_SI_I:
23941 case MULTI_ARG_4_SF2_SI_I1:
23942 nargs = 4;
23943 last_arg_constant = true;
23944 break;
23946 case MULTI_ARG_3_SF:
23947 case MULTI_ARG_3_DF:
23948 case MULTI_ARG_3_SF2:
23949 case MULTI_ARG_3_DF2:
23950 case MULTI_ARG_3_DI:
23951 case MULTI_ARG_3_SI:
23952 case MULTI_ARG_3_SI_DI:
23953 case MULTI_ARG_3_HI:
23954 case MULTI_ARG_3_HI_SI:
23955 case MULTI_ARG_3_QI:
23956 case MULTI_ARG_3_DI2:
23957 case MULTI_ARG_3_SI2:
23958 case MULTI_ARG_3_HI2:
23959 case MULTI_ARG_3_QI2:
23960 nargs = 3;
23961 break;
23963 case MULTI_ARG_2_SF:
23964 case MULTI_ARG_2_DF:
23965 case MULTI_ARG_2_DI:
23966 case MULTI_ARG_2_SI:
23967 case MULTI_ARG_2_HI:
23968 case MULTI_ARG_2_QI:
23969 nargs = 2;
23970 break;
23972 case MULTI_ARG_2_DI_IMM:
23973 case MULTI_ARG_2_SI_IMM:
23974 case MULTI_ARG_2_HI_IMM:
23975 case MULTI_ARG_2_QI_IMM:
23976 nargs = 2;
23977 last_arg_constant = true;
23978 break;
23980 case MULTI_ARG_1_SF:
23981 case MULTI_ARG_1_DF:
23982 case MULTI_ARG_1_SF2:
23983 case MULTI_ARG_1_DF2:
23984 case MULTI_ARG_1_DI:
23985 case MULTI_ARG_1_SI:
23986 case MULTI_ARG_1_HI:
23987 case MULTI_ARG_1_QI:
23988 case MULTI_ARG_1_SI_DI:
23989 case MULTI_ARG_1_HI_DI:
23990 case MULTI_ARG_1_HI_SI:
23991 case MULTI_ARG_1_QI_DI:
23992 case MULTI_ARG_1_QI_SI:
23993 case MULTI_ARG_1_QI_HI:
23994 nargs = 1;
23995 break;
23997 case MULTI_ARG_2_DI_CMP:
23998 case MULTI_ARG_2_SI_CMP:
23999 case MULTI_ARG_2_HI_CMP:
24000 case MULTI_ARG_2_QI_CMP:
24001 nargs = 2;
24002 comparison_p = true;
24003 break;
24005 case MULTI_ARG_2_SF_TF:
24006 case MULTI_ARG_2_DF_TF:
24007 case MULTI_ARG_2_DI_TF:
24008 case MULTI_ARG_2_SI_TF:
24009 case MULTI_ARG_2_HI_TF:
24010 case MULTI_ARG_2_QI_TF:
24011 nargs = 2;
24012 tf_p = true;
24013 break;
24015 default:
24016 gcc_unreachable ();
24017 }
24019 if (optimize || !target
24020 || GET_MODE (target) != tmode
24021 || !insn_data[icode].operand[0].predicate (target, tmode))
24022 target = gen_reg_rtx (tmode);
24024 gcc_assert (nargs <= 4);
24026 for (i = 0; i < nargs; i++)
24027 {
24028 tree arg = CALL_EXPR_ARG (exp, i);
24029 rtx op = expand_normal (arg);
24030 int adjust = (comparison_p) ? 1 : 0;
24031 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24033 if (last_arg_constant && i == nargs-1)
24034 {
24035 if (!CONST_INT_P (op))
24036 {
24037 error ("last argument must be an immediate");
24038 return gen_reg_rtx (tmode);
24039 }
24040 }
24041 else
24042 {
24043 if (VECTOR_MODE_P (mode))
24044 op = safe_vector_operand (op, mode);
24046 /* If we aren't optimizing, only allow one memory operand to be
24047 generated.  */
24048 if (memory_operand (op, mode))
24049 num_memory++;
24051 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24053 if (optimize
24054 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24055 || num_memory > 1)
24056 op = force_reg (mode, op);
24057 }
24059 args[i].op = op;
24060 args[i].mode = mode;
24061 }
24063 switch (nargs)
24064 {
24065 case 1:
24066 pat = GEN_FCN (icode) (target, args[0].op);
24067 break;
24069 case 2:
24070 if (tf_p)
24071 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24072 GEN_INT ((int)sub_code));
24073 else if (! comparison_p)
24074 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24075 else
24076 {
24077 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24078 args[0].op,
24079 args[1].op);
24081 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24082 }
24083 break;
24085 case 3:
24086 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24087 break;
24089 case 4:
24090 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24091 break;
24093 default:
24094 gcc_unreachable ();
24095 }
24097 if (! pat)
24098 return 0;
24100 emit_insn (pat);
24101 return target;
24102 }
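/* For the XOP vpcom builtins the comparison arrives as SUB_CODE, e.g.
   __builtin_ia32_vpcomltb reaches here with sub_code == LT; the
   comparison rtx built above becomes the middle operand of the
   xop_maskcmp pattern, while the PCOM_FALSE/PCOM_TRUE variants instead
   pass the code through as an integer via GEN_INT.  */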
24104 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24105 insns with vec_merge.  */
24107 static rtx
24108 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24109 rtx target)
24110 {
24111 rtx pat;
24112 tree arg0 = CALL_EXPR_ARG (exp, 0);
24113 rtx op1, op0 = expand_normal (arg0);
24114 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24115 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24117 if (optimize || !target
24118 || GET_MODE (target) != tmode
24119 || !insn_data[icode].operand[0].predicate (target, tmode))
24120 target = gen_reg_rtx (tmode);
24122 if (VECTOR_MODE_P (mode0))
24123 op0 = safe_vector_operand (op0, mode0);
24125 if ((optimize && !register_operand (op0, mode0))
24126 || !insn_data[icode].operand[1].predicate (op0, mode0))
24127 op0 = copy_to_mode_reg (mode0, op0);
24129 op1 = op0;
24130 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24131 op1 = copy_to_mode_reg (mode0, op1);
24133 pat = GEN_FCN (icode) (target, op0, op1);
24134 if (! pat)
24135 return 0;
24136 emit_insn (pat);
24137 return target;
24138 }
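/* The vec_merge scalar unops (vfrczss/vfrczsd in the tables above)
   compute the operation on element 0 and take the remaining elements
   from the source, which is why op1 is seeded from op0 before the
   two-input pattern is emitted.  */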
24140 /* Subroutine of ix86_expand_builtin to take care of comparison insns.  */
24142 static rtx
24143 ix86_expand_sse_compare (const struct builtin_description *d,
24144 tree exp, rtx target, bool swap)
24145 {
24146 rtx pat;
24147 tree arg0 = CALL_EXPR_ARG (exp, 0);
24148 tree arg1 = CALL_EXPR_ARG (exp, 1);
24149 rtx op0 = expand_normal (arg0);
24150 rtx op1 = expand_normal (arg1);
24151 rtx op2;
24152 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24153 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24154 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24155 enum rtx_code comparison = d->comparison;
24157 if (VECTOR_MODE_P (mode0))
24158 op0 = safe_vector_operand (op0, mode0);
24159 if (VECTOR_MODE_P (mode1))
24160 op1 = safe_vector_operand (op1, mode1);
24162 /* Swap operands if we have a comparison that isn't available in
24163 hardware.  */
24164 if (swap)
24165 {
24166 rtx tmp = gen_reg_rtx (mode1);
24167 emit_move_insn (tmp, op1);
24168 op1 = op0;
24169 op0 = tmp;
24170 }
24172 if (optimize || !target
24173 || GET_MODE (target) != tmode
24174 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24175 target = gen_reg_rtx (tmode);
24177 if ((optimize && !register_operand (op0, mode0))
24178 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24179 op0 = copy_to_mode_reg (mode0, op0);
24180 if ((optimize && !register_operand (op1, mode1))
24181 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24182 op1 = copy_to_mode_reg (mode1, op1);
24184 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24185 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24186 if (! pat)
24187 return 0;
24188 emit_insn (pat);
24189 return target;
24190 }
24192 /* Subroutine of ix86_expand_builtin to take care of comi insns.  */
24194 static rtx
24195 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24196 rtx target)
24197 {
24198 rtx pat;
24199 tree arg0 = CALL_EXPR_ARG (exp, 0);
24200 tree arg1 = CALL_EXPR_ARG (exp, 1);
24201 rtx op0 = expand_normal (arg0);
24202 rtx op1 = expand_normal (arg1);
24203 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24204 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24205 enum rtx_code comparison = d->comparison;
24207 if (VECTOR_MODE_P (mode0))
24208 op0 = safe_vector_operand (op0, mode0);
24209 if (VECTOR_MODE_P (mode1))
24210 op1 = safe_vector_operand (op1, mode1);
24212 /* Swap operands if we have a comparison that isn't available in
24213 hardware.  */
24214 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24215 {
24216 rtx tmp = op1;
24217 op1 = op0;
24218 op0 = tmp;
24219 }
24221 target = gen_reg_rtx (SImode);
24222 emit_move_insn (target, const0_rtx);
24223 target = gen_rtx_SUBREG (QImode, target, 0);
24225 if ((optimize && !register_operand (op0, mode0))
24226 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24227 op0 = copy_to_mode_reg (mode0, op0);
24228 if ((optimize && !register_operand (op1, mode1))
24229 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24230 op1 = copy_to_mode_reg (mode1, op1);
24232 pat = GEN_FCN (d->icode) (op0, op1);
24233 if (! pat)
24234 return 0;
24235 emit_insn (pat);
24236 emit_insn (gen_rtx_SET (VOIDmode,
24237 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24238 gen_rtx_fmt_ee (comparison, QImode,
24239 SET_DEST (pat),
24240 const0_rtx)));
24242 return SUBREG_REG (target);
24243 }
24245 /* Subroutine of ix86_expand_builtin to take care of ptest insns.  */
24247 static rtx
24248 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24249 rtx target)
24250 {
24251 rtx pat;
24252 tree arg0 = CALL_EXPR_ARG (exp, 0);
24253 tree arg1 = CALL_EXPR_ARG (exp, 1);
24254 rtx op0 = expand_normal (arg0);
24255 rtx op1 = expand_normal (arg1);
24256 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24257 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24258 enum rtx_code comparison = d->comparison;
24260 if (VECTOR_MODE_P (mode0))
24261 op0 = safe_vector_operand (op0, mode0);
24262 if (VECTOR_MODE_P (mode1))
24263 op1 = safe_vector_operand (op1, mode1);
24265 target = gen_reg_rtx (SImode);
24266 emit_move_insn (target, const0_rtx);
24267 target = gen_rtx_SUBREG (QImode, target, 0);
24269 if ((optimize && !register_operand (op0, mode0))
24270 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24271 op0 = copy_to_mode_reg (mode0, op0);
24272 if ((optimize && !register_operand (op1, mode1))
24273 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24274 op1 = copy_to_mode_reg (mode1, op1);
24276 pat = GEN_FCN (d->icode) (op0, op1);
24277 if (! pat)
24278 return 0;
24279 emit_insn (pat);
24280 emit_insn (gen_rtx_SET (VOIDmode,
24281 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24282 gen_rtx_fmt_ee (comparison, QImode,
24283 gen_rtx_REG (CCmode, FLAGS_REG),
24284 const0_rtx)));
24286 return SUBREG_REG (target);
24287 }
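/* comi, ptest and the pcmp?str helpers all share this idiom: emit the
   flag-setting insn, materialize the predicate as a QImode setcc into
   the low byte of an SImode pseudo via STRICT_LOW_PART, and return the
   SImode SUBREG_REG as the int-valued builtin result.  */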
24289 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */
24291 static rtx
24292 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24293 tree exp, rtx target)
24294 {
24295 rtx pat;
24296 tree arg0 = CALL_EXPR_ARG (exp, 0);
24297 tree arg1 = CALL_EXPR_ARG (exp, 1);
24298 tree arg2 = CALL_EXPR_ARG (exp, 2);
24299 tree arg3 = CALL_EXPR_ARG (exp, 3);
24300 tree arg4 = CALL_EXPR_ARG (exp, 4);
24301 rtx scratch0, scratch1;
24302 rtx op0 = expand_normal (arg0);
24303 rtx op1 = expand_normal (arg1);
24304 rtx op2 = expand_normal (arg2);
24305 rtx op3 = expand_normal (arg3);
24306 rtx op4 = expand_normal (arg4);
24307 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24309 tmode0 = insn_data[d->icode].operand[0].mode;
24310 tmode1 = insn_data[d->icode].operand[1].mode;
24311 modev2 = insn_data[d->icode].operand[2].mode;
24312 modei3 = insn_data[d->icode].operand[3].mode;
24313 modev4 = insn_data[d->icode].operand[4].mode;
24314 modei5 = insn_data[d->icode].operand[5].mode;
24315 modeimm = insn_data[d->icode].operand[6].mode;
24317 if (VECTOR_MODE_P (modev2))
24318 op0 = safe_vector_operand (op0, modev2);
24319 if (VECTOR_MODE_P (modev4))
24320 op2 = safe_vector_operand (op2, modev4);
24322 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24323 op0 = copy_to_mode_reg (modev2, op0);
24324 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24325 op1 = copy_to_mode_reg (modei3, op1);
24326 if ((optimize && !register_operand (op2, modev4))
24327 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24328 op2 = copy_to_mode_reg (modev4, op2);
24329 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24330 op3 = copy_to_mode_reg (modei5, op3);
24332 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24333 {
24334 error ("the fifth argument must be an 8-bit immediate");
24335 return const0_rtx;
24336 }
24338 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24339 {
24340 if (optimize || !target
24341 || GET_MODE (target) != tmode0
24342 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24343 target = gen_reg_rtx (tmode0);
24345 scratch1 = gen_reg_rtx (tmode1);
24347 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24348 }
24349 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24350 {
24351 if (optimize || !target
24352 || GET_MODE (target) != tmode1
24353 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24354 target = gen_reg_rtx (tmode1);
24356 scratch0 = gen_reg_rtx (tmode0);
24358 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24359 }
24360 else
24361 {
24362 gcc_assert (d->flag);
24364 scratch0 = gen_reg_rtx (tmode0);
24365 scratch1 = gen_reg_rtx (tmode1);
24367 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24368 }
24370 if (! pat)
24371 return 0;
24373 emit_insn (pat);
24375 if (d->flag)
24376 {
24377 target = gen_reg_rtx (SImode);
24378 emit_move_insn (target, const0_rtx);
24379 target = gen_rtx_SUBREG (QImode, target, 0);
24381 emit_insn
24382 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24383 gen_rtx_fmt_ee (EQ, QImode,
24384 gen_rtx_REG ((enum machine_mode) d->flag,
24385 FLAGS_REG),
24386 const0_rtx)));
24387 return SUBREG_REG (target);
24388 }
24389 else
24390 return target;
24391 }
24394 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24396 static rtx
24397 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24398 tree exp, rtx target)
24399 {
24400 rtx pat;
24401 tree arg0 = CALL_EXPR_ARG (exp, 0);
24402 tree arg1 = CALL_EXPR_ARG (exp, 1);
24403 tree arg2 = CALL_EXPR_ARG (exp, 2);
24404 rtx scratch0, scratch1;
24405 rtx op0 = expand_normal (arg0);
24406 rtx op1 = expand_normal (arg1);
24407 rtx op2 = expand_normal (arg2);
24408 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24410 tmode0 = insn_data[d->icode].operand[0].mode;
24411 tmode1 = insn_data[d->icode].operand[1].mode;
24412 modev2 = insn_data[d->icode].operand[2].mode;
24413 modev3 = insn_data[d->icode].operand[3].mode;
24414 modeimm = insn_data[d->icode].operand[4].mode;
24416 if (VECTOR_MODE_P (modev2))
24417 op0 = safe_vector_operand (op0, modev2);
24418 if (VECTOR_MODE_P (modev3))
24419 op1 = safe_vector_operand (op1, modev3);
24421 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24422 op0 = copy_to_mode_reg (modev2, op0);
24423 if ((optimize && !register_operand (op1, modev3))
24424 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24425 op1 = copy_to_mode_reg (modev3, op1);
24427 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24428 {
24429 error ("the third argument must be an 8-bit immediate");
24430 return const0_rtx;
24431 }
24433 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24434 {
24435 if (optimize || !target
24436 || GET_MODE (target) != tmode0
24437 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24438 target = gen_reg_rtx (tmode0);
24440 scratch1 = gen_reg_rtx (tmode1);
24442 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24443 }
24444 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24445 {
24446 if (optimize || !target
24447 || GET_MODE (target) != tmode1
24448 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24449 target = gen_reg_rtx (tmode1);
24451 scratch0 = gen_reg_rtx (tmode0);
24453 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24454 }
24455 else
24456 {
24457 gcc_assert (d->flag);
24459 scratch0 = gen_reg_rtx (tmode0);
24460 scratch1 = gen_reg_rtx (tmode1);
24462 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24463 }
24465 if (! pat)
24466 return 0;
24468 emit_insn (pat);
24470 if (d->flag)
24471 {
24472 target = gen_reg_rtx (SImode);
24473 emit_move_insn (target, const0_rtx);
24474 target = gen_rtx_SUBREG (QImode, target, 0);
24476 emit_insn
24477 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24478 gen_rtx_fmt_ee (EQ, QImode,
24479 gen_rtx_REG ((enum machine_mode) d->flag,
24480 FLAGS_REG),
24481 const0_rtx)));
24482 return SUBREG_REG (target);
24483 }
24484 else
24485 return target;
24486 }
24488 /* Subroutine of ix86_expand_builtin to take care of insns with
24489 variable number of operands.  */
24491 static rtx
24492 ix86_expand_args_builtin (const struct builtin_description *d,
24493 tree exp, rtx target)
24494 {
24495 rtx pat, real_target;
24496 unsigned int i, nargs;
24497 unsigned int nargs_constant = 0;
24498 int num_memory = 0;
24499 struct
24500 {
24501 rtx op;
24502 enum machine_mode mode;
24503 } args[4];
24504 bool last_arg_count = false;
24505 enum insn_code icode = d->icode;
24506 const struct insn_data_d *insn_p = &insn_data[icode];
24507 enum machine_mode tmode = insn_p->operand[0].mode;
24508 enum machine_mode rmode = VOIDmode;
24509 bool swap = false;
24510 enum rtx_code comparison = d->comparison;
24512 switch ((enum ix86_builtin_func_type) d->flag)
24513 {
24514 case INT_FTYPE_V8SF_V8SF_PTEST:
24515 case INT_FTYPE_V4DI_V4DI_PTEST:
24516 case INT_FTYPE_V4DF_V4DF_PTEST:
24517 case INT_FTYPE_V4SF_V4SF_PTEST:
24518 case INT_FTYPE_V2DI_V2DI_PTEST:
24519 case INT_FTYPE_V2DF_V2DF_PTEST:
24520 return ix86_expand_sse_ptest (d, exp, target);
24521 case FLOAT128_FTYPE_FLOAT128:
24522 case FLOAT_FTYPE_FLOAT:
24523 case INT_FTYPE_INT:
24524 case UINT64_FTYPE_INT:
24525 case UINT16_FTYPE_UINT16:
24526 case INT64_FTYPE_INT64:
24527 case INT64_FTYPE_V4SF:
24528 case INT64_FTYPE_V2DF:
24529 case INT_FTYPE_V16QI:
24530 case INT_FTYPE_V8QI:
24531 case INT_FTYPE_V8SF:
24532 case INT_FTYPE_V4DF:
24533 case INT_FTYPE_V4SF:
24534 case INT_FTYPE_V2DF:
24535 case V16QI_FTYPE_V16QI:
24536 case V8SI_FTYPE_V8SF:
24537 case V8SI_FTYPE_V4SI:
24538 case V8HI_FTYPE_V8HI:
24539 case V8HI_FTYPE_V16QI:
24540 case V8QI_FTYPE_V8QI:
24541 case V8SF_FTYPE_V8SF:
24542 case V8SF_FTYPE_V8SI:
24543 case V8SF_FTYPE_V4SF:
24544 case V8SF_FTYPE_V8HI:
24545 case V4SI_FTYPE_V4SI:
24546 case V4SI_FTYPE_V16QI:
24547 case V4SI_FTYPE_V4SF:
24548 case V4SI_FTYPE_V8SI:
24549 case V4SI_FTYPE_V8HI:
24550 case V4SI_FTYPE_V4DF:
24551 case V4SI_FTYPE_V2DF:
24552 case V4HI_FTYPE_V4HI:
24553 case V4DF_FTYPE_V4DF:
24554 case V4DF_FTYPE_V4SI:
24555 case V4DF_FTYPE_V4SF:
24556 case V4DF_FTYPE_V2DF:
24557 case V4SF_FTYPE_V4SF:
24558 case V4SF_FTYPE_V4SI:
24559 case V4SF_FTYPE_V8SF:
24560 case V4SF_FTYPE_V4DF:
24561 case V4SF_FTYPE_V8HI:
24562 case V4SF_FTYPE_V2DF:
24563 case V2DI_FTYPE_V2DI:
24564 case V2DI_FTYPE_V16QI:
24565 case V2DI_FTYPE_V8HI:
24566 case V2DI_FTYPE_V4SI:
24567 case V2DF_FTYPE_V2DF:
24568 case V2DF_FTYPE_V4SI:
24569 case V2DF_FTYPE_V4DF:
24570 case V2DF_FTYPE_V4SF:
24571 case V2DF_FTYPE_V2SI:
24572 case V2SI_FTYPE_V2SI:
24573 case V2SI_FTYPE_V4SF:
24574 case V2SI_FTYPE_V2SF:
24575 case V2SI_FTYPE_V2DF:
24576 case V2SF_FTYPE_V2SF:
24577 case V2SF_FTYPE_V2SI:
24580 case V4SF_FTYPE_V4SF_VEC_MERGE:
24581 case V2DF_FTYPE_V2DF_VEC_MERGE:
24582 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24583 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24584 case V16QI_FTYPE_V16QI_V16QI:
24585 case V16QI_FTYPE_V8HI_V8HI:
24586 case V8QI_FTYPE_V8QI_V8QI:
24587 case V8QI_FTYPE_V4HI_V4HI:
24588 case V8HI_FTYPE_V8HI_V8HI:
24589 case V8HI_FTYPE_V16QI_V16QI:
24590 case V8HI_FTYPE_V4SI_V4SI:
24591 case V8SF_FTYPE_V8SF_V8SF:
24592 case V8SF_FTYPE_V8SF_V8SI:
24593 case V4SI_FTYPE_V4SI_V4SI:
24594 case V4SI_FTYPE_V8HI_V8HI:
24595 case V4SI_FTYPE_V4SF_V4SF:
24596 case V4SI_FTYPE_V2DF_V2DF:
24597 case V4HI_FTYPE_V4HI_V4HI:
24598 case V4HI_FTYPE_V8QI_V8QI:
24599 case V4HI_FTYPE_V2SI_V2SI:
24600 case V4DF_FTYPE_V4DF_V4DF:
24601 case V4DF_FTYPE_V4DF_V4DI:
24602 case V4SF_FTYPE_V4SF_V4SF:
24603 case V4SF_FTYPE_V4SF_V4SI:
24604 case V4SF_FTYPE_V4SF_V2SI:
24605 case V4SF_FTYPE_V4SF_V2DF:
24606 case V4SF_FTYPE_V4SF_DI:
24607 case V4SF_FTYPE_V4SF_SI:
24608 case V2DI_FTYPE_V2DI_V2DI:
24609 case V2DI_FTYPE_V16QI_V16QI:
24610 case V2DI_FTYPE_V4SI_V4SI:
24611 case V2DI_FTYPE_V2DI_V16QI:
24612 case V2DI_FTYPE_V2DF_V2DF:
24613 case V2SI_FTYPE_V2SI_V2SI:
24614 case V2SI_FTYPE_V4HI_V4HI:
24615 case V2SI_FTYPE_V2SF_V2SF:
24616 case V2DF_FTYPE_V2DF_V2DF:
24617 case V2DF_FTYPE_V2DF_V4SF:
24618 case V2DF_FTYPE_V2DF_V2DI:
24619 case V2DF_FTYPE_V2DF_DI:
24620 case V2DF_FTYPE_V2DF_SI:
24621 case V2SF_FTYPE_V2SF_V2SF:
24622 case V1DI_FTYPE_V1DI_V1DI:
24623 case V1DI_FTYPE_V8QI_V8QI:
24624 case V1DI_FTYPE_V2SI_V2SI:
24625 if (comparison == UNKNOWN)
24626 return ix86_expand_binop_builtin (icode, exp, target);
24627 nargs = 2;
24628 break;
24629 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24630 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24631 gcc_assert (comparison != UNKNOWN);
24632 nargs = 2;
24633 swap = true;
24634 break;
24635 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24636 case V8HI_FTYPE_V8HI_SI_COUNT:
24637 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24638 case V4SI_FTYPE_V4SI_SI_COUNT:
24639 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24640 case V4HI_FTYPE_V4HI_SI_COUNT:
24641 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24642 case V2DI_FTYPE_V2DI_SI_COUNT:
24643 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24644 case V2SI_FTYPE_V2SI_SI_COUNT:
24645 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24646 case V1DI_FTYPE_V1DI_SI_COUNT:
24647 nargs = 2;
24648 last_arg_count = true;
24649 break;
24650 case UINT64_FTYPE_UINT64_UINT64:
24651 case UINT_FTYPE_UINT_UINT:
24652 case UINT_FTYPE_UINT_USHORT:
24653 case UINT_FTYPE_UINT_UCHAR:
24654 case UINT16_FTYPE_UINT16_INT:
24655 case UINT8_FTYPE_UINT8_INT:
24656 nargs = 2;
24657 break;
24658 case V2DI_FTYPE_V2DI_INT_CONVERT:
24659 nargs = 2;
24660 rmode = V1TImode;
24661 nargs_constant = 1;
24662 break;
24663 case V8HI_FTYPE_V8HI_INT:
24664 case V8HI_FTYPE_V8SF_INT:
24665 case V8HI_FTYPE_V4SF_INT:
24666 case V8SF_FTYPE_V8SF_INT:
24667 case V4SI_FTYPE_V4SI_INT:
24668 case V4SI_FTYPE_V8SI_INT:
24669 case V4HI_FTYPE_V4HI_INT:
24670 case V4DF_FTYPE_V4DF_INT:
24671 case V4SF_FTYPE_V4SF_INT:
24672 case V4SF_FTYPE_V8SF_INT:
24673 case V2DI_FTYPE_V2DI_INT:
24674 case V2DF_FTYPE_V2DF_INT:
24675 case V2DF_FTYPE_V4DF_INT:
24676 nargs = 2;
24677 nargs_constant = 1;
24678 break;
24679 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24680 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24681 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24682 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24683 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24684 nargs = 3;
24685 break;
24686 case V16QI_FTYPE_V16QI_V16QI_INT:
24687 case V8HI_FTYPE_V8HI_V8HI_INT:
24688 case V8SI_FTYPE_V8SI_V8SI_INT:
24689 case V8SI_FTYPE_V8SI_V4SI_INT:
24690 case V8SF_FTYPE_V8SF_V8SF_INT:
24691 case V8SF_FTYPE_V8SF_V4SF_INT:
24692 case V4SI_FTYPE_V4SI_V4SI_INT:
24693 case V4DF_FTYPE_V4DF_V4DF_INT:
24694 case V4DF_FTYPE_V4DF_V2DF_INT:
24695 case V4SF_FTYPE_V4SF_V4SF_INT:
24696 case V2DI_FTYPE_V2DI_V2DI_INT:
24697 case V2DF_FTYPE_V2DF_V2DF_INT:
24698 nargs = 3;
24699 nargs_constant = 1;
24700 break;
24701 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
24702 nargs = 3;
24703 rmode = V1TImode;
24704 nargs_constant = 1;
24705 break;
24706 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
24707 nargs = 3;
24708 rmode = DImode;
24709 nargs_constant = 1;
24710 break;
24711 case V2DI_FTYPE_V2DI_UINT_UINT:
24712 nargs = 3;
24713 nargs_constant = 2;
24714 break;
24715 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
24716 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
24717 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
24718 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
24719 nargs = 4;
24720 nargs_constant = 1;
24721 break;
24722 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24723 nargs = 4;
24724 nargs_constant = 2;
24725 break;
24726 default:
24727 gcc_unreachable ();
24728 }
24730 gcc_assert (nargs <= ARRAY_SIZE (args));
24732 if (comparison != UNKNOWN)
24733 {
24734 gcc_assert (nargs == 2);
24735 return ix86_expand_sse_compare (d, exp, target, swap);
24736 }
24738 if (rmode == VOIDmode || rmode == tmode)
24739 {
24740 if (optimize
24741 || target == 0
24742 || GET_MODE (target) != tmode
24743 || !insn_p->operand[0].predicate (target, tmode))
24744 target = gen_reg_rtx (tmode);
24745 real_target = target;
24746 }
24747 else
24748 {
24749 target = gen_reg_rtx (rmode);
24750 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24751 }
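/* A non-VOIDmode RMODE handles builtins whose pattern mode differs from
   the C-level return type, e.g. the *_INT_CONVERT cases above map V2DI
   results onto a V1TImode pattern; REAL_TARGET is the pattern-mode view
   of TARGET created with simplify_gen_subreg.  */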
24753 for (i = 0; i < nargs; i++)
24754 {
24755 tree arg = CALL_EXPR_ARG (exp, i);
24756 rtx op = expand_normal (arg);
24757 enum machine_mode mode = insn_p->operand[i + 1].mode;
24758 bool match = insn_p->operand[i + 1].predicate (op, mode);
24760 if (last_arg_count && (i + 1) == nargs)
24761 {
24762 /* SIMD shift insns take either an 8-bit immediate or
24763 register as count. But builtin functions take int as
24764 count.  If count doesn't match, we put it in register.  */
24765 if (!match)
24766 {
24767 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24768 if (!insn_p->operand[i + 1].predicate (op, mode))
24769 op = copy_to_reg (op);
24770 }
24771 }
24772 else if ((nargs - i) <= nargs_constant)
24773 {
24774 if (!match)
24775 switch (icode)
24776 {
24777 case CODE_FOR_sse4_1_roundpd:
24778 case CODE_FOR_sse4_1_roundps:
24779 case CODE_FOR_sse4_1_roundsd:
24780 case CODE_FOR_sse4_1_roundss:
24781 case CODE_FOR_sse4_1_blendps:
24782 case CODE_FOR_avx_blendpd256:
24783 case CODE_FOR_avx_vpermilv4df:
24784 case CODE_FOR_avx_roundpd256:
24785 case CODE_FOR_avx_roundps256:
24786 error ("the last argument must be a 4-bit immediate");
24789 case CODE_FOR_sse4_1_blendpd:
24790 case CODE_FOR_avx_vpermilv2df:
24791 case CODE_FOR_xop_vpermil2v2df3:
24792 case CODE_FOR_xop_vpermil2v4sf3:
24793 case CODE_FOR_xop_vpermil2v4df3:
24794 case CODE_FOR_xop_vpermil2v8sf3:
24795 error ("the last argument must be a 2-bit immediate");
24796 return const0_rtx;
24798 case CODE_FOR_avx_vextractf128v4df:
24799 case CODE_FOR_avx_vextractf128v8sf:
24800 case CODE_FOR_avx_vextractf128v8si:
24801 case CODE_FOR_avx_vinsertf128v4df:
24802 case CODE_FOR_avx_vinsertf128v8sf:
24803 case CODE_FOR_avx_vinsertf128v8si:
24804 error ("the last argument must be a 1-bit immediate");
24805 return const0_rtx;
24807 case CODE_FOR_avx_cmpsdv2df3:
24808 case CODE_FOR_avx_cmpssv4sf3:
24809 case CODE_FOR_avx_cmppdv2df3:
24810 case CODE_FOR_avx_cmppsv4sf3:
24811 case CODE_FOR_avx_cmppdv4df3:
24812 case CODE_FOR_avx_cmppsv8sf3:
24813 error ("the last argument must be a 5-bit immediate");
24814 return const0_rtx;
24816 default:
24817 switch (nargs_constant)
24818 {
24819 case 2:
24820 if ((nargs - i) == nargs_constant)
24821 {
24822 error ("the next to last argument must be an 8-bit immediate");
24823 break;
24824 }
24825 case 1:
24826 error ("the last argument must be an 8-bit immediate");
24827 break;
24828 default:
24829 gcc_unreachable ();
24830 }
24831 return const0_rtx;
24832 }
24833 }
24834 else
24835 {
24836 if (VECTOR_MODE_P (mode))
24837 op = safe_vector_operand (op, mode);
24839 /* If we aren't optimizing, only allow one memory operand to
24840 be generated.  */
24841 if (memory_operand (op, mode))
24842 num_memory++;
24844 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24845 {
24846 if (optimize || !match || num_memory > 1)
24847 op = copy_to_mode_reg (mode, op);
24848 }
24849 else
24850 {
24851 op = copy_to_reg (op);
24852 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24853 }
24854 }
24856 args[i].op = op;
24857 args[i].mode = mode;
24858 }
24860 switch (nargs)
24861 {
24862 case 1:
24863 pat = GEN_FCN (icode) (real_target, args[0].op);
24864 break;
24865 case 2:
24866 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24867 break;
24868 case 3:
24869 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24870 args[2].op);
24871 break;
24872 case 4:
24873 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24874 args[2].op, args[3].op);
24875 break;
24876 default:
24877 gcc_unreachable ();
24878 }
24880 if (! pat)
24881 return 0;
24883 emit_insn (pat);
24884 return target;
24885 }
24887 /* Subroutine of ix86_expand_builtin to take care of special insns
24888 with variable number of operands.  */
24890 static rtx
24891 ix86_expand_special_args_builtin (const struct builtin_description *d,
24892 tree exp, rtx target)
24893 {
24894 tree arg;
24895 rtx pat, op;
24896 unsigned int i, nargs, arg_adjust, memory;
24897 struct
24898 {
24899 rtx op;
24900 enum machine_mode mode;
24901 } args[3];
24902 enum insn_code icode = d->icode;
24903 bool last_arg_constant = false;
24904 const struct insn_data_d *insn_p = &insn_data[icode];
24905 enum machine_mode tmode = insn_p->operand[0].mode;
24906 enum { load, store } klass;
24908 switch ((enum ix86_builtin_func_type) d->flag)
24909 {
24910 case VOID_FTYPE_VOID:
24911 emit_insn (GEN_FCN (icode) (target));
24912 return 0;
24913 case VOID_FTYPE_UINT64:
24914 case VOID_FTYPE_UNSIGNED:
24915 nargs = 0;
24916 klass = store;
24917 memory = 0;
24918 break;
24920 case UINT64_FTYPE_VOID:
24921 case UNSIGNED_FTYPE_VOID:
24922 case UINT16_FTYPE_VOID:
24923 nargs = 0;
24924 klass = load;
24925 memory = 0;
24926 break;
24927 case UINT64_FTYPE_PUNSIGNED:
24928 case V2DI_FTYPE_PV2DI:
24929 case V32QI_FTYPE_PCCHAR:
24930 case V16QI_FTYPE_PCCHAR:
24931 case V8SF_FTYPE_PCV4SF:
24932 case V8SF_FTYPE_PCFLOAT:
24933 case V4SF_FTYPE_PCFLOAT:
24934 case V4DF_FTYPE_PCV2DF:
24935 case V4DF_FTYPE_PCDOUBLE:
24936 case V2DF_FTYPE_PCDOUBLE:
24937 case VOID_FTYPE_PVOID:
24938 nargs = 1;
24939 klass = load;
24940 memory = 0;
24941 break;
24942 case VOID_FTYPE_PV2SF_V4SF:
24943 case VOID_FTYPE_PV4DI_V4DI:
24944 case VOID_FTYPE_PV2DI_V2DI:
24945 case VOID_FTYPE_PCHAR_V32QI:
24946 case VOID_FTYPE_PCHAR_V16QI:
24947 case VOID_FTYPE_PFLOAT_V8SF:
24948 case VOID_FTYPE_PFLOAT_V4SF:
24949 case VOID_FTYPE_PDOUBLE_V4DF:
24950 case VOID_FTYPE_PDOUBLE_V2DF:
24951 case VOID_FTYPE_PULONGLONG_ULONGLONG:
24952 case VOID_FTYPE_PINT_INT:
24953 nargs = 1;
24954 klass = store;
24955 /* Reserve memory operand for target.  */
24956 memory = ARRAY_SIZE (args);
24957 break;
24958 case V4SF_FTYPE_V4SF_PCV2SF:
24959 case V2DF_FTYPE_V2DF_PCDOUBLE:
24960 nargs = 2;
24961 klass = load;
24962 memory = 1;
24963 break;
24964 case V8SF_FTYPE_PCV8SF_V8SF:
24965 case V4DF_FTYPE_PCV4DF_V4DF:
24966 case V4SF_FTYPE_PCV4SF_V4SF:
24967 case V2DF_FTYPE_PCV2DF_V2DF:
24968 nargs = 2;
24969 klass = load;
24970 memory = 0;
24971 break;
24972 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24973 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24974 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24975 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24976 nargs = 2;
24977 klass = store;
24978 /* Reserve memory operand for target.  */
24979 memory = ARRAY_SIZE (args);
24980 break;
24981 case VOID_FTYPE_UINT_UINT_UINT:
24982 case VOID_FTYPE_UINT64_UINT_UINT:
24983 case UCHAR_FTYPE_UINT_UINT_UINT:
24984 case UCHAR_FTYPE_UINT64_UINT_UINT:
24985 nargs = 3;
24986 klass = load;
24987 memory = ARRAY_SIZE (args);
24988 last_arg_constant = true;
24989 break;
24990 default:
24991 gcc_unreachable ();
24992 }
24994 gcc_assert (nargs <= ARRAY_SIZE (args));
24996 if (klass == store)
24997 {
24998 arg = CALL_EXPR_ARG (exp, 0);
24999 op = expand_normal (arg);
25000 gcc_assert (target == 0);
25001 if (memory)
25002 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
25003 else
25004 target = force_reg (tmode, op);
25005 arg_adjust = 1;
25006 }
25007 else
25008 {
25009 arg_adjust = 0;
25010 if (optimize
25011 || target == 0
25012 || GET_MODE (target) != tmode
25013 || !insn_p->operand[0].predicate (target, tmode))
25014 target = gen_reg_rtx (tmode);
25015 }
25017 for (i = 0; i < nargs; i++)
25018 {
25019 enum machine_mode mode = insn_p->operand[i + 1].mode;
25020 bool match;
25022 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25023 op = expand_normal (arg);
25024 match = insn_p->operand[i + 1].predicate (op, mode);
25026 if (last_arg_constant && (i + 1) == nargs)
25027 {
25028 if (!match)
25029 {
25030 if (icode == CODE_FOR_lwp_lwpvalsi3
25031 || icode == CODE_FOR_lwp_lwpinssi3
25032 || icode == CODE_FOR_lwp_lwpvaldi3
25033 || icode == CODE_FOR_lwp_lwpinsdi3)
25034 error ("the last argument must be a 32-bit immediate");
25035 else
25036 error ("the last argument must be an 8-bit immediate");
25037 return const0_rtx;
25038 }
25039 }
25040 else
25041 {
25042 if (i == memory)
25043 {
25044 /* This must be the memory operand. */
25045 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25046 gcc_assert (GET_MODE (op) == mode
25047 || GET_MODE (op) == VOIDmode);
25048 }
25049 else
25050 {
25051 /* This must be register.  */
25052 if (VECTOR_MODE_P (mode))
25053 op = safe_vector_operand (op, mode);
25055 gcc_assert (GET_MODE (op) == mode
25056 || GET_MODE (op) == VOIDmode);
25057 op = copy_to_mode_reg (mode, op);
25058 }
25059 }
25061 args[i].op = op;
25062 args[i].mode = mode;
25063 }
25065 switch (nargs)
25066 {
25067 case 0:
25068 pat = GEN_FCN (icode) (target);
25069 break;
25070 case 1:
25071 pat = GEN_FCN (icode) (target, args[0].op);
25072 break;
25073 case 2:
25074 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25075 break;
25076 case 3:
25077 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25078 break;
25079 default:
25080 gcc_unreachable ();
25081 }
25083 if (! pat)
25084 return 0;
25085 emit_insn (pat);
25086 return klass == store ? 0 : target;
25087 }
25089 /* Return the integer constant in ARG. Constrain it to be in the range
25090 of the subparts of VEC_TYPE; issue an error if not.  */
25092 static unsigned HOST_WIDE_INT
25093 get_element_number (tree vec_type, tree arg)
25094 {
25095 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25097 if (!host_integerp (arg, 1)
25098 || (elt = tree_low_cst (arg, 1), elt > max))
25099 {
25100 error ("selector must be an integer constant in the range 0..%wi", max);
25101 return 0;
25102 }
25104 return elt;
25105 }
25107 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25108 ix86_expand_vector_init. We DO have language-level syntax for this, in
25109 the form of (type){ init-list }. Except that since we can't place emms
25110 instructions from inside the compiler, we can't allow the use of MMX
25111 registers unless the user explicitly asks for it. So we do *not* define
25112 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25113 we have builtins invoked by mmintrin.h that give us license to emit
25114 these sorts of instructions. */
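/* A usage sketch (the wrapper name follows GCC's mmintrin.h; treat the
   exact spelling as an assumption):

       #include <mmintrin.h>

       __m64
       make_pair (int lo, int hi)
       {
         return _mm_set_pi32 (hi, lo);
       }

   _mm_set_pi32 is defined in terms of __builtin_ia32_vec_init_v2si,
   and IX86_BUILTIN_VEC_INIT_V2SI is routed to the expander below.  */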
25117 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25119 enum machine_mode tmode = TYPE_MODE (type);
25120 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25121 int i, n_elt = GET_MODE_NUNITS (tmode);
25122 rtvec v = rtvec_alloc (n_elt);
25124 gcc_assert (VECTOR_MODE_P (tmode));
25125 gcc_assert (call_expr_nargs (exp) == n_elt);
25127 for (i = 0; i < n_elt; ++i)
25129 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25130 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25133 if (!target || !register_operand (target, tmode))
25134 target = gen_reg_rtx (tmode);
25136 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25140 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25141 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25142 had a language-level syntax for referencing vector elements. */
25145 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25147 enum machine_mode tmode, mode0;
25152 arg0 = CALL_EXPR_ARG (exp, 0);
25153 arg1 = CALL_EXPR_ARG (exp, 1);
25155 op0 = expand_normal (arg0);
25156 elt = get_element_number (TREE_TYPE (arg0), arg1);
25158 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25159 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25160 gcc_assert (VECTOR_MODE_P (mode0));
25162 op0 = force_reg (mode0, op0);
25164 if (optimize || !target || !register_operand (target, tmode))
25165 target = gen_reg_rtx (tmode);
25167 ix86_expand_vector_extract (true, target, op0, elt);
25172 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25173 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25174 a language-level syntax for referencing vector elements. */
25177 ix86_expand_vec_set_builtin (tree exp)
25179 enum machine_mode tmode, mode1;
25180 tree arg0, arg1, arg2;
25182 rtx op0, op1, target;
25184 arg0 = CALL_EXPR_ARG (exp, 0);
25185 arg1 = CALL_EXPR_ARG (exp, 1);
25186 arg2 = CALL_EXPR_ARG (exp, 2);
25188 tmode = TYPE_MODE (TREE_TYPE (arg0));
25189 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25190 gcc_assert (VECTOR_MODE_P (tmode));
25192 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25193 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25194 elt = get_element_number (TREE_TYPE (arg0), arg2);
25196 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25197 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25199 op0 = force_reg (tmode, op0);
25200 op1 = force_reg (mode1, op1);
25202 /* OP0 is the source of these builtin functions and shouldn't be
25203 modified. Create a copy, use it and return it as target. */
25204 target = gen_reg_rtx (tmode);
25205 emit_move_insn (target, op0);
25206 ix86_expand_vector_set (true, target, op1, elt);
25211 /* Expand an expression EXP that calls a built-in function,
25212 with result going to TARGET if that's convenient
25213 (and in mode MODE if that's convenient).
25214 SUBTARGET may be used as the target for computing one of EXP's operands.
25215 IGNORE is nonzero if the value is to be ignored. */
25218 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25219 enum machine_mode mode ATTRIBUTE_UNUSED,
25220 int ignore ATTRIBUTE_UNUSED)
25222 const struct builtin_description *d;
25224 enum insn_code icode;
25225 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25226 tree arg0, arg1, arg2;
25227 rtx op0, op1, op2, pat;
25228 enum machine_mode mode0, mode1, mode2;
25229 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25231 /* Determine whether the builtin function is available under the current ISA.
25232 Originally the builtin was not created if it wasn't applicable to the
25233 current ISA based on the command line switches. With function specific
25234 options, we need to check in the context of the function making the call
25235 whether it is supported. */
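/* An illustrative sketch (the crc32 builtin name is assumed from the
   SSE4.2 support):

       __attribute__ ((target ("sse4.2")))
       unsigned f (unsigned c, unsigned char b)
       {
         return __builtin_ia32_crc32qi (c, b);
       }

   must expand even when the unit is compiled without -msse4.2, while
   the same call in a function without the attribute is diagnosed with
   the error below.  */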
25236 if (ix86_builtins_isa[fcode].isa
25237 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25239 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25240 NULL, NULL, false);
25243 error ("%qE needs unknown isa option", fndecl);
25246 gcc_assert (opts != NULL);
25247 error ("%qE needs isa option %s", fndecl, opts);
25255 case IX86_BUILTIN_MASKMOVQ:
25256 case IX86_BUILTIN_MASKMOVDQU:
25257 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25258 ? CODE_FOR_mmx_maskmovq
25259 : CODE_FOR_sse2_maskmovdqu);
25260 /* Note the arg order is different from the operand order. */
25261 arg1 = CALL_EXPR_ARG (exp, 0);
25262 arg2 = CALL_EXPR_ARG (exp, 1);
25263 arg0 = CALL_EXPR_ARG (exp, 2);
25264 op0 = expand_normal (arg0);
25265 op1 = expand_normal (arg1);
25266 op2 = expand_normal (arg2);
25267 mode0 = insn_data[icode].operand[0].mode;
25268 mode1 = insn_data[icode].operand[1].mode;
25269 mode2 = insn_data[icode].operand[2].mode;
25271 op0 = force_reg (Pmode, op0);
25272 op0 = gen_rtx_MEM (mode1, op0);
25274 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25275 op0 = copy_to_mode_reg (mode0, op0);
25276 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25277 op1 = copy_to_mode_reg (mode1, op1);
25278 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25279 op2 = copy_to_mode_reg (mode2, op2);
25280 pat = GEN_FCN (icode) (op0, op1, op2);
25286 case IX86_BUILTIN_LDMXCSR:
25287 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25288 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25289 emit_move_insn (target, op0);
25290 emit_insn (gen_sse_ldmxcsr (target));
25293 case IX86_BUILTIN_STMXCSR:
25294 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25295 emit_insn (gen_sse_stmxcsr (target));
25296 return copy_to_mode_reg (SImode, target);
25298 case IX86_BUILTIN_CLFLUSH:
25299 arg0 = CALL_EXPR_ARG (exp, 0);
25300 op0 = expand_normal (arg0);
25301 icode = CODE_FOR_sse2_clflush;
25302 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25303 op0 = copy_to_mode_reg (Pmode, op0);
25305 emit_insn (gen_sse2_clflush (op0));
25308 case IX86_BUILTIN_MONITOR:
25309 arg0 = CALL_EXPR_ARG (exp, 0);
25310 arg1 = CALL_EXPR_ARG (exp, 1);
25311 arg2 = CALL_EXPR_ARG (exp, 2);
25312 op0 = expand_normal (arg0);
25313 op1 = expand_normal (arg1);
25314 op2 = expand_normal (arg2);
25316 op0 = copy_to_mode_reg (Pmode, op0);
25318 op1 = copy_to_mode_reg (SImode, op1);
25320 op2 = copy_to_mode_reg (SImode, op2);
25321 emit_insn (ix86_gen_monitor (op0, op1, op2));
25324 case IX86_BUILTIN_MWAIT:
25325 arg0 = CALL_EXPR_ARG (exp, 0);
25326 arg1 = CALL_EXPR_ARG (exp, 1);
25327 op0 = expand_normal (arg0);
25328 op1 = expand_normal (arg1);
25330 op0 = copy_to_mode_reg (SImode, op0);
25332 op1 = copy_to_mode_reg (SImode, op1);
25333 emit_insn (gen_sse3_mwait (op0, op1));
25336 case IX86_BUILTIN_VEC_INIT_V2SI:
25337 case IX86_BUILTIN_VEC_INIT_V4HI:
25338 case IX86_BUILTIN_VEC_INIT_V8QI:
25339 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25341 case IX86_BUILTIN_VEC_EXT_V2DF:
25342 case IX86_BUILTIN_VEC_EXT_V2DI:
25343 case IX86_BUILTIN_VEC_EXT_V4SF:
25344 case IX86_BUILTIN_VEC_EXT_V4SI:
25345 case IX86_BUILTIN_VEC_EXT_V8HI:
25346 case IX86_BUILTIN_VEC_EXT_V2SI:
25347 case IX86_BUILTIN_VEC_EXT_V4HI:
25348 case IX86_BUILTIN_VEC_EXT_V16QI:
25349 return ix86_expand_vec_ext_builtin (exp, target);
25351 case IX86_BUILTIN_VEC_SET_V2DI:
25352 case IX86_BUILTIN_VEC_SET_V4SF:
25353 case IX86_BUILTIN_VEC_SET_V4SI:
25354 case IX86_BUILTIN_VEC_SET_V8HI:
25355 case IX86_BUILTIN_VEC_SET_V4HI:
25356 case IX86_BUILTIN_VEC_SET_V16QI:
25357 return ix86_expand_vec_set_builtin (exp);
25359 case IX86_BUILTIN_VEC_PERM_V2DF:
25360 case IX86_BUILTIN_VEC_PERM_V4SF:
25361 case IX86_BUILTIN_VEC_PERM_V2DI:
25362 case IX86_BUILTIN_VEC_PERM_V4SI:
25363 case IX86_BUILTIN_VEC_PERM_V8HI:
25364 case IX86_BUILTIN_VEC_PERM_V16QI:
25365 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25366 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25367 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25368 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25369 case IX86_BUILTIN_VEC_PERM_V4DF:
25370 case IX86_BUILTIN_VEC_PERM_V8SF:
25371 return ix86_expand_vec_perm_builtin (exp);
25373 case IX86_BUILTIN_INFQ:
25374 case IX86_BUILTIN_HUGE_VALQ:
25376 REAL_VALUE_TYPE inf;
25380 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25382 tmp = validize_mem (force_const_mem (mode, tmp));
25385 target = gen_reg_rtx (mode);
25387 emit_move_insn (target, tmp);
25391 case IX86_BUILTIN_LLWPCB:
25392 arg0 = CALL_EXPR_ARG (exp, 0);
25393 op0 = expand_normal (arg0);
25394 icode = CODE_FOR_lwp_llwpcb;
25395 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25396 op0 = copy_to_mode_reg (Pmode, op0);
25397 emit_insn (gen_lwp_llwpcb (op0));
25400 case IX86_BUILTIN_SLWPCB:
25401 icode = CODE_FOR_lwp_slwpcb;
25403 || !insn_data[icode].operand[0].predicate (target, Pmode))
25404 target = gen_reg_rtx (Pmode);
25405 emit_insn (gen_lwp_slwpcb (target));
25412 for (i = 0, d = bdesc_special_args;
25413 i < ARRAY_SIZE (bdesc_special_args);
25415 if (d->code == fcode)
25416 return ix86_expand_special_args_builtin (d, exp, target);
25418 for (i = 0, d = bdesc_args;
25419 i < ARRAY_SIZE (bdesc_args);
25421 if (d->code == fcode)
25424 case IX86_BUILTIN_FABSQ:
25425 case IX86_BUILTIN_COPYSIGNQ:
25427 /* Emit a normal call if SSE2 isn't available. */
25428 return expand_call (exp, target, ignore);
25430 return ix86_expand_args_builtin (d, exp, target);
25433 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25434 if (d->code == fcode)
25435 return ix86_expand_sse_comi (d, exp, target);
25437 for (i = 0, d = bdesc_pcmpestr;
25438 i < ARRAY_SIZE (bdesc_pcmpestr);
25440 if (d->code == fcode)
25441 return ix86_expand_sse_pcmpestr (d, exp, target);
25443 for (i = 0, d = bdesc_pcmpistr;
25444 i < ARRAY_SIZE (bdesc_pcmpistr);
25446 if (d->code == fcode)
25447 return ix86_expand_sse_pcmpistr (d, exp, target);
25449 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25450 if (d->code == fcode)
25451 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25452 (enum ix86_builtin_func_type)
25453 d->flag, d->comparison);
25455 gcc_unreachable ();
25458 /* Returns a function decl for a vectorized version of the builtin function
25459 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25460 if it is not available. */
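/* An illustrative example of the vectorizer's query:

       void f (double *a, const double *b, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] = __builtin_sqrt (b[i]);
       }

   Here it asks about BUILT_IN_SQRT with V2DFmode in and out, and the
   switch below answers with IX86_BUILTIN_SQRTPD so the loop can be
   vectorized using sqrtpd.  */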
25463 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25466 enum machine_mode in_mode, out_mode;
25468 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25470 if (TREE_CODE (type_out) != VECTOR_TYPE
25471 || TREE_CODE (type_in) != VECTOR_TYPE
25472 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25475 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25476 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25477 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25478 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25482 case BUILT_IN_SQRT:
25483 if (out_mode == DFmode && out_n == 2
25484 && in_mode == DFmode && in_n == 2)
25485 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25488 case BUILT_IN_SQRTF:
25489 if (out_mode == SFmode && out_n == 4
25490 && in_mode == SFmode && in_n == 4)
25491 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25494 case BUILT_IN_LRINT:
25495 if (out_mode == SImode && out_n == 4
25496 && in_mode == DFmode && in_n == 2)
25497 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25500 case BUILT_IN_LRINTF:
25501 if (out_mode == SImode && out_n == 4
25502 && in_mode == SFmode && in_n == 4)
25503 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25506 case BUILT_IN_COPYSIGN:
25507 if (out_mode == DFmode && out_n == 2
25508 && in_mode == DFmode && in_n == 2)
25509 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25512 case BUILT_IN_COPYSIGNF:
25513 if (out_mode == SFmode && out_n == 4
25514 && in_mode == SFmode && in_n == 4)
25515 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25522 /* Dispatch to a handler for a vectorization library. */
25523 if (ix86_veclib_handler)
25524 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25530 /* Handler for an SVML-style interface to
25531 a library with vectorized intrinsics. */
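/* Name mangling sketch, read off the code below ("__builtin_" is ten
   characters, hence the bname+10 offsets):

       __builtin_sinf  ->  vmlsSin4    (SFmode, four lanes)
       __builtin_sin   ->  vmldSin2    (DFmode, two lanes)
       __builtin_logf  ->  vmlsLn4     (special-cased spelling)
       __builtin_log   ->  vmldLn2  */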
25534 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25537 tree fntype, new_fndecl, args;
25540 enum machine_mode el_mode, in_mode;
25543 /* The SVML is suitable for unsafe math only. */
25544 if (!flag_unsafe_math_optimizations)
25547 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25548 n = TYPE_VECTOR_SUBPARTS (type_out);
25549 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25550 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25551 if (el_mode != in_mode
25559 case BUILT_IN_LOG10:
25561 case BUILT_IN_TANH:
25563 case BUILT_IN_ATAN:
25564 case BUILT_IN_ATAN2:
25565 case BUILT_IN_ATANH:
25566 case BUILT_IN_CBRT:
25567 case BUILT_IN_SINH:
25569 case BUILT_IN_ASINH:
25570 case BUILT_IN_ASIN:
25571 case BUILT_IN_COSH:
25573 case BUILT_IN_ACOSH:
25574 case BUILT_IN_ACOS:
25575 if (el_mode != DFmode || n != 2)
25579 case BUILT_IN_EXPF:
25580 case BUILT_IN_LOGF:
25581 case BUILT_IN_LOG10F:
25582 case BUILT_IN_POWF:
25583 case BUILT_IN_TANHF:
25584 case BUILT_IN_TANF:
25585 case BUILT_IN_ATANF:
25586 case BUILT_IN_ATAN2F:
25587 case BUILT_IN_ATANHF:
25588 case BUILT_IN_CBRTF:
25589 case BUILT_IN_SINHF:
25590 case BUILT_IN_SINF:
25591 case BUILT_IN_ASINHF:
25592 case BUILT_IN_ASINF:
25593 case BUILT_IN_COSHF:
25594 case BUILT_IN_COSF:
25595 case BUILT_IN_ACOSHF:
25596 case BUILT_IN_ACOSF:
25597 if (el_mode != SFmode || n != 4)
25605 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25607 if (fn == BUILT_IN_LOGF)
25608 strcpy (name, "vmlsLn4");
25609 else if (fn == BUILT_IN_LOG)
25610 strcpy (name, "vmldLn2");
25613 sprintf (name, "vmls%s", bname+10);
25614 name[strlen (name)-1] = '4';
25617 sprintf (name, "vmld%s2", bname+10);
25619 /* Convert the first letter of the routine name to uppercase. */
25623 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25624 args = TREE_CHAIN (args))
25628 fntype = build_function_type_list (type_out, type_in, NULL);
25630 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25632 /* Build a function declaration for the vectorized function. */
25633 new_fndecl = build_decl (BUILTINS_LOCATION,
25634 FUNCTION_DECL, get_identifier (name), fntype);
25635 TREE_PUBLIC (new_fndecl) = 1;
25636 DECL_EXTERNAL (new_fndecl) = 1;
25637 DECL_IS_NOVOPS (new_fndecl) = 1;
25638 TREE_READONLY (new_fndecl) = 1;
25643 /* Handler for an ACML-style interface to
25644 a library with vectorized intrinsics. */
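/* Name sketch: the "__vr.._" template has its two dots filled with
   'd'/'2' for the DFmode cases and 's'/'4' for the SFmode cases, and
   the builtin name minus its "__builtin_" prefix is appended:

       __builtin_sin   ->  __vrd2_sin
       __builtin_cosf  ->  __vrs4_cosf  */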
25647 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25649 char name[20] = "__vr.._";
25650 tree fntype, new_fndecl, args;
25653 enum machine_mode el_mode, in_mode;
25656 /* The ACML is 64-bit only and suitable for unsafe math only, as it
25657 does not correctly support parts of IEEE with the required
25658 precision such as denormals.  */
25660 || !flag_unsafe_math_optimizations)
25663 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25664 n = TYPE_VECTOR_SUBPARTS (type_out);
25665 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25666 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25667 if (el_mode != in_mode
25677 case BUILT_IN_LOG2:
25678 case BUILT_IN_LOG10:
25681 if (el_mode != DFmode
25686 case BUILT_IN_SINF:
25687 case BUILT_IN_COSF:
25688 case BUILT_IN_EXPF:
25689 case BUILT_IN_POWF:
25690 case BUILT_IN_LOGF:
25691 case BUILT_IN_LOG2F:
25692 case BUILT_IN_LOG10F:
25695 if (el_mode != SFmode
25704 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25705 sprintf (name + 7, "%s", bname+10);
25708 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25709 args = TREE_CHAIN (args))
25713 fntype = build_function_type_list (type_out, type_in, NULL);
25715 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25717 /* Build a function declaration for the vectorized function. */
25718 new_fndecl = build_decl (BUILTINS_LOCATION,
25719 FUNCTION_DECL, get_identifier (name), fntype);
25720 TREE_PUBLIC (new_fndecl) = 1;
25721 DECL_EXTERNAL (new_fndecl) = 1;
25722 DECL_IS_NOVOPS (new_fndecl) = 1;
25723 TREE_READONLY (new_fndecl) = 1;
25729 /* Returns a decl of a function that implements conversion of an integer vector
25730 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25731 are the types involved when converting according to CODE.
25732 Return NULL_TREE if it is not available. */
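/* An illustrative case:

       void f (float *dst, const int *src, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           dst[i] = (float) src[i];
       }

   The vectorizer asks about FLOAT_EXPR from V4SImode to V4SFmode and
   is answered with the decl for IX86_BUILTIN_CVTDQ2PS (or its
   unsigned variant), i.e. a single cvtdq2ps per vector.  */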
25735 ix86_vectorize_builtin_conversion (unsigned int code,
25736 tree dest_type, tree src_type)
25744 switch (TYPE_MODE (src_type))
25747 switch (TYPE_MODE (dest_type))
25750 return (TYPE_UNSIGNED (src_type)
25751 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25752 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25754 return (TYPE_UNSIGNED (src_type)
25756 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25762 switch (TYPE_MODE (dest_type))
25765 return (TYPE_UNSIGNED (src_type)
25767 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25776 case FIX_TRUNC_EXPR:
25777 switch (TYPE_MODE (dest_type))
25780 switch (TYPE_MODE (src_type))
25783 return (TYPE_UNSIGNED (dest_type)
25785 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25787 return (TYPE_UNSIGNED (dest_type)
25789 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25796 switch (TYPE_MODE (src_type))
25799 return (TYPE_UNSIGNED (dest_type)
25801 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25818 /* Returns a decl for a target-specific builtin that implements the
25819 reciprocal of the function, or NULL_TREE if not available. */
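/* Effect sketch (assuming the fast-math style flags tested below):
   given

       float f (float a, float x) { return a / sqrtf (x); }

   the reciprocal pass can replace sqrtf by the rsqrt builtin returned
   here, rewriting the division into a multiply by the Newton-Raphson
   refined rsqrtss result.  */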
25822 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25823 bool sqrt ATTRIBUTE_UNUSED)
25825 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25826 && flag_finite_math_only && !flag_trapping_math
25827 && flag_unsafe_math_optimizations))
25831 /* Machine dependent builtins. */
25834 /* Vectorized version of sqrt to rsqrt conversion. */
25835 case IX86_BUILTIN_SQRTPS_NR:
25836 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25842 /* Normal builtins. */
25845 /* Sqrt to rsqrt conversion. */
25846 case BUILT_IN_SQRTF:
25847 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25854 /* Helper for avx_vpermilps256_operand et al. This is also used by
25855 the expansion functions to turn the parallel back into a mask.
25856 The return value is 0 for no match and the imm8+1 for a match. */
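/* Encoding sketch for the 128-bit SFmode case (nelt == 4, two bits per
   element).  A standalone model of the packing loop below:

       unsigned
       pack_vpermilps_imm (const unsigned char idx[4])
       {
         unsigned i, mask = 0;
         for (i = 0; i < 4; ++i)
           mask |= (unsigned) idx[i] << (i * 2);
         return mask;            -- e.g. {3,2,1,0} yields 0x1b
       }

   The real helper returns mask + 1 on success.  */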
25859 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25861 unsigned i, nelt = GET_MODE_NUNITS (mode);
25863 unsigned char ipar[8];
25865 if (XVECLEN (par, 0) != (int) nelt)
25868 /* Validate that all of the elements are constants, and not totally
25869 out of range. Copy the data into an integral array to make the
25870 subsequent checks easier. */
25871 for (i = 0; i < nelt; ++i)
25873 rtx er = XVECEXP (par, 0, i);
25874 unsigned HOST_WIDE_INT ei;
25876 if (!CONST_INT_P (er))
25887 /* In the 256-bit DFmode case, we can only move elements within
25888 a 128-bit lane.  */
25889 for (i = 0; i < 2; ++i)
25893 mask |= ipar[i] << i;
25895 for (i = 2; i < 4; ++i)
25899 mask |= (ipar[i] - 2) << i;
25904 /* In the 256-bit SFmode case, we have full freedom of movement
25905 within the low 128-bit lane, but the high 128-bit lane must
25906 mirror the exact same pattern. */
25907 for (i = 0; i < 4; ++i)
25908 if (ipar[i] + 4 != ipar[i + 4])
25915 /* In the 128-bit case, we've full freedom in the placement of
25916 the elements from the source operand. */
25917 for (i = 0; i < nelt; ++i)
25918 mask |= ipar[i] << (i * (nelt / 2));
25922 gcc_unreachable ();
25925 /* Make sure success has a non-zero value by adding one. */
25929 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25930 the expansion functions to turn the parallel back into a mask.
25931 The return value is 0 for no match and the imm8+1 for a match. */
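/* Encoding sketch: for V8SF (nelt == 8, nelt2 == 4) the parallel must
   select two whole 128-bit halves out of the four available across the
   two source operands.  Indices {4,5,6,7, 0,1,2,3} select half 1 and
   then half 0, giving imm8 (1 << 0) | (0 << 4) == 0x01, which is
   returned as 0x02 after the +1 success bias.  */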
25934 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25936 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25938 unsigned char ipar[8];
25940 if (XVECLEN (par, 0) != (int) nelt)
25943 /* Validate that all of the elements are constants, and not totally
25944 out of range. Copy the data into an integral array to make the
25945 subsequent checks easier. */
25946 for (i = 0; i < nelt; ++i)
25948 rtx er = XVECEXP (par, 0, i);
25949 unsigned HOST_WIDE_INT ei;
25951 if (!CONST_INT_P (er))
25954 if (ei >= 2 * nelt)
25959 /* Validate that each half of the permute is a run of consecutive elements. */
25960 for (i = 0; i < nelt2 - 1; ++i)
25961 if (ipar[i] + 1 != ipar[i + 1])
25963 for (i = nelt2; i < nelt - 1; ++i)
25964 if (ipar[i] + 1 != ipar[i + 1])
25967 /* Reconstruct the mask. */
25968 for (i = 0; i < 2; ++i)
25970 unsigned e = ipar[i * nelt2];
25974 mask |= e << (i * 4);
25977 /* Make sure success has a non-zero value by adding one. */
25982 /* Store OPERAND to memory after reload is completed.  This means
25983 that we can't easily use assign_stack_local. */
25985 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25989 gcc_assert (reload_completed);
25990 if (ix86_using_red_zone ())
25992 result = gen_rtx_MEM (mode,
25993 gen_rtx_PLUS (Pmode,
25995 GEN_INT (-RED_ZONE_SIZE)));
25996 emit_move_insn (result, operand);
25998 else if (TARGET_64BIT)
26004 operand = gen_lowpart (DImode, operand);
26008 gen_rtx_SET (VOIDmode,
26009 gen_rtx_MEM (DImode,
26010 gen_rtx_PRE_DEC (DImode,
26011 stack_pointer_rtx)),
26015 gcc_unreachable ();
26017 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26026 split_di (&operand, 1, operands, operands + 1);
26028 gen_rtx_SET (VOIDmode,
26029 gen_rtx_MEM (SImode,
26030 gen_rtx_PRE_DEC (Pmode,
26031 stack_pointer_rtx)),
26034 gen_rtx_SET (VOIDmode,
26035 gen_rtx_MEM (SImode,
26036 gen_rtx_PRE_DEC (Pmode,
26037 stack_pointer_rtx)),
26042 /* Store HImodes as SImodes. */
26043 operand = gen_lowpart (SImode, operand);
26047 gen_rtx_SET (VOIDmode,
26048 gen_rtx_MEM (GET_MODE (operand),
26049 gen_rtx_PRE_DEC (SImode,
26050 stack_pointer_rtx)),
26054 gcc_unreachable ();
26056 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26061 /* Free the memory slot allocated by ix86_force_to_memory above. */
26063 ix86_free_from_memory (enum machine_mode mode)
26065 if (!ix86_using_red_zone ())
26069 if (mode == DImode || TARGET_64BIT)
26073 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
26074 to a pop or add instruction if registers are available. */
26075 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26076 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26081 /* Implement TARGET_IRA_COVER_CLASSES.  If -mfpmath=sse, we prefer
26082 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26083 same.  */
26084 static const reg_class_t *
26085 i386_ira_cover_classes (void)
26087 static const reg_class_t sse_fpmath_classes[] = {
26088 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26090 static const reg_class_t no_sse_fpmath_classes[] = {
26091 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26094 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26097 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26098 QImode must go into class Q_REGS.
26099 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26100 movdf to do mem-to-mem moves through integer regs. */
26102 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26104 enum machine_mode mode = GET_MODE (x);
26106 /* We're only allowed to return a subclass of CLASS. Many of the
26107 following checks fail for NO_REGS, so eliminate that early. */
26108 if (regclass == NO_REGS)
26111 /* All classes can load zeros. */
26112 if (x == CONST0_RTX (mode))
26115 /* Force constants into memory if we are loading a (nonzero) constant into
26116 an MMX or SSE register. This is because there are no MMX/SSE instructions
26117 to load from a constant. */
26119 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26122 /* Prefer SSE regs only, if we can use them for math. */
26123 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26124 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26126 /* Floating-point constants need more complex checks. */
26127 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26129 /* General regs can load everything. */
26130 if (reg_class_subset_p (regclass, GENERAL_REGS))
26133 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26134 zero above. We only want to wind up preferring 80387 registers if
26135 we plan on doing computation with them. */
26137 && standard_80387_constant_p (x))
26139 /* Limit class to non-sse. */
26140 if (regclass == FLOAT_SSE_REGS)
26142 if (regclass == FP_TOP_SSE_REGS)
26144 if (regclass == FP_SECOND_SSE_REGS)
26145 return FP_SECOND_REG;
26146 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26153 /* Generally when we see PLUS here, it's the function invariant
26154 (plus soft-fp const_int), which can only be computed into general
26155 regs.  */
26156 if (GET_CODE (x) == PLUS)
26157 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26159 /* QImode constants are easy to load, but non-constant QImode data
26160 must go into Q_REGS. */
26161 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26163 if (reg_class_subset_p (regclass, Q_REGS))
26165 if (reg_class_subset_p (Q_REGS, regclass))
26173 /* Discourage putting floating-point values in SSE registers unless
26174 SSE math is being used, and likewise for the 387 registers. */
26176 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26178 enum machine_mode mode = GET_MODE (x);
26180 /* Restrict the output reload class to the register bank that we are doing
26181 math on.  If we would rather not return a subset of CLASS, reject this
26182 alternative: if reload cannot do this, it will still use its choice. */
26183 mode = GET_MODE (x);
26184 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26185 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26187 if (X87_FLOAT_MODE_P (mode))
26189 if (regclass == FP_TOP_SSE_REGS)
26191 else if (regclass == FP_SECOND_SSE_REGS)
26192 return FP_SECOND_REG;
26194 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26201 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26202 enum machine_mode mode,
26203 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26205 /* QImode spills from non-QI registers require an
26206 intermediate register on 32-bit targets. */
26207 if (!in_p && mode == QImode && !TARGET_64BIT
26208 && (rclass == GENERAL_REGS
26209 || rclass == LEGACY_REGS
26210 || rclass == INDEX_REGS))
26219 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26220 regno = true_regnum (x);
26222 /* Return Q_REGS if the operand is in memory. */
26230 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. */
26233 ix86_class_likely_spilled_p (reg_class_t rclass)
26244 case SSE_FIRST_REG:
26246 case FP_SECOND_REG:
26256 /* If we are copying between general and FP registers, we need a memory
26257 location.  The same is true for SSE and MMX registers.
26259 To keep register_move_cost fast, an inline variant is provided.
26261 The macro can't work reliably when one of the CLASSES is a class containing
26262 registers from multiple units (SSE, MMX, integer).  We avoid this by never
26263 combining those units in a single alternative in the machine description.
26264 Ensure that this constraint holds to avoid unexpected surprises.
26266 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26267 enforce these sanity checks.  */
26270 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26271 enum machine_mode mode, int strict)
26273 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26274 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26275 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26276 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26277 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26278 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26280 gcc_assert (!strict);
26284 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26287 /* ??? This is a lie.  We do have moves between mmx/general and between
26288 mmx/sse2.  But by saying we need secondary memory we discourage the
26289 register allocator from using the mmx registers unless needed. */
26290 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26293 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26295 /* SSE1 doesn't have any direct moves from other classes. */
26299 /* If the target says that inter-unit moves are more expensive
26300 than moving through memory, then don't generate them. */
26301 if (!TARGET_INTER_UNIT_MOVES)
26304 /* Between SSE and general, we have moves no larger than word size. */
26305 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26313 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26314 enum machine_mode mode, int strict)
26316 return inline_secondary_memory_needed (class1, class2, mode, strict);
26319 /* Return true if the registers in CLASS cannot represent the change from
26320 modes FROM to TO. */
26323 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26324 enum reg_class regclass)
26329 /* x87 registers can't do subreg at all, as all values are reformatted
26330 to extended precision. */
26331 if (MAYBE_FLOAT_CLASS_P (regclass))
26334 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26336 /* Vector registers do not support QI or HImode loads. If we don't
26337 disallow a change to these modes, reload will assume it's ok to
26338 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26339 the vec_dupv4hi pattern. */
26340 if (GET_MODE_SIZE (from) < 4)
26343 /* Vector registers do not support subreg with nonzero offsets, which
26344 are otherwise valid for integer registers. Since we can't see
26345 whether we have a nonzero offset from here, prohibit all
26346 nonparadoxical subregs changing size. */
26347 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26354 /* Return the cost of moving data of mode M between a
26355 register and memory. A value of 2 is the default; this cost is
26356 relative to those in `REGISTER_MOVE_COST'.
26358 This function is used extensively by register_move_cost, which is used to
26359 build tables at startup.  Make it inline in that case.
26360 When IN is 2, return the maximum of the in and out move cost.
26362 If moving between registers and memory is more expensive than
26363 between two registers, you should define this macro to express the
26364 relative cost.
26366 Also model the increased moving costs of QImode registers in
26367 non Q_REGS classes.  */
26370 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26374 if (FLOAT_CLASS_P (regclass))
26392 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26393 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26395 if (SSE_CLASS_P (regclass))
26398 switch (GET_MODE_SIZE (mode))
26413 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26414 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26416 if (MMX_CLASS_P (regclass))
26419 switch (GET_MODE_SIZE (mode))
26431 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26432 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26434 switch (GET_MODE_SIZE (mode))
26437 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26440 return ix86_cost->int_store[0];
26441 if (TARGET_PARTIAL_REG_DEPENDENCY
26442 && optimize_function_for_speed_p (cfun))
26443 cost = ix86_cost->movzbl_load;
26445 cost = ix86_cost->int_load[0];
26447 return MAX (cost, ix86_cost->int_store[0]);
26453 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26455 return ix86_cost->movzbl_load;
26457 return ix86_cost->int_store[0] + 4;
26462 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26463 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26465 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
26466 if (mode == TFmode)
26469 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26471 cost = ix86_cost->int_load[2];
26473 cost = ix86_cost->int_store[2];
26474 return (cost * (((int) GET_MODE_SIZE (mode)
26475 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26480 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26483 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26487 /* Return the cost of moving data from a register in class CLASS1 to
26488 one in class CLASS2.
26490 It is not required that the cost always equal 2 when FROM is the same as TO;
26491 on some machines it is expensive to move between registers if they are not
26492 general registers. */
26495 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26496 reg_class_t class2_i)
26498 enum reg_class class1 = (enum reg_class) class1_i;
26499 enum reg_class class2 = (enum reg_class) class2_i;
26501 /* In case we require secondary memory, compute cost of the store followed
26502 by load. In order to avoid bad register allocation choices, we need
26503 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26505 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26509 cost += inline_memory_move_cost (mode, class1, 2);
26510 cost += inline_memory_move_cost (mode, class2, 2);
26512 /* In case of copying from a general purpose register we may emit multiple
26513 stores followed by a single load, causing a memory size mismatch stall.
26514 Count this as an arbitrarily high cost of 20. */
26515 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26518 /* In the case of FP/MMX moves, the registers actually overlap, and we
26519 have to switch modes in order to treat them differently. */
26520 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26521 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26527 /* Moves between SSE/MMX and integer unit are expensive. */
26528 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26529 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26531 /* ??? By keeping the returned value relatively high, we limit the number
26532 of moves between integer and MMX/SSE registers for all targets.
26533 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
26534 where integer modes in MMX/SSE registers are not tieable
26535 because of missing QImode and HImode moves to, from or between
26536 MMX/SSE registers. */
26537 return MAX (8, ix86_cost->mmxsse_to_integer);
26539 if (MAYBE_FLOAT_CLASS_P (class1))
26540 return ix86_cost->fp_move;
26541 if (MAYBE_SSE_CLASS_P (class1))
26542 return ix86_cost->sse_move;
26543 if (MAYBE_MMX_CLASS_P (class1))
26544 return ix86_cost->mmx_move;
26548 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26551 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26553 /* Flags, and only flags, can hold CCmode values. */
26554 if (CC_REGNO_P (regno))
26555 return GET_MODE_CLASS (mode) == MODE_CC;
26556 if (GET_MODE_CLASS (mode) == MODE_CC
26557 || GET_MODE_CLASS (mode) == MODE_RANDOM
26558 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26560 if (FP_REGNO_P (regno))
26561 return VALID_FP_MODE_P (mode);
26562 if (SSE_REGNO_P (regno))
26564 /* We implement the move patterns for all vector modes into and
26565 out of SSE registers, even when no operation instructions
26566 are available.  OImode move is available only when AVX is
26567 enabled.  */
26568 return ((TARGET_AVX && mode == OImode)
26569 || VALID_AVX256_REG_MODE (mode)
26570 || VALID_SSE_REG_MODE (mode)
26571 || VALID_SSE2_REG_MODE (mode)
26572 || VALID_MMX_REG_MODE (mode)
26573 || VALID_MMX_REG_MODE_3DNOW (mode));
26575 if (MMX_REGNO_P (regno))
26577 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26578 so if the register is available at all, then we can move data of
26579 the given mode into or out of it. */
26580 return (VALID_MMX_REG_MODE (mode)
26581 || VALID_MMX_REG_MODE_3DNOW (mode));
26584 if (mode == QImode)
26586 /* Take care for QImode values - they can be in non-QI regs,
26587 but then they do cause partial register stalls. */
26588 if (regno <= BX_REG || TARGET_64BIT)
26590 if (!TARGET_PARTIAL_REG_STALL)
26592 return reload_in_progress || reload_completed;
26594 /* We handle both integers and floats in the general purpose registers. */
26595 else if (VALID_INT_MODE_P (mode))
26597 else if (VALID_FP_MODE_P (mode))
26599 else if (VALID_DFP_MODE_P (mode))
26601 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26602 on to use that value in smaller contexts, this can easily force a
26603 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26604 supporting DImode, allow it. */
26605 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26611 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26612 tieable integer mode. */
26615 ix86_tieable_integer_mode_p (enum machine_mode mode)
26624 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26627 return TARGET_64BIT;
26634 /* Return true if MODE1 is accessible in a register that can hold MODE2
26635 without copying. That is, all register classes that can hold MODE2
26636 can also hold MODE1. */
26639 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26641 if (mode1 == mode2)
26644 if (ix86_tieable_integer_mode_p (mode1)
26645 && ix86_tieable_integer_mode_p (mode2))
26648 /* MODE2 being XFmode implies fp stack or general regs, which means we
26649 can tie any smaller floating point modes to it. Note that we do not
26650 tie this with TFmode. */
26651 if (mode2 == XFmode)
26652 return mode1 == SFmode || mode1 == DFmode;
26654 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26655 that we can tie it with SFmode. */
26656 if (mode2 == DFmode)
26657 return mode1 == SFmode;
26659 /* If MODE2 is only appropriate for an SSE register, then tie with
26660 any other mode acceptable to SSE registers. */
26661 if (GET_MODE_SIZE (mode2) == 16
26662 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26663 return (GET_MODE_SIZE (mode1) == 16
26664 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26666 /* If MODE2 is appropriate for an MMX register, then tie
26667 with any other mode acceptable to MMX registers. */
26668 if (GET_MODE_SIZE (mode2) == 8
26669 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26670 return (GET_MODE_SIZE (mode1) == 8
26671 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26676 /* Compute a (partial) cost for rtx X. Return true if the complete
26677 cost has been computed, and false if subexpressions should be
26678 scanned. In either case, *TOTAL contains the cost result. */
26681 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26683 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26684 enum machine_mode mode = GET_MODE (x);
26685 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26693 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26695 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26697 else if (flag_pic && SYMBOLIC_CONST (x)
26699 || (GET_CODE (x) != LABEL_REF
26700 && (GET_CODE (x) != SYMBOL_REF
26701 || !SYMBOL_REF_LOCAL_P (x)))))
26708 if (mode == VOIDmode)
26711 switch (standard_80387_constant_p (x))
26716 default: /* Other constants */
26721 /* Start with (MEM (SYMBOL_REF)), since that's where
26722 it'll probably end up. Add a penalty for size. */
26723 *total = (COSTS_N_INSNS (1)
26724 + (flag_pic != 0 && !TARGET_64BIT)
26725 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26731 /* Zero extension is often completely free on x86_64, so make
26732 it as cheap as possible. */
26733 if (TARGET_64BIT && mode == DImode
26734 && GET_MODE (XEXP (x, 0)) == SImode)
26736 else if (TARGET_ZERO_EXTEND_WITH_AND)
26737 *total = cost->add;
26739 *total = cost->movzx;
26743 *total = cost->movsx;
26747 if (CONST_INT_P (XEXP (x, 1))
26748 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26750 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26753 *total = cost->add;
26756 if ((value == 2 || value == 3)
26757 && cost->lea <= cost->shift_const)
26759 *total = cost->lea;
26769 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26771 if (CONST_INT_P (XEXP (x, 1)))
26773 if (INTVAL (XEXP (x, 1)) > 32)
26774 *total = cost->shift_const + COSTS_N_INSNS (2);
26776 *total = cost->shift_const * 2;
26780 if (GET_CODE (XEXP (x, 1)) == AND)
26781 *total = cost->shift_var * 2;
26783 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26788 if (CONST_INT_P (XEXP (x, 1)))
26789 *total = cost->shift_const;
26791 *total = cost->shift_var;
26796 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26798 /* ??? SSE scalar cost should be used here. */
26799 *total = cost->fmul;
26802 else if (X87_FLOAT_MODE_P (mode))
26804 *total = cost->fmul;
26807 else if (FLOAT_MODE_P (mode))
26809 /* ??? SSE vector cost should be used here. */
26810 *total = cost->fmul;
26815 rtx op0 = XEXP (x, 0);
26816 rtx op1 = XEXP (x, 1);
26818 if (CONST_INT_P (XEXP (x, 1)))
26820 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26821 for (nbits = 0; value != 0; value &= value - 1)
26825 /* This is arbitrary. */
26828 /* Compute costs correctly for widening multiplication. */
26829 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26830 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26831 == GET_MODE_SIZE (mode))
26833 int is_mulwiden = 0;
26834 enum machine_mode inner_mode = GET_MODE (op0);
26836 if (GET_CODE (op0) == GET_CODE (op1))
26837 is_mulwiden = 1, op1 = XEXP (op1, 0);
26838 else if (CONST_INT_P (op1))
26840 if (GET_CODE (op0) == SIGN_EXTEND)
26841 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26844 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26848 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26851 *total = (cost->mult_init[MODE_INDEX (mode)]
26852 + nbits * cost->mult_bit
26853 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26862 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26863 /* ??? SSE cost should be used here. */
26864 *total = cost->fdiv;
26865 else if (X87_FLOAT_MODE_P (mode))
26866 *total = cost->fdiv;
26867 else if (FLOAT_MODE_P (mode))
26868 /* ??? SSE vector cost should be used here. */
26869 *total = cost->fdiv;
26871 *total = cost->divide[MODE_INDEX (mode)];
26875 if (GET_MODE_CLASS (mode) == MODE_INT
26876 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26878 if (GET_CODE (XEXP (x, 0)) == PLUS
26879 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26880 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26881 && CONSTANT_P (XEXP (x, 1)))
26883 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26884 if (val == 2 || val == 4 || val == 8)
26886 *total = cost->lea;
26887 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26888 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26889 outer_code, speed);
26890 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26894 else if (GET_CODE (XEXP (x, 0)) == MULT
26895 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26897 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26898 if (val == 2 || val == 4 || val == 8)
26900 *total = cost->lea;
26901 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26902 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26906 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26908 *total = cost->lea;
26909 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26910 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26911 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26918 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26920 /* ??? SSE cost should be used here. */
26921 *total = cost->fadd;
26924 else if (X87_FLOAT_MODE_P (mode))
26926 *total = cost->fadd;
26929 else if (FLOAT_MODE_P (mode))
26931 /* ??? SSE vector cost should be used here. */
26932 *total = cost->fadd;
26940 if (!TARGET_64BIT && mode == DImode)
26942 *total = (cost->add * 2
26943 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26944 << (GET_MODE (XEXP (x, 0)) != DImode))
26945 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26946 << (GET_MODE (XEXP (x, 1)) != DImode)));
26952 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26954 /* ??? SSE cost should be used here. */
26955 *total = cost->fchs;
26958 else if (X87_FLOAT_MODE_P (mode))
26960 *total = cost->fchs;
26963 else if (FLOAT_MODE_P (mode))
26965 /* ??? SSE vector cost should be used here. */
26966 *total = cost->fchs;
26972 if (!TARGET_64BIT && mode == DImode)
26973 *total = cost->add * 2;
26975 *total = cost->add;
26979 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26980 && XEXP (XEXP (x, 0), 1) == const1_rtx
26981 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26982 && XEXP (x, 1) == const0_rtx)
26984 /* This kind of construct is implemented using test[bwl].
26985 Treat it as if we had an AND. */
26986 *total = (cost->add
26987 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26988 + rtx_cost (const1_rtx, outer_code, speed));
26994 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26999 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27000 /* ??? SSE cost should be used here. */
27001 *total = cost->fabs;
27002 else if (X87_FLOAT_MODE_P (mode))
27003 *total = cost->fabs;
27004 else if (FLOAT_MODE_P (mode))
27005 /* ??? SSE vector cost should be used here. */
27006 *total = cost->fabs;
27010 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
27011 /* ??? SSE cost should be used here. */
27012 *total = cost->fsqrt;
27013 else if (X87_FLOAT_MODE_P (mode))
27014 *total = cost->fsqrt;
27015 else if (FLOAT_MODE_P (mode))
27016 /* ??? SSE vector cost should be used here. */
27017 *total = cost->fsqrt;
27021 if (XINT (x, 1) == UNSPEC_TP)
27028 case VEC_DUPLICATE:
27029 /* ??? Assume all of these vector manipulation patterns are
27030 recognizable, in which case they all pretty much have the
27031 same cost.  */
27032 *total = COSTS_N_INSNS (1);
27042 static int current_machopic_label_num;
27044 /* Given a symbol name and its associated stub, write out the
27045 definition of the stub. */
27048 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27050 unsigned int length;
27051 char *binder_name, *symbol_name, lazy_ptr_name[32];
27052 int label = ++current_machopic_label_num;
27054 /* For 64-bit we shouldn't get here. */
27055 gcc_assert (!TARGET_64BIT);
27057 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27058 symb = targetm.strip_name_encoding (symb);
27060 length = strlen (stub);
27061 binder_name = XALLOCAVEC (char, length + 32);
27062 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27064 length = strlen (symb);
27065 symbol_name = XALLOCAVEC (char, length + 32);
27066 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27068 sprintf (lazy_ptr_name, "L%d$lz", label);
27071 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27073 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27075 fprintf (file, "%s:\n", stub);
27076 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27080 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27081 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27082 fprintf (file, "\tjmp\t*%%edx\n");
27085 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27087 fprintf (file, "%s:\n", binder_name);
27091 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27092 fputs ("\tpushl\t%eax\n", file);
27095 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27097 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27099 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27100 fprintf (file, "%s:\n", lazy_ptr_name);
27101 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27102 fprintf (file, ASM_LONG "%s\n", binder_name);
27104 #endif /* TARGET_MACHO */
27106 /* Order the registers for the register allocator. */
27109 x86_order_regs_for_local_alloc (void)
27114 /* First allocate the local general purpose registers. */
27115 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27116 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27117 reg_alloc_order [pos++] = i;
27119 /* Global general purpose registers. */
27120 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27121 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27122 reg_alloc_order [pos++] = i;
27124 /* x87 registers come first in case we are doing FP math
27125 using them.  */
27126 if (!TARGET_SSE_MATH)
27127 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27128 reg_alloc_order [pos++] = i;
27130 /* SSE registers. */
27131 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27132 reg_alloc_order [pos++] = i;
27133 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27134 reg_alloc_order [pos++] = i;
27136 /* x87 registers. */
27137 if (TARGET_SSE_MATH)
27138 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27139 reg_alloc_order [pos++] = i;
27141 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27142 reg_alloc_order [pos++] = i;
27144 /* Initialize the rest of the array as we do not allocate some registers
27145 at all.  */
27146 while (pos < FIRST_PSEUDO_REGISTER)
27147 reg_alloc_order [pos++] = 0;
27150 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
27151 struct attribute_spec.handler. */
27153 ix86_handle_abi_attribute (tree *node, tree name,
27154 tree args ATTRIBUTE_UNUSED,
27155 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27157 if (TREE_CODE (*node) != FUNCTION_TYPE
27158 && TREE_CODE (*node) != METHOD_TYPE
27159 && TREE_CODE (*node) != FIELD_DECL
27160 && TREE_CODE (*node) != TYPE_DECL)
27162 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27164 *no_add_attrs = true;
27169 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27171 *no_add_attrs = true;
27175 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27176 if (is_attribute_p ("ms_abi", name))
27178 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27180 error ("ms_abi and sysv_abi attributes are not compatible");
27185 else if (is_attribute_p ("sysv_abi", name))
27187 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27189 error ("ms_abi and sysv_abi attributes are not compatible");
27198 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27199 struct attribute_spec.handler. */
27201 ix86_handle_struct_attribute (tree *node, tree name,
27202 tree args ATTRIBUTE_UNUSED,
27203 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27206 if (DECL_P (*node))
27208 if (TREE_CODE (*node) == TYPE_DECL)
27209 type = &TREE_TYPE (*node);
27214 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27215 || TREE_CODE (*type) == UNION_TYPE)))
27217 warning (OPT_Wattributes, "%qE attribute ignored",
27219 *no_add_attrs = true;
27222 else if ((is_attribute_p ("ms_struct", name)
27223 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27224 || ((is_attribute_p ("gcc_struct", name)
27225 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27227 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27229 *no_add_attrs = true;
27236 ix86_handle_fndecl_attribute (tree *node, tree name,
27237 tree args ATTRIBUTE_UNUSED,
27238 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27240 if (TREE_CODE (*node) != FUNCTION_DECL)
27242 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27244 *no_add_attrs = true;
27250 ix86_ms_bitfield_layout_p (const_tree record_type)
27252 return ((TARGET_MS_BITFIELD_LAYOUT
27253 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27254 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27257 /* Returns an expression indicating where the this parameter is
27258 located on entry to the FUNCTION. */
27261 x86_this_parameter (tree function)
27263 tree type = TREE_TYPE (function);
27264 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27269 const int *parm_regs;
27271 if (ix86_function_type_abi (type) == MS_ABI)
27272 parm_regs = x86_64_ms_abi_int_parameter_registers;
27274 parm_regs = x86_64_int_parameter_registers;
27275 return gen_rtx_REG (DImode, parm_regs[aggr]);
27278 nregs = ix86_function_regparm (type, function);
27280 if (nregs > 0 && !stdarg_p (type))
27284 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27285 regno = aggr ? DX_REG : CX_REG;
27286 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27290 return gen_rtx_MEM (SImode,
27291 plus_constant (stack_pointer_rtx, 4));
27300 return gen_rtx_MEM (SImode,
27301 plus_constant (stack_pointer_rtx, 4));
27304 return gen_rtx_REG (SImode, regno);
27307 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27310 /* Determine whether x86_output_mi_thunk can succeed. */
27313 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27314 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27315 HOST_WIDE_INT vcall_offset, const_tree function)
27317 /* 64-bit can handle anything. */
27321 /* For 32-bit, everything's fine if we have one free register. */
27322 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27325 /* Need a free register for vcall_offset. */
27329 /* Need a free register for GOT references. */
27330 if (flag_pic && !targetm.binds_local_p (function))
27333 /* Otherwise ok. */
27337 /* Output the assembler code for a thunk function. THUNK_DECL is the
27338 declaration for the thunk function itself, FUNCTION is the decl for
27339 the target function. DELTA is an immediate constant offset to be
27340 added to THIS. If VCALL_OFFSET is nonzero, the word at
27341 *(*this + vcall_offset) should be added to THIS. */
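/* A C model of the adjustment the emitted thunk performs (a sketch;
   DELTA and VCALL_OFFSET stand for the parameters described above):

       void *
       adjust_this (void *this_)
       {
         this_ = (char *) this_ + DELTA;
         if (VCALL_OFFSET)
           this_ = (char *) this_
                   + *(ptrdiff_t *) (*(char **) this_ + VCALL_OFFSET);
         return this_;           -- then tail-call FUNCTION
       }  */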
27344 x86_output_mi_thunk (FILE *file,
27345 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27346 HOST_WIDE_INT vcall_offset, tree function)
27349 rtx this_param = x86_this_parameter (function);
27352 /* Make sure unwind info is emitted for the thunk if needed. */
27353 final_start_function (emit_barrier (), file, 1);
27355 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27356 pull it in now and let DELTA benefit. */
27357 if (REG_P (this_param))
27358 this_reg = this_param;
27359 else if (vcall_offset)
27361 /* Put the this parameter into %eax. */
27362 xops[0] = this_param;
27363 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27364 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27367 this_reg = NULL_RTX;
27369 /* Adjust the this parameter by a fixed constant. */
27372 xops[0] = GEN_INT (delta);
27373 xops[1] = this_reg ? this_reg : this_param;
27376 if (!x86_64_general_operand (xops[0], DImode))
27378 tmp = gen_rtx_REG (DImode, R10_REG);
27380 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27382 xops[1] = this_param;
27384 if (x86_maybe_negate_const_int (&xops[0], DImode))
27385 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27387 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27389 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27390 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27392 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27395 /* Adjust the this parameter by a value stored in the vtable. */
27399 tmp = gen_rtx_REG (DImode, R10_REG);
27402 int tmp_regno = CX_REG;
27403 if (lookup_attribute ("fastcall",
27404 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27405 || lookup_attribute ("thiscall",
27406 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27407 tmp_regno = AX_REG;
27408 tmp = gen_rtx_REG (SImode, tmp_regno);
27411 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27413 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27415 /* Adjust the this parameter. */
27416 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27417 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27419 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27420 xops[0] = GEN_INT (vcall_offset);
27422 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27423 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27425 xops[1] = this_reg;
27426 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27429 /* If necessary, drop THIS back to its stack slot. */
27430 if (this_reg && this_reg != this_param)
27432 xops[0] = this_reg;
27433 xops[1] = this_param;
27434 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27437 xops[0] = XEXP (DECL_RTL (function), 0);
27440 if (!flag_pic || targetm.binds_local_p (function))
27441 output_asm_insn ("jmp\t%P0", xops);
27442 /* All thunks should be in the same object as their target,
27443 and thus binds_local_p should be true. */
27444 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27445 gcc_unreachable ();
27448 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27449 tmp = gen_rtx_CONST (Pmode, tmp);
27450 tmp = gen_rtx_MEM (QImode, tmp);
27452 output_asm_insn ("jmp\t%A0", xops);
27457 if (!flag_pic || targetm.binds_local_p (function))
27458 output_asm_insn ("jmp\t%P0", xops);
27463 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27464 if (TARGET_MACHO_BRANCH_ISLANDS)
27465 sym_ref = (gen_rtx_SYMBOL_REF
27467 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27468 tmp = gen_rtx_MEM (QImode, sym_ref);
27470 output_asm_insn ("jmp\t%0", xops);
27473 #endif /* TARGET_MACHO */
27475 tmp = gen_rtx_REG (SImode, CX_REG);
27476 output_set_got (tmp, NULL_RTX);
27479 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27480 output_asm_insn ("jmp\t{*}%1", xops);
27483 final_end_function ();
27487 x86_file_start (void)
27489 default_file_start ();
27491 darwin_file_start ();
27493 if (X86_FILE_START_VERSION_DIRECTIVE)
27494 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27495 if (X86_FILE_START_FLTUSED)
27496 fputs ("\t.global\t__fltused\n", asm_out_file);
27497 if (ix86_asm_dialect == ASM_INTEL)
27498 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27502 x86_field_alignment (tree field, int computed)
27504 enum machine_mode mode;
27505 tree type = TREE_TYPE (field);
27507 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27509 mode = TYPE_MODE (strip_array_types (type));
27510 if (mode == DFmode || mode == DCmode
27511 || GET_MODE_CLASS (mode) == MODE_INT
27512 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27513 return MIN (32, computed);
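/* Explanatory note: the MIN (32, computed) above means that on 32-bit
   targets without -malign-double the in-struct alignment of double,
   long long and their complex variants is capped at 32 bits, matching
   the traditional i386 ABI.  */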
27517 /* Output assembler code to FILE to increment profiler label # LABELNO
27518 for profiling a function entry. */
27520 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27522 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
27527 #ifndef NO_PROFILE_COUNTERS
27528 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27531 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27532 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27534 fprintf (file, "\tcall\t%s\n", mcount_name);
27538 #ifndef NO_PROFILE_COUNTERS
27539 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27542 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27546 #ifndef NO_PROFILE_COUNTERS
27547 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27550 fprintf (file, "\tcall\t%s\n", mcount_name);
27554 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27555 /* We don't have exact information about the insn sizes, but we may assume
27556 quite safely that we are informed about all 1 byte insns and memory
27557 address sizes. This is enough to eliminate unnecessary padding in 99% of cases. */
27561 min_insn_size (rtx insn)
27565 if (!INSN_P (insn) || !active_insn_p (insn))
27568 /* Discard alignments we've emitted and jump instructions. */
27569 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27570 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27572 if (JUMP_TABLE_DATA_P (insn))
27575 /* Important case - calls are always 5 bytes.
27576 It is common to have many calls in a row. */
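/* Explanatory note: a direct near CALL is encoded as opcode 0xE8
   followed by a rel32 displacement, hence exactly 5 bytes.  */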
27578 && symbolic_reference_mentioned_p (PATTERN (insn))
27579 && !SIBLING_CALL_P (insn))
27581 len = get_attr_length (insn);
27585 /* For normal instructions we rely on get_attr_length being exact,
27586 with a few exceptions. */
27587 if (!JUMP_P (insn))
27589 enum attr_type type = get_attr_type (insn);
27594 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27595 || asm_noperands (PATTERN (insn)) >= 0)
27602 /* Otherwise trust get_attr_length. */
27606 l = get_attr_length_address (insn);
27607 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27616 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
27620 ix86_avoid_jump_mispredicts (void)
27622 rtx insn, start = get_insns ();
27623 int nbytes = 0, njumps = 0;
27626 /* Look for all minimal intervals of instructions containing 4 jumps.
27627 The intervals are bounded by START and INSN. NBYTES is the total
27628 size of instructions in the interval including INSN and not including
27629 START. When NBYTES is smaller than 16 bytes, it is possible
27630 that the end of START and INSN end up in the same 16 byte page.
27632 The smallest offset in the page at which INSN can start is the case where
27633 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
27634 We add a p2align to the 16 byte window with maxskip 15 - NBYTES + sizeof (INSN).
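/* Worked example (illustrative numbers): with NBYTES = 12 and a 2 byte
   INSN, the earliest INSN can start within a page is offset
   12 - 2 = 10, so the pad uses maxskip 15 - 12 + 2 = 5: the assembler
   skips at most 5 bytes, and only does so when that is enough to push
   INSN across the next 16 byte boundary.  */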
27636 for (insn = start; insn; insn = NEXT_INSN (insn))
27640 if (LABEL_P (insn))
27642 int align = label_to_alignment (insn);
27643 int max_skip = label_to_max_skip (insn);
27647 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27648 already in the current 16 byte page, because otherwise
27649 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27650 bytes to reach a 16 byte boundary. */
27652 || (align <= 3 && max_skip != (1 << align) - 1))
27655 fprintf (dump_file, "Label %i with max_skip %i\n",
27656 INSN_UID (insn), max_skip);
27659 while (nbytes + max_skip >= 16)
27661 start = NEXT_INSN (start);
27662 if ((JUMP_P (start)
27663 && GET_CODE (PATTERN (start)) != ADDR_VEC
27664 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27666 njumps--, isjump = 1;
27669 nbytes -= min_insn_size (start);
27675 min_size = min_insn_size (insn);
27676 nbytes += min_size;
27678 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27679 INSN_UID (insn), min_size);
27681 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27682 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27690 start = NEXT_INSN (start);
27691 if ((JUMP_P (start)
27692 && GET_CODE (PATTERN (start)) != ADDR_VEC
27693 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27695 njumps--, isjump = 1;
27698 nbytes -= min_insn_size (start);
27700 gcc_assert (njumps >= 0);
27702 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27703 INSN_UID (start), INSN_UID (insn), nbytes);
27705 if (njumps == 3 && isjump && nbytes < 16)
27707 int padsize = 15 - nbytes + min_insn_size (insn);
27710 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27711 INSN_UID (insn), padsize);
27712 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27718 /* AMD Athlon works faster
27719 when RET is not the destination of a conditional jump or directly preceded
27720 by another jump instruction. We avoid the penalty by inserting a NOP just
27721 before the RET instruction in such cases. */
27723 ix86_pad_returns (void)
27728 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27730 basic_block bb = e->src;
27731 rtx ret = BB_END (bb);
27733 bool replace = false;
27735 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27736 || optimize_bb_for_size_p (bb))
27738 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27739 if (active_insn_p (prev) || LABEL_P (prev))
27741 if (prev && LABEL_P (prev))
27746 FOR_EACH_EDGE (e, ei, bb->preds)
27747 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27748 && !(e->flags & EDGE_FALLTHRU))
27753 prev = prev_active_insn (ret);
27755 && ((JUMP_P (prev) && any_condjump_p (prev))
27758 /* Empty functions get a branch mispredict even when the jump destination
27759 is not visible to us. */
27760 if (!prev && !optimize_function_for_size_p (cfun))
27765 emit_jump_insn_before (gen_return_internal_long (), ret);
27771 /* Implement machine specific optimizations. We implement padding of returns
27772 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
27776 if (optimize && optimize_function_for_speed_p (cfun))
27778 if (TARGET_PAD_RETURNS)
27779 ix86_pad_returns ();
27780 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27781 if (TARGET_FOUR_JUMP_LIMIT)
27782 ix86_avoid_jump_mispredicts ();
27787 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
27790 x86_extended_QIreg_mentioned_p (rtx insn)
27793 extract_insn_cached (insn);
27794 for (i = 0; i < recog_data.n_operands; i++)
27795 if (REG_P (recog_data.operand[i])
27796 && REGNO (recog_data.operand[i]) > BX_REG)
27801 /* Return nonzero when P points to a register encoded via a REX prefix.
27802 Called via for_each_rtx. */
27804 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27806 unsigned int regno;
27809 regno = REGNO (*p);
27810 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27813 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
27816 x86_extended_reg_mentioned_p (rtx insn)
27818 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27819 extended_reg_mentioned_1, NULL);
27822 /* If profitable, negate (without causing overflow) integer constant
27823 of mode MODE at location LOC. Return true in this case. */
27825 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27829 if (!CONST_INT_P (*loc))
27835 /* DImode x86_64 constants must fit in 32 bits. */
27836 gcc_assert (x86_64_immediate_operand (*loc, mode));
27847 gcc_unreachable ();
27850 /* Avoid overflows. */
27851 if (mode_signbit_p (mode, *loc))
27854 val = INTVAL (*loc);
27856 /* Make things pretty by emitting `subl $4,%eax' rather than `addl $-4,%eax'.
27857 Exception: -128 encodes smaller than 128, so swap the sign and the operation. */
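/* Encoding note (illustrative): "sub $4" and "add $-4" are the same
   length, so the positive form is chosen purely for readability; but
   -128 fits a sign-extended imm8 while +128 requires imm32, so
   "add $-128" stays shorter than "sub $128".  */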
27858 if ((val < 0 && val != -128)
27861 *loc = GEN_INT (-val);
27868 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27869 optabs would emit if we didn't have TFmode patterns. */
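/* A scalar C model of the sequence emitted below (illustrative sketch,
   not the generated RTL):

     double floatuns (uint64_t u)
     {
       if ((int64_t) u >= 0)
         return (double) (int64_t) u;        fits the signed range
       uint64_t half = (u >> 1) | (u & 1);   halve, keep the sticky bit
       double f = (double) (int64_t) half;
       return f + f;                         double it back
     }  */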
27872 x86_emit_floatuns (rtx operands[2])
27874 rtx neglab, donelab, i0, i1, f0, in, out;
27875 enum machine_mode mode, inmode;
27877 inmode = GET_MODE (operands[1]);
27878 gcc_assert (inmode == SImode || inmode == DImode);
27881 in = force_reg (inmode, operands[1]);
27882 mode = GET_MODE (out);
27883 neglab = gen_label_rtx ();
27884 donelab = gen_label_rtx ();
27885 f0 = gen_reg_rtx (mode);
27887 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27889 expand_float (out, in, 0);
27891 emit_jump_insn (gen_jump (donelab));
27894 emit_label (neglab);
27896 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27898 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27900 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27902 expand_float (f0, i0, 0);
27904 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27906 emit_label (donelab);
27909 /* AVX does not support 32-byte integer vector operations,
27910 thus the longest vector we are faced with is V16QImode. */
27911 #define MAX_VECT_LEN 16
27913 struct expand_vec_perm_d
27915 rtx target, op0, op1;
27916 unsigned char perm[MAX_VECT_LEN];
27917 enum machine_mode vmode;
27918 unsigned char nelt;
27922 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27923 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27925 /* Get a vector mode of the same size as the original but with elements
27926 twice as wide. This is only guaranteed to apply to integral vectors. */
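/* For example (illustrative): V16QImode widens to V8HImode and
   V8HImode to V4SImode; GET_MODE_SIZE stays the same while the unit
   count halves, which is exactly what the asserts below check.  */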
27928 static inline enum machine_mode
27929 get_mode_wider_vector (enum machine_mode o)
27931 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27932 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27933 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27934 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
27938 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27939 with all elements equal to VAR. Return true if successful. */
27942 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27943 rtx target, rtx val)
27966 /* First attempt to recognize VAL as-is. */
27967 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27968 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27969 if (recog_memoized (insn) < 0)
27972 /* If that fails, force VAL into a register. */
27975 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27976 seq = get_insns ();
27979 emit_insn_before (seq, insn);
27981 ok = recog_memoized (insn) >= 0;
27990 if (TARGET_SSE || TARGET_3DNOW_A)
27994 val = gen_lowpart (SImode, val);
27995 x = gen_rtx_TRUNCATE (HImode, val);
27996 x = gen_rtx_VEC_DUPLICATE (mode, x);
27997 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28010 struct expand_vec_perm_d dperm;
28014 memset (&dperm, 0, sizeof (dperm));
28015 dperm.target = target;
28016 dperm.vmode = mode;
28017 dperm.nelt = GET_MODE_NUNITS (mode);
28018 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
28020 /* Extend to SImode using a paradoxical SUBREG. */
28021 tmp1 = gen_reg_rtx (SImode);
28022 emit_move_insn (tmp1, gen_lowpart (SImode, val));
28024 /* Insert the SImode value as low element of a V4SImode vector. */
28025 tmp2 = gen_lowpart (V4SImode, dperm.op0);
28026 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
28028 ok = (expand_vec_perm_1 (&dperm)
28029 || expand_vec_perm_broadcast_1 (&dperm));
28041 /* Replicate the value once into the next wider mode and recurse. */
28043 enum machine_mode smode, wsmode, wvmode;
28046 smode = GET_MODE_INNER (mode);
28047 wvmode = get_mode_wider_vector (mode);
28048 wsmode = GET_MODE_INNER (wvmode);
28050 val = convert_modes (wsmode, smode, val, true);
28051 x = expand_simple_binop (wsmode, ASHIFT, val,
28052 GEN_INT (GET_MODE_BITSIZE (smode)),
28053 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28054 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
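/* E.g. (illustrative): broadcasting the QImode value 0xAB into
   V16QImode first forms the HImode value 0xABAB here, then recurses
   to broadcast that value into V8HImode.  */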
28056 x = gen_lowpart (wvmode, target);
28057 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28065 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28066 rtx x = gen_reg_rtx (hvmode);
28068 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28071 x = gen_rtx_VEC_CONCAT (mode, x, x);
28072 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28081 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28082 whose ONE_VAR element is VAR, and other elements are zero. Return true
28086 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28087 rtx target, rtx var, int one_var)
28089 enum machine_mode vsimode;
28092 bool use_vector_set = false;
28097 /* For SSE4.1, we normally use vector set. But if the second
28098 element is zero and inter-unit moves are OK, we use movq instead. */
28100 use_vector_set = (TARGET_64BIT
28102 && !(TARGET_INTER_UNIT_MOVES
28108 use_vector_set = TARGET_SSE4_1;
28111 use_vector_set = TARGET_SSE2;
28114 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28121 use_vector_set = TARGET_AVX;
28124 /* Use ix86_expand_vector_set in 64bit mode only. */
28125 use_vector_set = TARGET_AVX && TARGET_64BIT;
28131 if (use_vector_set)
28133 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28134 var = force_reg (GET_MODE_INNER (mode), var);
28135 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28151 var = force_reg (GET_MODE_INNER (mode), var);
28152 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28153 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28158 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28159 new_target = gen_reg_rtx (mode);
28161 new_target = target;
28162 var = force_reg (GET_MODE_INNER (mode), var);
28163 x = gen_rtx_VEC_DUPLICATE (mode, var);
28164 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28165 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28168 /* We need to shuffle the value to the correct position, so
28169 create a new pseudo to store the intermediate result. */
28171 /* With SSE2, we can use the integer shuffle insns. */
28172 if (mode != V4SFmode && TARGET_SSE2)
28174 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28176 GEN_INT (one_var == 1 ? 0 : 1),
28177 GEN_INT (one_var == 2 ? 0 : 1),
28178 GEN_INT (one_var == 3 ? 0 : 1)));
28179 if (target != new_target)
28180 emit_move_insn (target, new_target);
28184 /* Otherwise convert the intermediate result to V4SFmode and
28185 use the SSE1 shuffle instructions. */
28186 if (mode != V4SFmode)
28188 tmp = gen_reg_rtx (V4SFmode);
28189 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28194 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28196 GEN_INT (one_var == 1 ? 0 : 1),
28197 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28198 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28200 if (mode != V4SFmode)
28201 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28202 else if (tmp != target)
28203 emit_move_insn (target, tmp);
28205 else if (target != new_target)
28206 emit_move_insn (target, new_target);
28211 vsimode = V4SImode;
28217 vsimode = V2SImode;
28223 /* Zero extend the variable element to SImode and recurse. */
28224 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28226 x = gen_reg_rtx (vsimode);
28227 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28229 gcc_unreachable ();
28231 emit_move_insn (target, gen_lowpart (mode, x));
28239 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28240 consisting of the values in VALS. It is known that all elements
28241 except ONE_VAR are constants. Return true if successful. */
28244 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28245 rtx target, rtx vals, int one_var)
28247 rtx var = XVECEXP (vals, 0, one_var);
28248 enum machine_mode wmode;
28251 const_vec = copy_rtx (vals);
28252 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28253 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28261 /* For the two element vectors, it's just as easy to use
28262 the general case. */
28266 /* Use ix86_expand_vector_set in 64bit mode only. */
28289 /* There's no way to set one QImode entry easily. Combine
28290 the variable value with its adjacent constant value, and
28291 promote to an HImode set. */
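/* Illustrative example: to set byte 5 of a V16QImode vector, the
   variable byte is fused with constant byte 4 into one HImode value
   (the variable byte goes in the high half since 5 is odd), which is
   then stored as element 5 >> 1 = 2 of the V8HImode view.  */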
28292 x = XVECEXP (vals, 0, one_var ^ 1);
28295 var = convert_modes (HImode, QImode, var, true);
28296 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28297 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28298 x = GEN_INT (INTVAL (x) & 0xff);
28302 var = convert_modes (HImode, QImode, var, true);
28303 x = gen_int_mode (INTVAL (x) << 8, HImode);
28305 if (x != const0_rtx)
28306 var = expand_simple_binop (HImode, IOR, var, x, var,
28307 1, OPTAB_LIB_WIDEN);
28309 x = gen_reg_rtx (wmode);
28310 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28311 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28313 emit_move_insn (target, gen_lowpart (mode, x));
28320 emit_move_insn (target, const_vec);
28321 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28325 /* A subroutine of ix86_expand_vector_init_general. Use vector
28326 concatenate to handle the most general case: all values variable,
28327 and none identical. */
28330 ix86_expand_vector_init_concat (enum machine_mode mode,
28331 rtx target, rtx *ops, int n)
28333 enum machine_mode cmode, hmode = VOIDmode;
28334 rtx first[8], second[4];
28374 gcc_unreachable ();
28377 if (!register_operand (ops[1], cmode))
28378 ops[1] = force_reg (cmode, ops[1]);
28379 if (!register_operand (ops[0], cmode))
28380 ops[0] = force_reg (cmode, ops[0]);
28381 emit_insn (gen_rtx_SET (VOIDmode, target,
28382 gen_rtx_VEC_CONCAT (mode, ops[0],
28402 gcc_unreachable ();
28418 gcc_unreachable ();
28423 /* FIXME: We process inputs backward to help RA. PR 36222. */
28426 for (; i > 0; i -= 2, j--)
28428 first[j] = gen_reg_rtx (cmode);
28429 v = gen_rtvec (2, ops[i - 1], ops[i]);
28430 ix86_expand_vector_init (false, first[j],
28431 gen_rtx_PARALLEL (cmode, v));
28437 gcc_assert (hmode != VOIDmode);
28438 for (i = j = 0; i < n; i += 2, j++)
28440 second[j] = gen_reg_rtx (hmode);
28441 ix86_expand_vector_init_concat (hmode, second [j],
28445 ix86_expand_vector_init_concat (mode, target, second, n);
28448 ix86_expand_vector_init_concat (mode, target, first, n);
28452 gcc_unreachable ();
28456 /* A subroutine of ix86_expand_vector_init_general. Use vector
28457 interleave to handle the most general case: all values variable,
28458 and none identical. */
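/* Rough shape of the algorithm (illustrative outline): each loop
   iteration below packs one pair of scalars into the low lanes of its
   own vector; the interleave-low steps then zip pairs of those
   vectors together, doubling the effective element width each round,
   until a single vector holding all the inputs remains.  */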
28461 ix86_expand_vector_init_interleave (enum machine_mode mode,
28462 rtx target, rtx *ops, int n)
28464 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28467 rtx (*gen_load_even) (rtx, rtx, rtx);
28468 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28469 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28474 gen_load_even = gen_vec_setv8hi;
28475 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28476 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28477 inner_mode = HImode;
28478 first_imode = V4SImode;
28479 second_imode = V2DImode;
28480 third_imode = VOIDmode;
28483 gen_load_even = gen_vec_setv16qi;
28484 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28485 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28486 inner_mode = QImode;
28487 first_imode = V8HImode;
28488 second_imode = V4SImode;
28489 third_imode = V2DImode;
28492 gcc_unreachable ();
28495 for (i = 0; i < n; i++)
28497 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28498 op0 = gen_reg_rtx (SImode);
28499 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28501 /* Insert the SImode value as low element of V4SImode vector. */
28502 op1 = gen_reg_rtx (V4SImode);
28503 op0 = gen_rtx_VEC_MERGE (V4SImode,
28504 gen_rtx_VEC_DUPLICATE (V4SImode,
28506 CONST0_RTX (V4SImode),
28508 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28510 /* Cast the V4SImode vector back to a vector in the original mode. */
28511 op0 = gen_reg_rtx (mode);
28512 emit_move_insn (op0, gen_lowpart (mode, op1));
28514 /* Load even elements into the second position. */
28515 emit_insn (gen_load_even (op0,
28516 force_reg (inner_mode,
28520 /* Cast vector to FIRST_IMODE vector. */
28521 ops[i] = gen_reg_rtx (first_imode);
28522 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28525 /* Interleave low FIRST_IMODE vectors. */
28526 for (i = j = 0; i < n; i += 2, j++)
28528 op0 = gen_reg_rtx (first_imode);
28529 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28531 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28532 ops[j] = gen_reg_rtx (second_imode);
28533 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28536 /* Interleave low SECOND_IMODE vectors. */
28537 switch (second_imode)
28540 for (i = j = 0; i < n / 2; i += 2, j++)
28542 op0 = gen_reg_rtx (second_imode);
28543 emit_insn (gen_interleave_second_low (op0, ops[i],
28546 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28548 ops[j] = gen_reg_rtx (third_imode);
28549 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28551 second_imode = V2DImode;
28552 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28556 op0 = gen_reg_rtx (second_imode);
28557 emit_insn (gen_interleave_second_low (op0, ops[0],
28560 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
28562 emit_insn (gen_rtx_SET (VOIDmode, target,
28563 gen_lowpart (mode, op0)));
28567 gcc_unreachable ();
28571 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28572 all values variable, and none identical. */
28575 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28576 rtx target, rtx vals)
28578 rtx ops[32], op0, op1;
28579 enum machine_mode half_mode = VOIDmode;
28586 if (!mmx_ok && !TARGET_SSE)
28598 n = GET_MODE_NUNITS (mode);
28599 for (i = 0; i < n; i++)
28600 ops[i] = XVECEXP (vals, 0, i);
28601 ix86_expand_vector_init_concat (mode, target, ops, n);
28605 half_mode = V16QImode;
28609 half_mode = V8HImode;
28613 n = GET_MODE_NUNITS (mode);
28614 for (i = 0; i < n; i++)
28615 ops[i] = XVECEXP (vals, 0, i);
28616 op0 = gen_reg_rtx (half_mode);
28617 op1 = gen_reg_rtx (half_mode);
28618 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28620 ix86_expand_vector_init_interleave (half_mode, op1,
28621 &ops [n >> 1], n >> 2);
28622 emit_insn (gen_rtx_SET (VOIDmode, target,
28623 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28627 if (!TARGET_SSE4_1)
28635 /* Don't use ix86_expand_vector_init_interleave if we can't
28636 move from GPR to SSE register directly. */
28637 if (!TARGET_INTER_UNIT_MOVES)
28640 n = GET_MODE_NUNITS (mode);
28641 for (i = 0; i < n; i++)
28642 ops[i] = XVECEXP (vals, 0, i);
28643 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28651 gcc_unreachable ();
28655 int i, j, n_elts, n_words, n_elt_per_word;
28656 enum machine_mode inner_mode;
28657 rtx words[4], shift;
28659 inner_mode = GET_MODE_INNER (mode);
28660 n_elts = GET_MODE_NUNITS (mode);
28661 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28662 n_elt_per_word = n_elts / n_words;
28663 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28665 for (i = 0; i < n_words; ++i)
28667 rtx word = NULL_RTX;
28669 for (j = 0; j < n_elt_per_word; ++j)
28671 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28672 elt = convert_modes (word_mode, inner_mode, elt, true);
28678 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28679 word, 1, OPTAB_LIB_WIDEN);
28680 word = expand_simple_binop (word_mode, IOR, word, elt,
28681 word, 1, OPTAB_LIB_WIDEN);
28689 emit_move_insn (target, gen_lowpart (mode, words[0]));
28690 else if (n_words == 2)
28692 rtx tmp = gen_reg_rtx (mode);
28693 emit_clobber (tmp);
28694 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28695 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28696 emit_move_insn (target, tmp);
28698 else if (n_words == 4)
28700 rtx tmp = gen_reg_rtx (V4SImode);
28701 gcc_assert (word_mode == SImode);
28702 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28703 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28704 emit_move_insn (target, gen_lowpart (mode, tmp));
28707 gcc_unreachable ();
28711 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28712 instructions unless MMX_OK is true. */
28715 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28717 enum machine_mode mode = GET_MODE (target);
28718 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28719 int n_elts = GET_MODE_NUNITS (mode);
28720 int n_var = 0, one_var = -1;
28721 bool all_same = true, all_const_zero = true;
28725 for (i = 0; i < n_elts; ++i)
28727 x = XVECEXP (vals, 0, i);
28728 if (!(CONST_INT_P (x)
28729 || GET_CODE (x) == CONST_DOUBLE
28730 || GET_CODE (x) == CONST_FIXED))
28731 n_var++, one_var = i;
28732 else if (x != CONST0_RTX (inner_mode))
28733 all_const_zero = false;
28734 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28738 /* Constants are best loaded from the constant pool. */
28741 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28745 /* If all values are identical, broadcast the value. */
28747 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28748 XVECEXP (vals, 0, 0)))
28751 /* Values where only one field is non-constant are best loaded from
28752 the pool and overwritten via move later. */
28756 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28757 XVECEXP (vals, 0, one_var),
28761 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28765 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28769 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28771 enum machine_mode mode = GET_MODE (target);
28772 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28773 enum machine_mode half_mode;
28774 bool use_vec_merge = false;
28776 static rtx (*gen_extract[6][2]) (rtx, rtx)
28778 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28779 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28780 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28781 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28782 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28783 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28785 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28787 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28788 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28789 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28790 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28791 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28792 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28802 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28803 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28805 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28807 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28808 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28814 use_vec_merge = TARGET_SSE4_1;
28822 /* For the two element vectors, we implement a VEC_CONCAT with
28823 the extraction of the other element. */
28825 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28826 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28829 op0 = val, op1 = tmp;
28831 op0 = tmp, op1 = val;
28833 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28834 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28839 use_vec_merge = TARGET_SSE4_1;
28846 use_vec_merge = true;
28850 /* tmp = target = A B C D */
28851 tmp = copy_to_reg (target);
28852 /* target = A A B B */
28853 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28854 /* target = X A B B */
28855 ix86_expand_vector_set (false, target, val, 0);
28856 /* target = A X C D */
28857 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28858 const1_rtx, const0_rtx,
28859 GEN_INT (2+4), GEN_INT (3+4)));
28863 /* tmp = target = A B C D */
28864 tmp = copy_to_reg (target);
28865 /* tmp = X B C D */
28866 ix86_expand_vector_set (false, tmp, val, 0);
28867 /* target = A B X D */
28868 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28869 const0_rtx, const1_rtx,
28870 GEN_INT (0+4), GEN_INT (3+4)));
28874 /* tmp = target = A B C D */
28875 tmp = copy_to_reg (target);
28876 /* tmp = X B C D */
28877 ix86_expand_vector_set (false, tmp, val, 0);
28878 /* target = A B X D */
28879 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28880 const0_rtx, const1_rtx,
28881 GEN_INT (2+4), GEN_INT (0+4)));
28885 gcc_unreachable ();
28890 use_vec_merge = TARGET_SSE4_1;
28894 /* Element 0 handled by vec_merge below. */
28897 use_vec_merge = true;
28903 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28904 store into element 0, then shuffle them back. */
28908 order[0] = GEN_INT (elt);
28909 order[1] = const1_rtx;
28910 order[2] = const2_rtx;
28911 order[3] = GEN_INT (3);
28912 order[elt] = const0_rtx;
28914 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28915 order[1], order[2], order[3]));
28917 ix86_expand_vector_set (false, target, val, 0);
28919 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28920 order[1], order[2], order[3]));
28924 /* For SSE1, we have to reuse the V4SF code. */
28925 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28926 gen_lowpart (SFmode, val), elt);
28931 use_vec_merge = TARGET_SSE2;
28934 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28938 use_vec_merge = TARGET_SSE4_1;
28945 half_mode = V16QImode;
28951 half_mode = V8HImode;
28957 half_mode = V4SImode;
28963 half_mode = V2DImode;
28969 half_mode = V4SFmode;
28975 half_mode = V2DFmode;
28981 /* Compute offset. */
28985 gcc_assert (i <= 1);
28987 /* Extract the half. */
28988 tmp = gen_reg_rtx (half_mode);
28989 emit_insn (gen_extract[j][i] (tmp, target));
28991 /* Put val in tmp at elt. */
28992 ix86_expand_vector_set (false, tmp, val, elt);
28995 emit_insn (gen_insert[j][i] (target, target, tmp));
29004 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
29005 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
29006 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29010 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29012 emit_move_insn (mem, target);
29014 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29015 emit_move_insn (tmp, val);
29017 emit_move_insn (target, mem);
29022 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
29024 enum machine_mode mode = GET_MODE (vec);
29025 enum machine_mode inner_mode = GET_MODE_INNER (mode);
29026 bool use_vec_extr = false;
29039 use_vec_extr = true;
29043 use_vec_extr = TARGET_SSE4_1;
29055 tmp = gen_reg_rtx (mode);
29056 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29057 GEN_INT (elt), GEN_INT (elt),
29058 GEN_INT (elt+4), GEN_INT (elt+4)));
29062 tmp = gen_reg_rtx (mode);
29063 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29067 gcc_unreachable ();
29070 use_vec_extr = true;
29075 use_vec_extr = TARGET_SSE4_1;
29089 tmp = gen_reg_rtx (mode);
29090 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29091 GEN_INT (elt), GEN_INT (elt),
29092 GEN_INT (elt), GEN_INT (elt)));
29096 tmp = gen_reg_rtx (mode);
29097 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29101 gcc_unreachable ();
29104 use_vec_extr = true;
29109 /* For SSE1, we have to reuse the V4SF code. */
29110 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29111 gen_lowpart (V4SFmode, vec), elt);
29117 use_vec_extr = TARGET_SSE2;
29120 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29124 use_vec_extr = TARGET_SSE4_1;
29128 /* ??? Could extract the appropriate HImode element and shift. */
29135 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29136 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29138 /* Let the rtl optimizers know about the zero extension performed. */
29139 if (inner_mode == QImode || inner_mode == HImode)
29141 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29142 target = gen_lowpart (SImode, target);
29145 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29149 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29151 emit_move_insn (mem, vec);
29153 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29154 emit_move_insn (target, tmp);
29158 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29159 pattern to reduce; DEST is the destination; IN is the input vector. */
29162 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29164 rtx tmp1, tmp2, tmp3;
29166 tmp1 = gen_reg_rtx (V4SFmode);
29167 tmp2 = gen_reg_rtx (V4SFmode);
29168 tmp3 = gen_reg_rtx (V4SFmode);
29170 emit_insn (gen_sse_movhlps (tmp1, in, in));
29171 emit_insn (fn (tmp2, tmp1, in));
29173 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29174 const1_rtx, const1_rtx,
29175 GEN_INT (1+4), GEN_INT (1+4)));
29176 emit_insn (fn (dest, tmp2, tmp3));
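/* For FN = addition the sequence above computes (illustrative trace):
     tmp1 = { in[2], in[3], in[2], in[3] }        (movhlps)
     tmp2 = { in[0]+in[2], in[1]+in[3], ... }
     tmp3 = splat of tmp2[1]                      (shufps)
     dest[0] = tmp2[0] + tmp2[1]
             = (in[0] + in[2]) + (in[1] + in[3]).  */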
29179 /* Target hook for scalar_mode_supported_p. */
29181 ix86_scalar_mode_supported_p (enum machine_mode mode)
29183 if (DECIMAL_FLOAT_MODE_P (mode))
29184 return default_decimal_float_supported_p ();
29185 else if (mode == TFmode)
29188 return default_scalar_mode_supported_p (mode);
29191 /* Implements target hook vector_mode_supported_p. */
29193 ix86_vector_mode_supported_p (enum machine_mode mode)
29195 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29197 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29199 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29201 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29203 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29208 /* Target hook for c_mode_for_suffix. */
29209 static enum machine_mode
29210 ix86_c_mode_for_suffix (char suffix)
29220 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29222 We do this in the new i386 backend to maintain source compatibility
29223 with the old cc0-based compiler. */
29226 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29227 tree inputs ATTRIBUTE_UNUSED,
29230 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29232 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29237 /* Implements the target hook targetm.asm.encode_section_info. This
29238 is not used by Netware. */
29240 static void ATTRIBUTE_UNUSED
29241 ix86_encode_section_info (tree decl, rtx rtl, int first)
29243 default_encode_section_info (decl, rtl, first);
29245 if (TREE_CODE (decl) == VAR_DECL
29246 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29247 && ix86_in_large_data_p (decl))
29248 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29251 /* Worker function for REVERSE_CONDITION. */
29254 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29256 return (mode != CCFPmode && mode != CCFPUmode
29257 ? reverse_condition (code)
29258 : reverse_condition_maybe_unordered (code));
29261 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
29265 output_387_reg_move (rtx insn, rtx *operands)
29267 if (REG_P (operands[0]))
29269 if (REG_P (operands[1])
29270 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29272 if (REGNO (operands[0]) == FIRST_STACK_REG)
29273 return output_387_ffreep (operands, 0);
29274 return "fstp\t%y0";
29276 if (STACK_TOP_P (operands[0]))
29277 return "fld%Z1\t%y1";
29280 else if (MEM_P (operands[0]))
29282 gcc_assert (REG_P (operands[1]));
29283 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29284 return "fstp%Z0\t%y0";
29287 /* There is no non-popping store to memory for XFmode.
29288 So if we need one, follow the store with a load. */
29289 if (GET_MODE (operands[0]) == XFmode)
29290 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29292 return "fst%Z0\t%y0";
29299 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
29300 the FP status register is set. */
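/* Explanatory note: fnstsw stores the FPU status word in %ax, so C2
   (bit 10 of the status word) appears as bit 2 of %ah.  With SAHF that
   bit lands in PF, hence the UNORDERED test below; without SAHF we
   test %ah against the mask 0x04 directly.  */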
29303 ix86_emit_fp_unordered_jump (rtx label)
29305 rtx reg = gen_reg_rtx (HImode);
29308 emit_insn (gen_x86_fnstsw_1 (reg));
29310 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29312 emit_insn (gen_x86_sahf_1 (reg));
29314 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29315 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29319 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29321 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29322 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29325 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29326 gen_rtx_LABEL_REF (VOIDmode, label),
29328 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29330 emit_jump_insn (temp);
29331 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29334 /* Output code to perform a log1p XFmode calculation. */
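/* Explanatory note: log1p(x) = ln(1 + x).  fyl2xp1 computes
   y * log2(x + 1) but is only specified for |x| < 1 - sqrt(2)/2
   (~0.2929, the threshold tested below), so larger inputs fall back
   to fyl2x on 1 + x; both paths use ln(2) (fldln2) as y to convert
   from base-2 to natural log.  */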
29336 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29338 rtx label1 = gen_label_rtx ();
29339 rtx label2 = gen_label_rtx ();
29341 rtx tmp = gen_reg_rtx (XFmode);
29342 rtx tmp2 = gen_reg_rtx (XFmode);
29345 emit_insn (gen_absxf2 (tmp, op1));
29346 test = gen_rtx_GE (VOIDmode, tmp,
29347 CONST_DOUBLE_FROM_REAL_VALUE (
29348 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29350 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29352 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29353 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29354 emit_jump (label2);
29356 emit_label (label1);
29357 emit_move_insn (tmp, CONST1_RTX (XFmode));
29358 emit_insn (gen_addxf3 (tmp, op1, tmp));
29359 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29360 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29362 emit_label (label2);
29365 /* Output code to perform a Newton-Raphson approximation of a single precision
29366 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29368 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29370 rtx x0, x1, e0, e1, two;
29372 x0 = gen_reg_rtx (mode);
29373 e0 = gen_reg_rtx (mode);
29374 e1 = gen_reg_rtx (mode);
29375 x1 = gen_reg_rtx (mode);
29377 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29379 if (VECTOR_MODE_P (mode))
29380 two = ix86_build_const_vector (SFmode, true, two);
29382 two = force_reg (mode, two);
29384 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
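/* Scalar C model of the steps below (illustrative sketch; rcp() stands
   for the ~12 bit rcpss estimate):

     float x0 = rcp (b);
     float e0 = x0 * a;
     float e1 = x0 * b;
     return e0 * (2.0f - e1);     one Newton-Raphson refinement  */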
29386 /* x0 = rcp(b) estimate */
29387 emit_insn (gen_rtx_SET (VOIDmode, x0,
29388 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29391 emit_insn (gen_rtx_SET (VOIDmode, e0,
29392 gen_rtx_MULT (mode, x0, a)));
29394 emit_insn (gen_rtx_SET (VOIDmode, e1,
29395 gen_rtx_MULT (mode, x0, b)));
29397 emit_insn (gen_rtx_SET (VOIDmode, x1,
29398 gen_rtx_MINUS (mode, two, e1)));
29399 /* res = e0 * x1 */
29400 emit_insn (gen_rtx_SET (VOIDmode, res,
29401 gen_rtx_MULT (mode, e0, x1)));
29404 /* Output code to perform a Newton-Raphson approximation of a
29405 single precision floating point [reciprocal] square root. */
29407 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29410 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29413 x0 = gen_reg_rtx (mode);
29414 e0 = gen_reg_rtx (mode);
29415 e1 = gen_reg_rtx (mode);
29416 e2 = gen_reg_rtx (mode);
29417 e3 = gen_reg_rtx (mode);
29419 real_from_integer (&r, VOIDmode, -3, -1, 0);
29420 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29422 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29423 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29425 if (VECTOR_MODE_P (mode))
29427 mthree = ix86_build_const_vector (SFmode, true, mthree);
29428 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29431 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29432 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
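/* Scalar C model of the steps below (illustrative sketch; rsqrt()
   stands for the rsqrtss estimate):

     x0 = rsqrt (a);
     e0 = x0 * a;
     e1 = e0 * x0;                     a * x0 * x0
     e2 = e1 - 3.0;
     e3 = -0.5 * (recip ? x0 : e0);
     return e2 * e3;  */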
29434 /* x0 = rsqrt(a) estimate */
29435 emit_insn (gen_rtx_SET (VOIDmode, x0,
29436 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29439 /* If a == 0.0, mask the rsqrt estimate to zero: rsqrtss(0.0) returns +Inf, and Inf * 0.0 below would produce a NaN for sqrt(0.0). */
29444 zero = gen_reg_rtx (mode);
29445 mask = gen_reg_rtx (mode);
29447 zero = force_reg (mode, CONST0_RTX(mode));
29448 emit_insn (gen_rtx_SET (VOIDmode, mask,
29449 gen_rtx_NE (mode, zero, a)));
29451 emit_insn (gen_rtx_SET (VOIDmode, x0,
29452 gen_rtx_AND (mode, x0, mask)));
29456 emit_insn (gen_rtx_SET (VOIDmode, e0,
29457 gen_rtx_MULT (mode, x0, a)));
29459 emit_insn (gen_rtx_SET (VOIDmode, e1,
29460 gen_rtx_MULT (mode, e0, x0)));
29463 mthree = force_reg (mode, mthree);
29464 emit_insn (gen_rtx_SET (VOIDmode, e2,
29465 gen_rtx_PLUS (mode, e1, mthree)));
29467 mhalf = force_reg (mode, mhalf);
29469 /* e3 = -.5 * x0 */
29470 emit_insn (gen_rtx_SET (VOIDmode, e3,
29471 gen_rtx_MULT (mode, x0, mhalf)));
29473 /* e3 = -.5 * e0 */
29474 emit_insn (gen_rtx_SET (VOIDmode, e3,
29475 gen_rtx_MULT (mode, e0, mhalf)));
29476 /* ret = e2 * e3 */
29477 emit_insn (gen_rtx_SET (VOIDmode, res,
29478 gen_rtx_MULT (mode, e2, e3)));
29481 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29483 static void ATTRIBUTE_UNUSED
29484 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29487 /* With Binutils 2.15, the "@unwind" marker must be specified on
29488 every occurrence of the ".eh_frame" section, not just the first one. */
29491 && strcmp (name, ".eh_frame") == 0)
29493 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29494 flags & SECTION_WRITE ? "aw" : "a");
29497 default_elf_asm_named_section (name, flags, decl);
29500 /* Return the mangling of TYPE if it is an extended fundamental type. */
29502 static const char *
29503 ix86_mangle_type (const_tree type)
29505 type = TYPE_MAIN_VARIANT (type);
29507 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29508 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29511 switch (TYPE_MODE (type))
29514 /* __float128 is "g". */
29517 /* "long double" or __float80 is "e". */
29524 /* For 32-bit code we can save PIC register setup by using
29525 the __stack_chk_fail_local hidden function instead of calling
29526 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
29527 register, so it is better to call __stack_chk_fail directly. */
29530 ix86_stack_protect_fail (void)
29532 return TARGET_64BIT
29533 ? default_external_stack_protect_fail ()
29534 : default_hidden_stack_protect_fail ();
29537 /* Select a format to encode pointers in exception handling data. CODE
29538 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29539 true if the symbol may be affected by dynamic relocations.
29541 ??? All x86 object file formats are capable of representing this.
29542 After all, the relocation needed is the same as for the call insn.
29543 Whether or not a particular assembler allows us to enter such, I
29544 guess we'll have to see. */
29546 asm_preferred_eh_data_format (int code, int global)
29550 int type = DW_EH_PE_sdata8;
29552 || ix86_cmodel == CM_SMALL_PIC
29553 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29554 type = DW_EH_PE_sdata4;
29555 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29557 if (ix86_cmodel == CM_SMALL
29558 || (ix86_cmodel == CM_MEDIUM && code))
29559 return DW_EH_PE_udata4;
29560 return DW_EH_PE_absptr;
29563 /* Expand copysign from SIGN to the positive value ABS_VALUE
29564 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
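/* Bitwise model (illustrative): with M the sign-bit mask,
     result = abs_value | (sign & M)
   i.e. SIGN's sign bit is copied onto the already-positive
   ABS_VALUE.  */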
29567 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29569 enum machine_mode mode = GET_MODE (sign);
29570 rtx sgn = gen_reg_rtx (mode);
29571 if (mask == NULL_RTX)
29573 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29574 if (!VECTOR_MODE_P (mode))
29576 /* We need to generate a scalar mode mask in this case. */
29577 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29578 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29579 mask = gen_reg_rtx (mode);
29580 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29584 mask = gen_rtx_NOT (mode, mask);
29585 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29586 gen_rtx_AND (mode, mask, sign)));
29587 emit_insn (gen_rtx_SET (VOIDmode, result,
29588 gen_rtx_IOR (mode, abs_value, sgn)));
29591 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29592 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
29595 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29597 enum machine_mode mode = GET_MODE (op0);
29600 xa = gen_reg_rtx (mode);
29601 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29602 if (!VECTOR_MODE_P (mode))
29604 /* We need to generate a scalar mode mask in this case. */
29605 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29606 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29607 mask = gen_reg_rtx (mode);
29608 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29610 emit_insn (gen_rtx_SET (VOIDmode, xa,
29611 gen_rtx_AND (mode, op0, mask)));
29619 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29620 swapping the operands if SWAP_OPERANDS is true. The expanded
29621 code is a forward jump to a newly created label in case the
29622 comparison is true. The generated label rtx is returned. */
29624 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29625 bool swap_operands)
29636 label = gen_label_rtx ();
29637 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29638 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29639 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29640 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29641 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29642 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29643 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29644 JUMP_LABEL (tmp) = label;
29649 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29650 using comparison code CODE. Operands are swapped for the comparison if
29651 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29653 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29654 bool swap_operands)
29656 enum machine_mode mode = GET_MODE (op0);
29657 rtx mask = gen_reg_rtx (mode);
29666 if (mode == DFmode)
29667 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29668 gen_rtx_fmt_ee (code, mode, op0, op1)));
29670 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29671 gen_rtx_fmt_ee (code, mode, op0, op1)));
29676 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29677 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
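/* Explanatory note: adding and then subtracting 2**52 (2**23 for
   SFmode) forces a value with magnitude below that constant to be
   rounded to an integer in the current rounding mode, because after
   the addition the mantissa has no bits left for the fraction.  The
   rounding sequences below all build on this trick.  */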
29679 ix86_gen_TWO52 (enum machine_mode mode)
29681 REAL_VALUE_TYPE TWO52r;
29684 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29685 TWO52 = const_double_from_real_value (TWO52r, mode);
29686 TWO52 = force_reg (mode, TWO52);
29691 /* Expand SSE sequence for computing lround from OP1 storing
29694 ix86_expand_lround (rtx op0, rtx op1)
29696 /* C code for the stuff we're doing below:
29697 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29700 enum machine_mode mode = GET_MODE (op1);
29701 const struct real_format *fmt;
29702 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29705 /* load nextafter (0.5, 0.0) */
29706 fmt = REAL_MODE_FORMAT (mode);
29707 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29708 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
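/* Explanatory note: pred_half is nextafter (0.5, 0.0), i.e.
   0.5 - 2**(-p-1) with p the precision of MODE (2**-54 for DFmode).
   Adding it instead of exactly 0.5 keeps inputs just below 0.5 from
   rounding all the way up to 1 through double rounding.  */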
29710 /* adj = copysign (0.5, op1) */
29711 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29712 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29714 /* adj = op1 + adj */
29715 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29717 /* op0 = (imode)adj */
29718 expand_fix (op0, adj, 0);
29721 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
29724 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29726 /* C code for the stuff we're doing below (for do_floor):
29728 xi -= (double)xi > op1 ? 1 : 0;
29731 enum machine_mode fmode = GET_MODE (op1);
29732 enum machine_mode imode = GET_MODE (op0);
29733 rtx ireg, freg, label, tmp;
29735 /* reg = (long)op1 */
29736 ireg = gen_reg_rtx (imode);
29737 expand_fix (ireg, op1, 0);
29739 /* freg = (double)reg */
29740 freg = gen_reg_rtx (fmode);
29741 expand_float (freg, ireg, 0);
29743 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29744 label = ix86_expand_sse_compare_and_jump (UNLE,
29745 freg, op1, !do_floor);
29746 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29747 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29748 emit_move_insn (ireg, tmp);
29750 emit_label (label);
29751 LABEL_NUSES (label) = 1;
29753 emit_move_insn (op0, ireg);
29756 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29757 result in OPERAND0. */
29759 ix86_expand_rint (rtx operand0, rtx operand1)
29761 /* C code for the stuff we're doing below:
29762 xa = fabs (operand1);
29763 if (!isless (xa, 2**52))
29765 xa = xa + 2**52 - 2**52;
29766 return copysign (xa, operand1);
29768 enum machine_mode mode = GET_MODE (operand0);
29769 rtx res, xa, label, TWO52, mask;
29771 res = gen_reg_rtx (mode);
29772 emit_move_insn (res, operand1);
29774 /* xa = abs (operand1) */
29775 xa = ix86_expand_sse_fabs (res, &mask);
29777 /* if (!isless (xa, TWO52)) goto label; */
29778 TWO52 = ix86_gen_TWO52 (mode);
29779 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29781 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29782 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29784 ix86_sse_copysign_to_positive (res, xa, res, mask);
29786 emit_label (label);
29787 LABEL_NUSES (label) = 1;
29789 emit_move_insn (operand0, res);
29792 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29795 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29797 /* C code for the stuff we expand below.
29798 double xa = fabs (x), x2;
29799 if (!isless (xa, TWO52))
29801 xa = xa + TWO52 - TWO52;
29802 x2 = copysign (xa, x);
29811 enum machine_mode mode = GET_MODE (operand0);
29812 rtx xa, TWO52, tmp, label, one, res, mask;
29814 TWO52 = ix86_gen_TWO52 (mode);
29816 /* Temporary for holding the result, initialized to the input
29817 operand to ease control flow. */
29818 res = gen_reg_rtx (mode);
29819 emit_move_insn (res, operand1);
29821 /* xa = abs (operand1) */
29822 xa = ix86_expand_sse_fabs (res, &mask);
29824 /* if (!isless (xa, TWO52)) goto label; */
29825 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29827 /* xa = xa + TWO52 - TWO52; */
29828 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29829 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29831 /* xa = copysign (xa, operand1) */
29832 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29834 /* generate 1.0 or -1.0 */
29835 one = force_reg (mode,
29836 const_double_from_real_value (do_floor
29837 ? dconst1 : dconstm1, mode));
29839 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29840 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29841 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29842 gen_rtx_AND (mode, one, tmp)));
29843 /* We always need to subtract here to preserve signed zero. */
29844 tmp = expand_simple_binop (mode, MINUS,
29845 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29846 emit_move_insn (res, tmp);
29848 emit_label (label);
29849 LABEL_NUSES (label) = 1;
29851 emit_move_insn (operand0, res);
29854 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29857 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29859 /* C code for the stuff we expand below.
29860 double xa = fabs (x), x2;
29861 if (!isless (xa, TWO52))
29863 x2 = (double)(long)x;
29870 if (HONOR_SIGNED_ZEROS (mode))
29871 return copysign (x2, x);
29874 enum machine_mode mode = GET_MODE (operand0);
29875 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29877 TWO52 = ix86_gen_TWO52 (mode);
29879 /* Temporary for holding the result, initialized to the input
29880 operand to ease control flow. */
29881 res = gen_reg_rtx (mode);
29882 emit_move_insn (res, operand1);
29884 /* xa = abs (operand1) */
29885 xa = ix86_expand_sse_fabs (res, &mask);
29887 /* if (!isless (xa, TWO52)) goto label; */
29888 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29890 /* xa = (double)(long)x */
29891 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29892 expand_fix (xi, res, 0);
29893 expand_float (xa, xi, 0);
29896 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29898 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29899 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29900 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29901 gen_rtx_AND (mode, one, tmp)));
29902 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29903 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29904 emit_move_insn (res, tmp);
29906 if (HONOR_SIGNED_ZEROS (mode))
29907 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29909 emit_label (label);
29910 LABEL_NUSES (label) = 1;
29912 emit_move_insn (operand0, res);
29915 /* Expand SSE sequence for computing round from OPERAND1 storing
29916 into OPERAND0. This sequence works without relying on DImode truncation
29917 via cvttsd2siq, which is only available on 64bit targets. */
29919 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29921 /* C code for the stuff we expand below.
29922 double xa = fabs (x), xa2, x2;
29923 if (!isless (xa, TWO52))
29925 Using the absolute value and copying back the sign makes
29926 -0.0 -> -0.0 correct.
29927 xa2 = xa + TWO52 - TWO52;
29932 else if (dxa > 0.5)
29934 x2 = copysign (xa2, x);
29937 enum machine_mode mode = GET_MODE (operand0);
29938 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29940 TWO52 = ix86_gen_TWO52 (mode);
29942 /* Temporary for holding the result, initialized to the input
29943 operand to ease control flow. */
29944 res = gen_reg_rtx (mode);
29945 emit_move_insn (res, operand1);
29947 /* xa = abs (operand1) */
29948 xa = ix86_expand_sse_fabs (res, &mask);
29950 /* if (!isless (xa, TWO52)) goto label; */
29951 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29953 /* xa2 = xa + TWO52 - TWO52; */
29954 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29955 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29957 /* dxa = xa2 - xa; */
29958 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29960 /* generate 0.5, 1.0 and -0.5 */
29961 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29962 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29963 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29967 tmp = gen_reg_rtx (mode);
29968 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29969 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29970 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29971 gen_rtx_AND (mode, one, tmp)));
29972 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29973 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29974 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29975 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29976 gen_rtx_AND (mode, one, tmp)));
29977 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29979 /* res = copysign (xa2, operand1) */
29980 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29982 emit_label (label);
29983 LABEL_NUSES (label) = 1;
29985 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_trunc (rtx operand0, rtx operand1)
{
  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        x2 = (double)(long)x;
        if (HONOR_SIGNED_ZEROS (mode))
          return copysign (x2, x);
        return x2;
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, xi, TWO52, label, res, mask;

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &mask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* x = (double)(long)x */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, res, 0);
  expand_float (res, xi, 0);

  if (HONOR_SIGNED_ZEROS (mode))
    ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
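/* Illustrative sketch (not part of GCC): the scalar model of the expansion
   above.  The TWO52 guard matters because the round trip through a signed
   integer is only exact below 2**52; at or above that magnitude every
   double is already an integer, so the input is returned unchanged.
   Hypothetical helper, never built.  */
#if 0
#include <math.h>
static double
truncdf_model (double x)
{
  const double two52 = 4503599627370496.0;  /* 2**52 */
  if (!isless (fabs (x), two52))
    return x;                             /* already integral */
  /* cvttsd2si{q}: conversion to integer truncates toward zero.  */
  double x2 = (double) (long long) x;
  /* copysign fixes -0.3 -> -0.0 rather than +0.0.  */
  return copysign (x2, x);
}
#endif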
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  A sequence that works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
        Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
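/* Worked example (illustrative): x = 2.7.  xa + TWO52 - TWO52 rounds to
   the nearest integer, giving 3.0; since 3.0 > 2.7 the UNGT mask is
   all-ones, the AND with 1.0 yields 1.0, and the subtraction produces
   trunc (2.7) == 2.0.  For x = 2.3 the addition already rounds down to
   2.0 and the mask is zero, so no compensation happens.  */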
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}
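/* Why nextafter (0.5, 0.0) rather than plain 0.5: for the largest double
   below 0.5, adding 0.5 rounds the sum up to 1.0, which would make
   round(x) == 1 instead of 0.  Illustrative check (not part of GCC),
   hypothetical helper, never built.  */
#if 0
#include <math.h>
#include <assert.h>
static void
pred_half_demo (void)
{
  double x = nextafter (0.5, 0.0);       /* 0.5 - 2**-54 */
  /* The exact sum x + 0.5 == 1 - 2**-54 lies halfway between 1 - 2**-53
     and 1.0, and round-to-nearest-even picks 1.0: the naive bias fails.  */
  assert ((double) (long long) (x + 0.5) == 1.0);
  /* Biasing by the predecessor of 0.5 keeps the sum strictly below 1.0,
     so truncation gives the correct round(x) == 0.  */
  double pred_half = nextafter (0.5, 0.0);
  assert ((double) (long long) (x + pred_half) == 0.0);
}
#endif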
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL }
};
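/* For reference (not part of GCC): how a few of the attributes above look
   at the source level.  These are documented GCC extensions; the
   declarations are illustrative only and never compiled here.  */
#if 0
/* Callee pops its (fixed) arguments; first two integer args in ECX/EDX.  */
int __attribute__ ((fastcall)) f (int a, int b);
/* First two integer arguments passed in registers, caller pops.  */
int __attribute__ ((regparm (2))) g (int a, int b);
/* Lay the record out with the MSVC bitfield rules.  */
struct __attribute__ ((ms_struct)) s { int x : 3; int y : 5; };
#endif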
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype ATTRIBUTE_UNUSED,
                                 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
        return 1;

      default:
        gcc_unreachable ();
    }
}
/* Implement targetm.vectorize.builtin_vec_perm.  */

static tree
ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
{
  tree itype = TREE_TYPE (vec_type);
  bool u = TYPE_UNSIGNED (itype);
  enum machine_mode vmode = TYPE_MODE (vec_type);
  enum ix86_builtins fcode;
  bool ok = TARGET_SSE2;

  switch (vmode)
    {
    case V4DFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V4DF;
      goto do_di;
    case V2DFmode:
      fcode = IX86_BUILTIN_VEC_PERM_V2DF;
    do_di:
      itype = ix86_get_builtin_type (IX86_BT_DI);
      break;

    case V8SFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V8SF;
      goto do_si;
    case V4SFmode:
      fcode = IX86_BUILTIN_VEC_PERM_V4SF;
    do_si:
      itype = ix86_get_builtin_type (IX86_BT_SI);
      break;

    case V2DImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
      break;
    case V4SImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
      break;
    case V8HImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
      break;
    case V16QImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
      break;

    default:
      ok = false;
      break;
    }

  if (!ok)
    return NULL_TREE;

  *mask_type = itype;
  return ix86_builtins[(int) fcode];
}
/* Return a vector mode with twice as many elements as VMODE.  */
/* ??? Consider moving this to a table generated by genmodes.c.  */

static enum machine_mode
doublesize_vector_mode (enum machine_mode vmode)
{
  switch (vmode)
    {
    case V2SFmode:      return V4SFmode;
    case V1DImode:      return V2DImode;
    case V2SImode:      return V4SImode;
    case V4HImode:      return V8HImode;
    case V8QImode:      return V16QImode;

    case V2DFmode:      return V4DFmode;
    case V4SFmode:      return V8SFmode;
    case V2DImode:      return V4DImode;
    case V4SImode:      return V8SImode;
    case V8HImode:      return V16HImode;
    case V16QImode:     return V32QImode;

    case V4DFmode:      return V8DFmode;
    case V8SFmode:      return V16SFmode;
    case V4DImode:      return V8DImode;
    case V8SImode:      return V16SImode;
    case V16HImode:     return V32HImode;
    case V32QImode:     return V64QImode;

    default:
      gcc_unreachable ();
    }
}
/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = doublesize_vector_mode (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}
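/* For reference (illustrative, not normative): the RTL shape these two
   helpers build.  Selecting the even SImode elements of two V4SI operands,
   e.g.

     static const unsigned char perm[4] = { 0, 2, 4, 6 };
     expand_vselect_vconcat (target, op0, op1, perm, 4);

   emits

     (set target (vec_select:V4SI (vec_concat:V8SI op0 op1)
                                  (parallel [0 2 4 6])))

   and succeeds only if recog finds a matching pattern in sse.md.  */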
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;

  if (!TARGET_SSE4_1 || d->op0 == d->op1)
    return false;
  if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  if (d->testing_p)
    return true;

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */

  /* For bytes, see if bytes move in pairs so we can use pblendw with
     an immediate argument, rather than pblendvb with a vector argument.  */
  if (vmode == V16QImode)
    {
      bool pblendw_ok = true;
      for (i = 0; i < 16 && pblendw_ok; i += 2)
        pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);

      if (!pblendw_ok)
        {
          rtx rperm[16], vperm;

          for (i = 0; i < nelt; ++i)
            rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
          vperm = force_reg (V16QImode, vperm);

          emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
          return true;
        }
    }

  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      goto do_subreg;

    case V16QImode:
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;

    do_subreg:
      vmode = V8HImode;
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}
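/* Worked example (illustrative, not part of GCC): for V8HImode with
   perm = { 0, 9, 2, 11, 4, 13, 6, 15 } every element stays in its lane,
   and the loop above builds mask = 0xaa (a bit set wherever the element
   comes from op1), which is exactly the pblendw immediate.  A hypothetical
   standalone check, never built:  */
#if 0
#include <assert.h>
static void
blend_mask_demo (void)
{
  static const unsigned char perm[8] = { 0, 9, 2, 11, 4, 13, 6, 15 };
  unsigned i, nelt = 8, mask = 0;
  for (i = 0; i < nelt; ++i)
    {
      assert (perm[i] == i || perm[i] == i + nelt);  /* stays in lane */
      mask |= (perm[i] >= nelt) << i;
    }
  assert (mask == 0xaa);
}
#endif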
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  if (d->testing_p)
    return true;

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb or vpperm.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz;
  rtx rperm[16], vperm, target, op0, op1;

  if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
    return false;
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      for (j = 0; j < eltsz; ++j)
        rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
  vperm = force_reg (V16QImode, vperm);

  target = gen_lowpart (V16QImode, d->target);
  op0 = gen_lowpart (V16QImode, d->op0);
  if (d->op0 == d->op1)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
  else
    {
      op1 = gen_lowpart (V16QImode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}
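/* Worked example (illustrative, not part of GCC): reversing a V4SI vector
   with op0 == op1 and perm = { 3, 2, 1, 0 }.  With eltsz == 4 the loop
   above expands each element index into its four byte indices, giving the
   pshufb control { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }.  */
#if 0
#include <assert.h>
static void
pshufb_control_demo (void)
{
  static const unsigned char perm[4] = { 3, 2, 1, 0 };
  unsigned char ctrl[16];
  unsigned i, j, nelt = 4, eltsz = 4;
  for (i = 0; i < nelt; ++i)
    for (j = 0; j < eltsz; ++j)
      ctrl[i * eltsz + j] = perm[i] * eltsz + j;
  assert (ctrl[0] == 12 && ctrl[4] == 8 && ctrl[8] == 4 && ctrl[12] == 0);
}
#endif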
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;

      for (i = 0; i < nelt; i++)
        perm2[i] = d->perm[i] & mask;

      if (expand_vselect (d->target, d->op0, perm2, nelt))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
                                  gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
        in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}
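/* Worked example (illustrative): for V8HImode and
   perm = { 2, 3, 4, 5, 6, 7, 8, 9 } we get min == 2 and max == 9, so all
   elements fit in one vector once shifted down by min.  The palignr with
   a 2*16-bit shift leaves the residual permutation { 0,1,2,3,4,5,6,7 },
   i.e. the in_order degenerate case: the single palignr is the whole
   sequence.  */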
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned contents, h1, h2, h3, h4;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok;

  if (d->op0 == d->op1)
    return false;

  /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
     lanes.  We can use similar techniques with the vperm2f128 instruction,
     but it requires slightly different logic.  */
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  /* Split the two input vectors into 4 halves.  */
  h1 = (1u << nelt2) - 1;
  h2 = h1 << nelt2;
  h3 = h2 << nelt2;
  h4 = h3 << nelt2;

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  /* If all of the elements come from the low halves, use interleave low;
     similarly for interleave high.  If the elements are from mis-matched
     halves, we can use shufps for V4SF/V4SI or do a DImode shuffle.  */
  if ((contents & (h1 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i] = i * 2;
          remap[i + nelt] = i * 2 + 1;
          dremap.perm[i * 2] = i;
          dremap.perm[i * 2 + 1] = i + nelt;
        }
    }
  else if ((contents & (h2 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i + nelt2] = i * 2;
          remap[i + nelt + nelt2] = i * 2 + 1;
          dremap.perm[i * 2] = i + nelt2;
          dremap.perm[i * 2 + 1] = i + nelt + nelt2;
        }
    }
  else if ((contents & (h1 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i] = i;
          remap[i + nelt + nelt2] = i + nelt2;
          dremap.perm[i] = i;
          dremap.perm[i + nelt2] = i + nelt + nelt2;
        }
      if (nelt != 4)
        {
          dremap.vmode = V2DImode;
          dremap.nelt = 2;
          dremap.perm[0] = 0;
          dremap.perm[1] = 3;
        }
    }
  else if ((contents & (h2 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i + nelt2] = i;
          remap[i + nelt] = i + nelt2;
          dremap.perm[i] = i + nelt2;
          dremap.perm[i + nelt2] = i + nelt;
        }
      if (nelt != 4)
        {
          dremap.vmode = V2DImode;
          dremap.nelt = 2;
          dremap.perm[0] = 1;
          dremap.perm[1] = 2;
        }
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}
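/* Worked example (illustrative): for V4SFmode and perm = { 1, 5, 0, 4 },
   contents == 0x33 == h1 | h3 (every element comes from one of the two
   low halves), so dremap becomes the interleave-low { 0, 4, 1, 5 }
   (a single unpcklps into a scratch register), remap maps 0->0, 4->1,
   1->2, 5->3, and the final permutation { 2, 3, 0, 1 } is a one-operand
   shuffle that expand_vec_perm_1 handles directly: two insns total.  */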
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
        static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
        static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);
        t4 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }.  */
        expand_vselect (t1, d->op0, perm1, 8);
        expand_vselect (t2, d->op1, perm1, 8);

        /* Shuffle the lanes around to produce:
           { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
        emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));

        /* Now a vpermil2p will produce the result required.  */
        /* ??? The vpermil2p requires a vector constant.  Another option
           is a unpck[lh]ps to merge the two vectors to produce
           { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }.  Then use another
           vpermilps to get the elements into the final order.  */
        d->op0 = t3;
        d->op1 = t4;
        memcpy (d->perm, odd ? permo: perme, 8);
        expand_vec_perm_vpermil (d);
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave. */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}
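/* Example (illustrative): on V4SI, perm = { 1, 3, 5, 7 } gives odd == 1
   and d->perm[i] == 2*i + 1 for all i, i.e. the extract-odd pattern;
   perm = { 0, 2, 4, 6 } is the matching extract-even case.  */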
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          optab otab = vec_interleave_low_optab;

          if (elt >= nelt2)
            {
              otab = vec_interleave_high_optab;
              elt -= nelt2;
            }
          nelt2 /= 2;

          op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, op0);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;

    default:
      gcc_unreachable ();
    }
}
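/* Worked example (illustrative): broadcasting byte 5 of a V16QImode
   vector.  First iteration: elt == 5 < nelt2 == 8, so the low-interleave
   duplicates the low bytes and the value lands in V8HImode element 5.
   Second iteration: elt == 5 >= nelt2 == 4, so the high interleave is
   used with elt -= 4, leaving the value in V4SImode element 1.  The loop
   stops at V4SImode and a single pshufd with control { 1, 1, 1, 1 }
   finishes the broadcast.  */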
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}
/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly... */

  if (expand_vec_perm_even_odd (d))
    return true;

  return false;
}
/* Extract the values from the vector CST into the permutation array in D.
   Return 0 on error, 1 if all values from the permutation come from the
   first vector, 2 if all values from the second vector, and 3 otherwise.  */

static int
extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
{
  tree list = TREE_VECTOR_CST_ELTS (cst);
  unsigned i, nelt = d->nelt;
  int ret = 0;

  for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
    {
      unsigned HOST_WIDE_INT e;

      if (!host_integerp (TREE_VALUE (list), 1))
        return 0;
      e = tree_low_cst (TREE_VALUE (list), 1);
      if (e >= 2 * nelt)
        return 0;

      ret |= (e < nelt ? 1 : 2);
      d->perm[i] = e;
    }
  gcc_assert (list == NULL);

  /* For all elements from second vector, fold the elements to first.  */
  if (ret == 2)
    for (i = 0; i < nelt; ++i)
      d->perm[i] -= nelt;

  return ret;
}
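/* Examples (illustrative) of the return value, for nelt == 4:
   { 0, 1, 2, 3 } -> 1 (first operand only);
   { 4, 5, 6, 7 } -> 2, and the array is folded down to { 0, 1, 2, 3 };
   { 0, 5, 2, 7 } -> 3 (elements drawn from both operands).  */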
static rtx
ix86_expand_vec_perm_builtin (tree exp)
{
  struct expand_vec_perm_d d;
  tree arg0, arg1, arg2;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  d.vmode = TYPE_MODE (TREE_TYPE (arg0));
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;
  gcc_assert (VECTOR_MODE_P (d.vmode));

  if (TREE_CODE (arg2) != VECTOR_CST)
    {
      error_at (EXPR_LOCATION (exp),
                "vector permutation requires vector constant");
      goto exit_error;
    }

  switch (extract_vec_perm_cst (&d, arg2))
    {
    default:
      gcc_unreachable ();

    case 0:
      error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
      goto exit_error;

    case 3:
      if (!operand_equal_p (arg0, arg1, 0))
        {
          d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op0 = force_reg (d.vmode, d.op0);
          d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op1 = force_reg (d.vmode, d.op1);
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      {
        unsigned i, nelt = d.nelt;
        for (i = 0; i < nelt; ++i)
          if (d.perm[i] >= nelt)
            d.perm[i] -= nelt;
      }
      /* FALLTHRU */

    case 1:
      d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;

    case 2:
      d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;
    }

  d.target = gen_reg_rtx (d.vmode);
  if (ix86_expand_vec_perm_builtin_1 (&d))
    return d.target;

  /* For compiler generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin the user has access to, don't abort.  */
  switch (d.nelt)
    {
    case 2:
      sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
      break;
    case 4:
      sorry ("vector permutation (%d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
      break;
    case 8:
      sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
      break;
    case 16:
      sorry ("vector permutation "
             "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7],
             d.perm[8], d.perm[9], d.perm[10], d.perm[11],
             d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
      break;
    default:
      gcc_unreachable ();
    }
 exit_error:
  return CONST0_RTX (d.vmode);
}
/* Implement targetm.vectorize.builtin_vec_perm_ok.  */

static bool
ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
{
  struct expand_vec_perm_d d;
  int vec_mask;
  bool ret, one_vec;

  d.vmode = TYPE_MODE (vec_type);
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  vec_mask = extract_vec_perm_cst (&d, mask);

  /* This hook cannot be called in response to something that the
     user does (unlike the builtin expander) so we shouldn't ever see
     an error generated from the extract.  */
  gcc_assert (vec_mask > 0 && vec_mask <= 3);
  one_vec = (vec_mask != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_builtin_1 (&d);
  end_sequence ();

  return ret;
}
void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}
/* Return the calling-ABI-specific va_list type node, i.e. the va_list
   type appropriate for FNDECL.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}
/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }
  return std_canonical_va_list_type (type);
}
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal name.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (!TARGET_64BIT)
    return 0;

  switch (idx)
    {
    default:
      break;

    case 0:
      *ptree = ms_va_list_type_node;
      *pname = "__builtin_ms_va_list";
      return 1;

    case 1:
      *ptree = sysv_va_list_type_node;
      *pname = "__builtin_sysv_va_list";
      return 1;
    }

  return 0;
}
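/* For reference (not part of GCC): how the two va_list flavours surface
   in user code on x86_64.  __builtin_ms_va_list pairs with ms_abi
   functions and the __builtin_ms_va_start/__builtin_ms_va_end builtins;
   this is a hedged sketch only, never compiled here.  */
#if 0
int __attribute__ ((ms_abi))
ms_sum (int n, ...)
{
  __builtin_ms_va_list ap;
  int i, s = 0;
  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; ++i)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}
#endif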
/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)

#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"