/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
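/* A sketch of how MODE_INDEX feeds the cost tables (illustrative, not the
   verbatim rtx-cost code): the mult_init/divide arrays in struct
   processor_costs have five entries, one per index above, with index 4
   covering all remaining modes:

     total = cost->mult_init[MODE_INDEX (mode)] + nbits * cost->mult_bit;  */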
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
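/* Worked example: with COSTS_N_INSNS (N) defined as (N) * 4 and an add
   taking 2 bytes, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so byte
   counts and insn counts land on the same scale and size-tuned and
   speed-tuned cost tables stay directly comparable.  */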
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
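/* Shape of the stringop tables used below (see struct stringop_algs in
   i386.h): the first member picks the algorithm for blocks of unknown
   size; it is followed by {max, alg} pairs tried in order, where a pair
   applies to blocks of at most MAX bytes and max == -1 terminates the
   table.  Each cost record carries two such tables per operation, one
   for 32-bit and one for 64-bit code; DUMMY_STRINGOP_ALGS fills the slot
   a given tuning never uses.  */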
const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  {COSTS_N_BYTES (3),  /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),  /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),  /* HI */
   COSTS_N_BYTES (3),  /* SI */
   COSTS_N_BYTES (3),  /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  0,  /* "large" insn */
  2,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers
              in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),  /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  1,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  1,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (6),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),  /* HI */
   COSTS_N_INSNS (6),  /* SI */
   COSTS_N_INSNS (6),  /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)},  /* other */
  1,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  486 has 8kB cache
         shared for code and data, so 4kB is
         not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)},  /* other */
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers
                  in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers
                  in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (4),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (4),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is
     way to go.  Rep movsb has apparently more expensive startup time in CPU,
     but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (7),  /* SI */
   COSTS_N_INSNS (7),  /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */

  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers
              in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers
              in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  128,  /* size of l2 cache.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)},  /* other */
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  32,  /* size of l2 cache.  Some models
          have integrated l2 cache, but
          optimizing for k6 is not important
          enough to worry about that.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (5),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),  /* HI */
   COSTS_N_INSNS (5),  /* SI */
   COSTS_N_INSNS (5),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  5,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use loop.  For large blocks, libcall can
     do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  5,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  3,  /* vec_unalign_load_cost.  */
  3,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  2,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
			 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
			 1/1  1/1 */
  64,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers
                  in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  /* On K8:
      MOVD reg64, xmmreg Double FSTORE 4
      MOVD reg32, xmmreg Double FSTORE 4
     On AMDFAM10:
      MOVD reg64, xmmreg Double FADD 3
			 1/1  1/1
      MOVD reg32, xmmreg Double FADD 3
			 1/1  1/1 */
  64,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set number of simultaneous prefetches
     to a large constant to reflect this (it probably is not a good idea not
     to limit number of prefetches at all, as their execution also takes some
     time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use loop.  For large blocks, libcall
     can do nontemporary accesses and beat inline considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost.  */
  2,  /* scalar load_cost.  */
  2,  /* scalar_store_cost.  */
  6,  /* vec_stmt_cost.  */
  0,  /* vec_to_scalar_cost.  */
  2,  /* scalar_to_vec_cost.  */
  2,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  2,  /* vec_store_cost.  */
  2,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (3),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (4),  /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers
              in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers
              in SImode and DImode */
  12,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  10,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (5),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  3,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  6,  /* cost of moving MMX register */
  {12, 12},  /* cost of loading MMX registers
                in SImode and DImode */
  {12, 12},  /* cost of storing MMX registers
                in SImode and DImode */
  6,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers
                    in SImode, DImode and TImode */
  {12, 12, 12},  /* cost of storing SSE registers
                    in SImode, DImode and TImode */
  8,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache.  */
  1024,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (3),  /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  16,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {6, 6, 6},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {6, 6},  /* cost of loading MMX registers
              in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {6, 6, 6},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  2048,  /* size of l2 cache.  */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles and more.  With
     this cost however our current implementation of synth_mult results in
     use of unnecessary temporary registers causing regression on several
     SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  512,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  /* Benchmarks shows large regressions on K8 sixtrack benchmark when this
     value is increased to perhaps more appropriate value of 5.  */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  {COSTS_N_INSNS (3),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),  /* HI */
   COSTS_N_INSNS (3),  /* SI */
   COSTS_N_INSNS (4),  /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,  /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)},  /* other */
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers
                 in QImode, HImode and SImode.
                 Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers
                    in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers
                 in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers
              in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers
              in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache.  */
  256,  /* size of l2 cache.  */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),  /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost.  */
  1,  /* scalar load_cost.  */
  1,  /* scalar_store_cost.  */
  1,  /* vec_stmt_cost.  */
  1,  /* vec_to_scalar_cost.  */
  1,  /* scalar_to_vec_cost.  */
  1,  /* vec_align_load_cost.  */
  2,  /* vec_unalign_load_cost.  */
  1,  /* vec_store_cost.  */
  3,  /* cond_taken_branch_cost.  */
  1,  /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
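/* pentium_cost above is only a placeholder; option processing repoints
   ix86_cost at the table for the selected tuning, roughly along these
   lines (a sketch, not the verbatim code):

     ix86_cost = optimize_size ? &ix86_size_cost
			       : processor_target_table[ix86_tune].cost;  */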
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
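/* Each entry of initial_ix86_tune_features below is a bitmask over the
   PROCESSOR_* enumeration, so a tuning applies to the active CPU when
   (mask & (1 << ix86_tune)) is nonzero.  For example,
   m_K6_GEODE | m_CORE2 enables a feature for K6, Geode and Core 2 and
   leaves it off everywhere else.  */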
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
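/* ix86_tune_features is derived from the initializer below during option
   processing by masking with the bit of the selected -mtune CPU, along
   these lines (a sketch):

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */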
/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1473 might be considered for Generic32 if our scheme for avoiding partial
1474 stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1484 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_ADD_ESP_8 */
1488 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1489 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1491 /* X86_TUNE_SUB_ESP_4 */
1492 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1495 /* X86_TUNE_SUB_ESP_8 */
1496 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1497 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508 conflict here between PPro/Pentium4 based chips that treat 128-bit
1509 SSE registers as single units and K8 based chips that divide SSE
1510 registers into two 64-bit halves. This knob promotes all store
1511 destinations to be 128-bit so as to allow register renaming on 128-bit
1512 SSE units, but usually results in one extra microop on 64-bit SSE units.
1513 Experimental results show that disabling this option on P4 brings over a
1514 20% SPECfp regression, while enabling it on K8 brings a roughly 2.4%
1515 regression that can be partly masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529 are resolved on SSE register parts instead of whole registers, so we may
1530 maintain just the lower part of scalar values in the proper format, leaving
1531 the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in a 16-byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
1583 /* X86_TUNE_SHORTEN_X87_SSE */
1586 /* X86_TUNE_AVOID_VECTOR_DECODE */
1589 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1590 HImode and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1593 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
1594 a vector path on AMD machines. */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1597 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on AMD machines. */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1601 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a MOV. */
1605 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1606 though XOR is one byte longer. */
1609 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1610 operand that cannot be represented using a modRM byte. The XOR
1611 replacement is long decoded, so this split helps here as well. */
1614 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion from FP to FP. */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1619 from integer to FP. */
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
1624 compare-and-branch uop. */
1627 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1628 will impact LEA instruction selection. */
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
1641 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1644 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1647 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1650 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1654 static const unsigned int x86_accumulate_outgoing_args
1655 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
1662 static enum stringop_alg stringop_alg = no_stringop;
1664 /* In case the average insn count for a single function invocation is
1665 lower than this constant, emit fast (but longer) prologue and epilogue code. */
1667 #define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1677 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1679 /* ax, dx, cx, bx */
1680 AREG, DREG, CREG, BREG,
1681 /* si, di, bp, sp */
1682 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1684 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1685 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1688 /* flags, fpsr, fpcr, frame */
1689 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1691 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1694 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1699 /* SSE REX registers */
1700 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1704 /* The "default" register map used in 32bit mode. */
1706 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1708 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1709 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1710 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1711 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1712 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1714 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1717 /* The "default" register map used in 64bit mode. */
1719 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1721 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1722 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1723 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1724 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1725 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1726 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1727 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
1747 It is not at all clear how we should number the FP stack registers
1748 for the x86 architecture. If the version of SDB on x86/svr4 were
1749 a bit less brain dead with respect to floating-point then we would
1750 have a precedent to follow with respect to DWARF register numbers
1751 for x86 FP registers, but the SDB on x86/svr4 is so completely
1752 broken with respect to FP registers that it is hardly worth thinking
1753 of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
1758 particular, and even for DWARF regno 11, SDB only seems to
1759 understand that it should say that a variable lives in %st(0) (when
1760 asked via an `=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
1773 register numbers. Note that these are all stack-top-relative numbers:
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
1782 18 for %st(7) (gcc regno = 15) */
1784 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1792 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
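/* Illustrative example, not part of GCC: the DWARF number for a gcc
   register is a plain lookup in the map above, indexed by gcc regno.
   The stand-in array below copies the first few entries so the check
   is self-contained; the real table is svr4_dbx_register_map.  */
#if 0
#include <assert.h>

int
main (void)
{
  /* gcc regnos 0..8 per the comment above: ax dx cx bx si di bp sp st(0).  */
  static const int svr4_map_excerpt[] = { 0, 2, 1, 3, 6, 7, 5, 4, 11 };

  assert (svr4_map_excerpt[1] == 2);    /* %edx is DWARF register 2.  */
  assert (svr4_map_excerpt[7] == 4);    /* %esp is DWARF register 4.  */
  assert (svr4_map_excerpt[8] == 11);   /* %st(0) is DWARF register 11.  */
  return 0;
}
#endif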
1795 /* Define parameter passing and return registers. */
1797 static int const x86_64_int_parameter_registers[6] =
1799 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1802 static int const x86_64_ms_abi_int_parameter_registers[4] =
1804 CX_REG, DX_REG, R8_REG, R9_REG
1807 static int const x86_64_int_return_registers[4] =
1809 AX_REG, DX_REG, DI_REG, SI_REG
1812 /* Define the structure for the machine field in struct function. */
1814 struct GTY(()) stack_local_entry {
1815 unsigned short mode;
1818 struct stack_local_entry *next;
1821 /* Structure describing stack frame layout.
1822 Stack grows downward:
1828 saved static chain		if ix86_static_chain_on_stack
1830 saved frame pointer		if frame_pointer_needed
1831 				<- HARD_FRAME_POINTER
1837 				<- sse_regs_save_offset
1840 [va_arg registers]	|
1844 [padding2]		| = to_allocate  */
1853 int outgoing_arguments_size;
1854 HOST_WIDE_INT frame;
1856 /* The offsets relative to ARG_POINTER. */
1857 HOST_WIDE_INT frame_pointer_offset;
1858 HOST_WIDE_INT hard_frame_pointer_offset;
1859 HOST_WIDE_INT stack_pointer_offset;
1860 HOST_WIDE_INT reg_save_offset;
1861 HOST_WIDE_INT sse_reg_save_offset;
1863 /* When save_regs_using_mov is set, emit prologue using
1864 move instead of push instructions. */
1865 bool save_regs_using_mov;
1868 /* Code model option. */
1869 enum cmodel ix86_cmodel;
1871 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1873 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1875 /* Which unit we are generating floating point math for. */
1876 enum fpmath_unit ix86_fpmath;
1878 /* Which CPU we are scheduling for. */
1879 enum attr_cpu ix86_schedule;
1881 /* Which CPU we are optimizing for. */
1882 enum processor_type ix86_tune;
1884 /* Which instruction set architecture to use. */
1885 enum processor_type ix86_arch;
1887 /* True if the SSE prefetch instruction is not a NOP. */
1888 int x86_prefetch_sse;
1890 /* ix86_regparm_string as a number */
1891 static int ix86_regparm;
1893 /* -mstackrealign option */
1894 extern int ix86_force_align_arg_pointer;
1895 static const char ix86_force_align_arg_pointer_string[]
1896 = "force_align_arg_pointer";
1898 static rtx (*ix86_gen_leave) (void);
1899 static rtx (*ix86_gen_pop1) (rtx);
1900 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1901 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1902 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1903 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1904 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1905 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1907 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1908 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1910 /* Preferred alignment for stack boundary in bits. */
1911 unsigned int ix86_preferred_stack_boundary;
1913 /* Alignment for the incoming stack boundary in bits, as specified on the command line. */
1915 static unsigned int ix86_user_incoming_stack_boundary;
1917 /* Default alignment for incoming stack boundary in bits. */
1918 static unsigned int ix86_default_incoming_stack_boundary;
1920 /* Alignment for incoming stack boundary in bits. */
1921 unsigned int ix86_incoming_stack_boundary;
1923 /* The ABI used by the target. */
1924 enum calling_abi ix86_abi;
1926 /* Values 1-5: see jump.c */
1927 int ix86_branch_cost;
1929 /* Calling abi specific va_list type nodes. */
1930 static GTY(()) tree sysv_va_list_type_node;
1931 static GTY(()) tree ms_va_list_type_node;
1933 /* Variables which are this size or smaller are put in the data/bss
1934 or ldata/lbss sections. */
1936 int ix86_section_threshold = 65536;
1938 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1939 char internal_label_prefix[16];
1940 int internal_label_prefix_len;
1942 /* Fence to use after loop using movnt. */
1945 /* Register class used for passing a given 64-bit part of an argument.
1946 These represent classes as documented by the psABI, with the exception
1947 of the SSESF and SSEDF classes, which are basically the SSE class;
1948 gcc just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1950 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1951 whenever possible (the upper half then contains padding). */
1952 enum x86_64_reg_class
1955 X86_64_INTEGER_CLASS,
1956 X86_64_INTEGERSI_CLASS,
1963 X86_64_COMPLEX_X87_CLASS,
1967 #define MAX_CLASSES 4
1969 /* Table of constants used by fldpi, fldln2, etc. */
1970 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1971 static bool ext_80387_constants_init = 0;
1974 static struct machine_function * ix86_init_machine_status (void);
1975 static rtx ix86_function_value (const_tree, const_tree, bool);
1976 static bool ix86_function_value_regno_p (const unsigned int);
1977 static rtx ix86_static_chain (const_tree, bool);
1978 static int ix86_function_regparm (const_tree, const_tree);
1979 static void ix86_compute_frame_layout (struct ix86_frame *);
1980 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1982 static void ix86_add_new_builtins (int);
1983 static rtx ix86_expand_vec_perm_builtin (tree);
1984 static tree ix86_canonical_va_list_type (tree);
1986 enum ix86_function_specific_strings
1988 IX86_FUNCTION_SPECIFIC_ARCH,
1989 IX86_FUNCTION_SPECIFIC_TUNE,
1990 IX86_FUNCTION_SPECIFIC_FPMATH,
1991 IX86_FUNCTION_SPECIFIC_MAX
1994 static char *ix86_target_string (int, int, const char *, const char *,
1995 const char *, bool);
1996 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1997 static void ix86_function_specific_save (struct cl_target_option *);
1998 static void ix86_function_specific_restore (struct cl_target_option *);
1999 static void ix86_function_specific_print (FILE *, int,
2000 struct cl_target_option *);
2001 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2002 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2003 static bool ix86_can_inline_p (tree, tree);
2004 static void ix86_set_current_function (tree);
2005 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2007 static enum calling_abi ix86_function_abi (const_tree);
2010 #ifndef SUBTARGET32_DEFAULT_CPU
2011 #define SUBTARGET32_DEFAULT_CPU "i386"
2014 /* The svr4 ABI for the i386 says that records and unions are returned in memory. */
2016 #ifndef DEFAULT_PCC_STRUCT_RETURN
2017 #define DEFAULT_PCC_STRUCT_RETURN 1
2020 /* Whether -mtune= or -march= were specified */
2021 static int ix86_tune_defaulted;
2022 static int ix86_arch_specified;
2024 /* Bit flags that specify the ISA we are compiling for. */
2025 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2027 /* A mask of ix86_isa_flags that includes bit X if X
2028 was set or cleared on the command line. */
2029 static int ix86_isa_flags_explicit;
2031 /* Define a set of ISAs which are available when a given ISA is
2032 enabled. MMX and SSE ISAs are handled separately. */
2034 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2035 #define OPTION_MASK_ISA_3DNOW_SET \
2036 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2038 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2039 #define OPTION_MASK_ISA_SSE2_SET \
2040 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2041 #define OPTION_MASK_ISA_SSE3_SET \
2042 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2043 #define OPTION_MASK_ISA_SSSE3_SET \
2044 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2045 #define OPTION_MASK_ISA_SSE4_1_SET \
2046 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2047 #define OPTION_MASK_ISA_SSE4_2_SET \
2048 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2049 #define OPTION_MASK_ISA_AVX_SET \
2050 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2051 #define OPTION_MASK_ISA_FMA_SET \
2052 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2054 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */
2056 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2058 #define OPTION_MASK_ISA_SSE4A_SET \
2059 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2060 #define OPTION_MASK_ISA_FMA4_SET \
2061 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2062 | OPTION_MASK_ISA_AVX_SET)
2063 #define OPTION_MASK_ISA_XOP_SET \
2064 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2065 #define OPTION_MASK_ISA_LWP_SET \
2068 /* AES and PCLMUL need SSE2 because they use xmm registers */
2069 #define OPTION_MASK_ISA_AES_SET \
2070 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2071 #define OPTION_MASK_ISA_PCLMUL_SET \
2072 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2074 #define OPTION_MASK_ISA_ABM_SET \
2075 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2077 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2078 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2079 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2080 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2081 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2083 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2084 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2085 #define OPTION_MASK_ISA_F16C_SET \
2086 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
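/* Illustrative sketch, not part of GCC: the *_SET macros chain, so
   turning one ISA on transitively turns on everything it requires.
   Expanding OPTION_MASK_ISA_SSE3_SET by hand gives
   OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE.
   The standalone program below demonstrates the idea with stand-in
   bit values; the real masks come from the options machinery.  */
#if 0
#include <assert.h>

#define ISA_SSE  (1 << 0)               /* stand-in value */
#define ISA_SSE2 (1 << 1)               /* stand-in value */
#define ISA_SSE3 (1 << 2)               /* stand-in value */
#define ISA_SSE_SET  ISA_SSE
#define ISA_SSE2_SET (ISA_SSE2 | ISA_SSE_SET)
#define ISA_SSE3_SET (ISA_SSE3 | ISA_SSE2_SET)

int
main (void)
{
  int isa = 0;

  isa |= ISA_SSE3_SET;                  /* what -msse3 does */
  assert (isa & ISA_SSE2);              /* SSE2 is implied */
  assert (isa & ISA_SSE);               /* SSE is implied */
  return 0;
}
#endif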
2088 /* Define a set of ISAs which aren't available when a given ISA is
2089 disabled. MMX and SSE ISAs are handled separately. */
2091 #define OPTION_MASK_ISA_MMX_UNSET \
2092 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2093 #define OPTION_MASK_ISA_3DNOW_UNSET \
2094 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2095 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2097 #define OPTION_MASK_ISA_SSE_UNSET \
2098 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2099 #define OPTION_MASK_ISA_SSE2_UNSET \
2100 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2101 #define OPTION_MASK_ISA_SSE3_UNSET \
2102 (OPTION_MASK_ISA_SSE3 \
2103 | OPTION_MASK_ISA_SSSE3_UNSET \
2104 | OPTION_MASK_ISA_SSE4A_UNSET )
2105 #define OPTION_MASK_ISA_SSSE3_UNSET \
2106 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2107 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2108 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2109 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2110 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2111 #define OPTION_MASK_ISA_AVX_UNSET \
2112 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2113 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2114 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2116 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same as -mno-sse4.1. */
2118 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2120 #define OPTION_MASK_ISA_SSE4A_UNSET \
2121 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2123 #define OPTION_MASK_ISA_FMA4_UNSET \
2124 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2125 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2126 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2128 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2129 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2130 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2131 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2132 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2133 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2134 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2135 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2137 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2138 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2139 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
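/* Illustrative sketch, not part of GCC: the *_UNSET macros run the
   dependency chain in the other direction -- turning an ISA off also
   turns off everything that requires it, so -mno-sse2 clears SSE3,
   SSSE3, the SSE4 family, SSE4A and AVX as well.  Stand-in bit values
   again; compare the SET sketch above.  */
#if 0
#include <assert.h>

int
main (void)
{
  enum { SSE = 1, SSE2 = 2, SSE3 = 4 };  /* stand-in values */
  int sse3_unset = SSE3;
  int sse2_unset = SSE2 | sse3_unset;    /* disabling SSE2 drags SSE3 */
  int isa = SSE | SSE2 | SSE3;

  isa &= ~sse2_unset;                    /* what -mno-sse2 does */
  assert (isa == SSE);                   /* only plain SSE survives */
  return 0;
}
#endif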
2141 /* Vectorization library interface and handlers. */
2142 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2144 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2145 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2147 /* Processor target table, indexed by processor number */
2150 const struct processor_costs *cost; /* Processor costs */
2151 const int align_loop; /* Default alignments. */
2152 const int align_loop_max_skip;
2153 const int align_jump;
2154 const int align_jump_max_skip;
2155 const int align_func;
2158 static const struct ptt processor_target_table[PROCESSOR_max] =
2160 {&i386_cost, 4, 3, 4, 3, 4},
2161 {&i486_cost, 16, 15, 16, 15, 16},
2162 {&pentium_cost, 16, 7, 16, 7, 16},
2163 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2164 {&geode_cost, 0, 0, 0, 0, 0},
2165 {&k6_cost, 32, 7, 32, 7, 32},
2166 {&athlon_cost, 16, 7, 16, 7, 16},
2167 {&pentium4_cost, 0, 0, 0, 0, 0},
2168 {&k8_cost, 16, 7, 16, 7, 16},
2169 {&nocona_cost, 0, 0, 0, 0, 0},
2170 {&core2_cost, 16, 10, 16, 10, 16},
2171 {&generic32_cost, 16, 7, 16, 7, 16},
2172 {&generic64_cost, 16, 10, 16, 10, 16},
2173 {&amdfam10_cost, 32, 24, 32, 7, 32},
2174 {&bdver1_cost, 32, 24, 32, 7, 32},
2175 {&atom_cost, 16, 7, 16, 7, 16}
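/* Example (illustrative): reading the atom entry above against the
   struct ptt fields, -mtune=atom requests 16-byte alignment for loops,
   jump targets and functions, skipping at most 7 bytes of padding for
   loops and jump targets.  */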
2178 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2205 /* Return true if a red-zone is in use. */
2208 ix86_using_red_zone (void)
2210 return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
2213 /* Implement TARGET_HANDLE_OPTION. */
2216 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2223 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2224 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2228 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2236 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2237 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2241 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2252 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2253 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2257 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2265 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2266 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2270 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2278 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2279 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2283 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2291 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2292 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2296 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2304 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2305 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2309 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2317 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2318 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2322 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2330 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2331 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2335 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2343 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2344 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2348 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2359 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2366 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2367 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2371 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2379 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2380 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2384 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2392 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2393 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2397 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2405 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2406 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2410 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2418 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2419 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2423 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2431 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2432 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2436 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2444 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2445 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2449 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2457 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2458 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2462 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2470 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2471 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2475 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2483 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2484 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2488 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2496 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2497 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2501 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2509 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2510 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2514 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2522 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2523 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2527 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2535 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2536 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2540 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2548 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2549 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2553 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
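/* Illustrative sketch, not part of GCC: every OPT_m* ISA case above
   follows one pattern, shown here as a hypothetical helper.  The bits
   touched are also recorded in ix86_isa_flags_explicit, so that later
   defaulting (e.g. the -march handling in override_options) never
   overrides an explicit user choice.  */
#if 0
static void
isa_option_sketch (int value, int set_mask, int unset_mask)
{
  if (value)
    ix86_isa_flags |= set_mask;          /* -mfoo */
  else
    ix86_isa_flags &= ~unset_mask;       /* -mno-foo */
  ix86_isa_flags_explicit |= value ? set_mask : unset_mask;
}
#endif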
2563 /* Return a string that documents the current -m options. The caller is
2564 responsible for freeing the string. */
2567 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2568 const char *fpmath, bool add_nl_p)
2570 struct ix86_target_opts
2572 const char *option; /* option string */
2573 int mask; /* isa mask options */
2576 /* This table is ordered so that options like -msse4.2 that imply
2577 preceding options are matched first. */
2578 static struct ix86_target_opts isa_opts[] =
2580 { "-m64", OPTION_MASK_ISA_64BIT },
2581 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2582 { "-mfma", OPTION_MASK_ISA_FMA },
2583 { "-mxop", OPTION_MASK_ISA_XOP },
2584 { "-mlwp", OPTION_MASK_ISA_LWP },
2585 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2586 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2587 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2588 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2589 { "-msse3", OPTION_MASK_ISA_SSE3 },
2590 { "-msse2", OPTION_MASK_ISA_SSE2 },
2591 { "-msse", OPTION_MASK_ISA_SSE },
2592 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2593 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2594 { "-mmmx", OPTION_MASK_ISA_MMX },
2595 { "-mabm", OPTION_MASK_ISA_ABM },
2596 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2597 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2598 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2599 { "-maes", OPTION_MASK_ISA_AES },
2600 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2601 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2602 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2603 { "-mf16c", OPTION_MASK_ISA_F16C },
2607 static struct ix86_target_opts flag_opts[] =
2609 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2610 { "-m80387", MASK_80387 },
2611 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2612 { "-malign-double", MASK_ALIGN_DOUBLE },
2613 { "-mcld", MASK_CLD },
2614 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2615 { "-mieee-fp", MASK_IEEE_FP },
2616 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2617 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2618 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2619 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2620 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2621 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2622 { "-mno-red-zone", MASK_NO_RED_ZONE },
2623 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2624 { "-mrecip", MASK_RECIP },
2625 { "-mrtd", MASK_RTD },
2626 { "-msseregparm", MASK_SSEREGPARM },
2627 { "-mstack-arg-probe", MASK_STACK_PROBE },
2628 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2631 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2634 char target_other[40];
2643 memset (opts, '\0', sizeof (opts));
2645 /* Add -march= option. */
2648 opts[num][0] = "-march=";
2649 opts[num++][1] = arch;
2652 /* Add -mtune= option. */
2655 opts[num][0] = "-mtune=";
2656 opts[num++][1] = tune;
2659 /* Pick out the ISA options. */
2660 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2662 if ((isa & isa_opts[i].mask) != 0)
2664 opts[num++][0] = isa_opts[i].option;
2665 isa &= ~ isa_opts[i].mask;
2669 if (isa && add_nl_p)
2671 opts[num++][0] = isa_other;
2672 sprintf (isa_other, "(other isa: %#x)", isa);
2675 /* Add flag options. */
2676 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2678 if ((flags & flag_opts[i].mask) != 0)
2680 opts[num++][0] = flag_opts[i].option;
2681 flags &= ~ flag_opts[i].mask;
2685 if (flags && add_nl_p)
2687 opts[num++][0] = target_other;
2688 sprintf (target_other, "(other flags: %#x)", flags);
2691 /* Add -mfpmath= option. */
2694 opts[num][0] = "-mfpmath=";
2695 opts[num++][1] = fpmath;
2702 gcc_assert (num < ARRAY_SIZE (opts));
2704 /* Size the string. */
2706 sep_len = (add_nl_p) ? 3 : 1;
2707 for (i = 0; i < num; i++)
2710 for (j = 0; j < 2; j++)
2712 len += strlen (opts[i][j]);
2715 /* Build the string. */
2716 ret = ptr = (char *) xmalloc (len);
2719 for (i = 0; i < num; i++)
2723 for (j = 0; j < 2; j++)
2724 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2731 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2739 for (j = 0; j < 2; j++)
2742 memcpy (ptr, opts[i][j], len2[j]);
2744 line_len += len2[j];
2749 gcc_assert (ret + len >= ptr);
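/* Example (illustrative): a call such as
     ix86_target_string (ix86_isa_flags, target_flags,
                         "core2", "generic", "sse", false)
   returns something like
     "-march=core2 -mtune=generic -mssse3 -msse3 -msse2 -msse -mmmx -mfpmath=sse"
   with the implied ISA bits spelled out, since they are all set in
   ix86_isa_flags by the time this is called.  */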
2754 /* Return TRUE if software prefetching is beneficial for the target. */
2758 software_prefetching_beneficial_p (void)
2762 case PROCESSOR_GEODE:
2764 case PROCESSOR_ATHLON:
2766 case PROCESSOR_AMDFAM10:
2774 /* Return true if profiling code should be emitted before the
2775 prologue, and false otherwise.
2776 Note: for x86 the "hotfix" variant is sorried (not implemented). */
2778 ix86_profile_before_prologue (void)
2780 return flag_fentry != 0;
2783 /* Function that is callable from the debugger to print the current options. */
2786 ix86_debug_options (void)
2788 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2789 ix86_arch_string, ix86_tune_string,
2790 ix86_fpmath_string, true);
2794 fprintf (stderr, "%s\n\n", opts);
2798 fputs ("<no options>\n\n", stderr);
2803 /* Sometimes certain combinations of command options do not make
2804 sense on a particular target machine. You can define a macro
2805 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2806 defined, is executed once just after all the command options have been parsed.
2809 Don't use this macro to turn on various extra optimizations for
2810 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2813 override_options (bool main_args_p)
2816 unsigned int ix86_arch_mask, ix86_tune_mask;
2817 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2822 /* Comes from final.c -- no real reason to change it. */
2823 #define MAX_CODE_ALIGN 16
2831 PTA_PREFETCH_SSE = 1 << 4,
2833 PTA_3DNOW_A = 1 << 6,
2837 PTA_POPCNT = 1 << 10,
2839 PTA_SSE4A = 1 << 12,
2840 PTA_NO_SAHF = 1 << 13,
2841 PTA_SSE4_1 = 1 << 14,
2842 PTA_SSE4_2 = 1 << 15,
2844 PTA_PCLMUL = 1 << 17,
2847 PTA_MOVBE = 1 << 20,
2851 PTA_FSGSBASE = 1 << 24,
2852 PTA_RDRND = 1 << 25,
2858 const char *const name; /* processor name or nickname. */
2859 const enum processor_type processor;
2860 const enum attr_cpu schedule;
2861 const unsigned /*enum pta_flags*/ flags;
2863 const processor_alias_table[] =
2865 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2866 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2867 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2868 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2869 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2870 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2871 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2872 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2873 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2874 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2875 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2876 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2877 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2879 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2882 PTA_MMX | PTA_SSE | PTA_SSE2},
2883 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2884 PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2889 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2891 | PTA_CX16 | PTA_NO_SAHF},
2892 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2893 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2894 | PTA_SSSE3 | PTA_CX16},
2895 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2896 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2897 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2898 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2900 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2901 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2902 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2903 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2904 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2905 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2909 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"x86-64", PROCESSOR_K8, CPU_K8,
2914 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2915 {"k8", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_NO_SAHF},
2918 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2921 {"opteron", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_NO_SAHF},
2924 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2927 {"athlon64", PROCESSOR_K8, CPU_K8,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_NO_SAHF},
2930 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2933 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2934 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2935 | PTA_SSE2 | PTA_NO_SAHF},
2936 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2937 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2938 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2939 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2940 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2941 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2942 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2943 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2944 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2945 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2946 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2947 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2948 0 /* flags are only used for -march switch. */ },
2949 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2950 PTA_64BIT /* flags are only used for -march switch. */ },
2953 int const pta_size = ARRAY_SIZE (processor_alias_table);
2955 /* Set up prefix/suffix so the error messages refer to either the command
2956 line argument, or the attribute(target). */
2965 prefix = "option(\"";
2970 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2971 SUBTARGET_OVERRIDE_OPTIONS;
2974 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2975 SUBSUBTARGET_OVERRIDE_OPTIONS;
2978 /* -fPIC is the default for x86_64. */
2979 if (TARGET_MACHO && TARGET_64BIT)
2982 /* Need to check -mtune=generic first. */
2983 if (ix86_tune_string)
2985 if (!strcmp (ix86_tune_string, "generic")
2986 || !strcmp (ix86_tune_string, "i686")
2987 /* As special support for cross compilers we read -mtune=native
2988 as -mtune=generic. With native compilers we won't see the
2989 -mtune=native, as it was changed by the driver. */
2990 || !strcmp (ix86_tune_string, "native"))
2993 ix86_tune_string = "generic64";
2995 ix86_tune_string = "generic32";
2997 /* If this call is for setting the option attribute, allow the
2998 generic32/generic64 that was previously set. */
2999 else if (!main_args_p
3000 && (!strcmp (ix86_tune_string, "generic32")
3001 || !strcmp (ix86_tune_string, "generic64")))
3003 else if (!strncmp (ix86_tune_string, "generic", 7))
3004 error ("bad value (%s) for %stune=%s %s",
3005 ix86_tune_string, prefix, suffix, sw);
3006 else if (!strcmp (ix86_tune_string, "x86-64"))
3007 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3008 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3009 prefix, suffix, prefix, suffix, prefix, suffix);
3013 if (ix86_arch_string)
3014 ix86_tune_string = ix86_arch_string;
3015 if (!ix86_tune_string)
3017 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3018 ix86_tune_defaulted = 1;
3021 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3022 need to use a sensible tune option. */
3023 if (!strcmp (ix86_tune_string, "generic")
3024 || !strcmp (ix86_tune_string, "x86-64")
3025 || !strcmp (ix86_tune_string, "i686"))
3028 ix86_tune_string = "generic64";
3030 ix86_tune_string = "generic32";
3034 if (ix86_stringop_string)
3036 if (!strcmp (ix86_stringop_string, "rep_byte"))
3037 stringop_alg = rep_prefix_1_byte;
3038 else if (!strcmp (ix86_stringop_string, "libcall"))
3039 stringop_alg = libcall;
3040 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3041 stringop_alg = rep_prefix_4_byte;
3042 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3044 /* rep; movq isn't available in 32-bit code. */
3045 stringop_alg = rep_prefix_8_byte;
3046 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3047 stringop_alg = loop_1_byte;
3048 else if (!strcmp (ix86_stringop_string, "loop"))
3049 stringop_alg = loop;
3050 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3051 stringop_alg = unrolled_loop;
3053 error ("bad value (%s) for %sstringop-strategy=%s %s",
3054 ix86_stringop_string, prefix, suffix, sw);
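/* Example (illustrative): -mstringop-strategy=rep_8byte selects
   rep_prefix_8_byte only in 64-bit mode; in 32-bit code the string
   falls through the chain above and ends up at the bad-value error,
   since "rep; movq" does not exist there.  */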
3057 if (!ix86_arch_string)
3058 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3060 ix86_arch_specified = 1;
3062 /* Validate -mabi= value. */
3063 if (ix86_abi_string)
3065 if (strcmp (ix86_abi_string, "sysv") == 0)
3066 ix86_abi = SYSV_ABI;
3067 else if (strcmp (ix86_abi_string, "ms") == 0)
3070 error ("unknown ABI (%s) for %sabi=%s %s",
3071 ix86_abi_string, prefix, suffix, sw);
3074 ix86_abi = DEFAULT_ABI;
3076 if (ix86_cmodel_string != 0)
3078 if (!strcmp (ix86_cmodel_string, "small"))
3079 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3080 else if (!strcmp (ix86_cmodel_string, "medium"))
3081 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3082 else if (!strcmp (ix86_cmodel_string, "large"))
3083 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3085 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3086 else if (!strcmp (ix86_cmodel_string, "32"))
3087 ix86_cmodel = CM_32;
3088 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3089 ix86_cmodel = CM_KERNEL;
3091 error ("bad value (%s) for %scmodel=%s %s",
3092 ix86_cmodel_string, prefix, suffix, sw);
3096 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3097 use of rip-relative addressing. This eliminates fixups that
3098 would otherwise be needed if this object is to be placed in a
3099 DLL, and is essentially just as efficient as direct addressing. */
3100 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3101 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3102 else if (TARGET_64BIT)
3103 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3105 ix86_cmodel = CM_32;
3107 if (ix86_asm_string != 0)
3110 && !strcmp (ix86_asm_string, "intel"))
3111 ix86_asm_dialect = ASM_INTEL;
3112 else if (!strcmp (ix86_asm_string, "att"))
3113 ix86_asm_dialect = ASM_ATT;
3115 error ("bad value (%s) for %sasm=%s %s",
3116 ix86_asm_string, prefix, suffix, sw);
3118 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3119 error ("code model %qs not supported in the %s bit mode",
3120 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3121 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3122 sorry ("%i-bit mode not compiled in",
3123 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3125 for (i = 0; i < pta_size; i++)
3126 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3128 ix86_schedule = processor_alias_table[i].schedule;
3129 ix86_arch = processor_alias_table[i].processor;
3130 /* Default cpu tuning to the architecture. */
3131 ix86_tune = ix86_arch;
3133 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3134 error ("CPU you selected does not support x86-64 "
3137 if (processor_alias_table[i].flags & PTA_MMX
3138 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3139 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3140 if (processor_alias_table[i].flags & PTA_3DNOW
3141 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3142 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3143 if (processor_alias_table[i].flags & PTA_3DNOW_A
3144 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3145 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3146 if (processor_alias_table[i].flags & PTA_SSE
3147 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3148 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3149 if (processor_alias_table[i].flags & PTA_SSE2
3150 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3151 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3152 if (processor_alias_table[i].flags & PTA_SSE3
3153 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3154 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3155 if (processor_alias_table[i].flags & PTA_SSSE3
3156 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3158 if (processor_alias_table[i].flags & PTA_SSE4_1
3159 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3160 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3161 if (processor_alias_table[i].flags & PTA_SSE4_2
3162 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3163 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3164 if (processor_alias_table[i].flags & PTA_AVX
3165 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3166 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3167 if (processor_alias_table[i].flags & PTA_FMA
3168 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3169 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3170 if (processor_alias_table[i].flags & PTA_SSE4A
3171 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3172 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3173 if (processor_alias_table[i].flags & PTA_FMA4
3174 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3175 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3176 if (processor_alias_table[i].flags & PTA_XOP
3177 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3178 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3179 if (processor_alias_table[i].flags & PTA_LWP
3180 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3181 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3182 if (processor_alias_table[i].flags & PTA_ABM
3183 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3184 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3185 if (processor_alias_table[i].flags & PTA_CX16
3186 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3187 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3188 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3189 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3190 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3191 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3192 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3193 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3194 if (processor_alias_table[i].flags & PTA_MOVBE
3195 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3196 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3197 if (processor_alias_table[i].flags & PTA_AES
3198 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3199 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3200 if (processor_alias_table[i].flags & PTA_PCLMUL
3201 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3202 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3203 if (processor_alias_table[i].flags & PTA_FSGSBASE
3204 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3205 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3206 if (processor_alias_table[i].flags & PTA_RDRND
3207 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3208 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3209 if (processor_alias_table[i].flags & PTA_F16C
3210 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3211 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3212 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3213 x86_prefetch_sse = true;
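/* Example (illustrative): with -march=core2 -mno-ssse3, the PTA flags
   above still enable MMX and SSE through SSE3, but the PTA_SSSE3 test
   is skipped because the earlier -mno-ssse3 recorded
   OPTION_MASK_ISA_SSSE3 in ix86_isa_flags_explicit.  */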
3218 if (!strcmp (ix86_arch_string, "generic"))
3219 error ("generic CPU can be used only for %stune=%s %s",
3220 prefix, suffix, sw);
3221 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3222 error ("bad value (%s) for %sarch=%s %s",
3223 ix86_arch_string, prefix, suffix, sw);
3225 ix86_arch_mask = 1u << ix86_arch;
3226 for (i = 0; i < X86_ARCH_LAST; ++i)
3227 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
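/* Worked example (illustrative): for -march=i486, ix86_arch is
   PROCESSOR_I486 and ix86_arch_mask == m_486.  X86_ARCH_CMOVE is
   initialized above to ~(m_386 | m_486 | m_PENT | m_K6), whose m_486
   bit is clear, so ix86_arch_features[X86_ARCH_CMOVE] ends up 0 and
   no cmov instructions are generated.  */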
3229 for (i = 0; i < pta_size; i++)
3230 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3232 ix86_schedule = processor_alias_table[i].schedule;
3233 ix86_tune = processor_alias_table[i].processor;
3234 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3236 if (ix86_tune_defaulted)
3238 ix86_tune_string = "x86-64";
3239 for (i = 0; i < pta_size; i++)
3240 if (! strcmp (ix86_tune_string,
3241 processor_alias_table[i].name))
3243 ix86_schedule = processor_alias_table[i].schedule;
3244 ix86_tune = processor_alias_table[i].processor;
3247 error ("CPU you selected does not support x86-64 "
3250 /* Intel CPUs have always interpreted SSE prefetch instructions as
3251 NOPs, so we can enable SSE prefetch instructions even when
3252 -mtune (rather than -march) points us to a processor that has them.
3253 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3254 higher processors. */
3256 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3257 x86_prefetch_sse = true;
3261 if (ix86_tune_specified && i == pta_size)
3262 error ("bad value (%s) for %stune=%s %s",
3263 ix86_tune_string, prefix, suffix, sw);
3265 ix86_tune_mask = 1u << ix86_tune;
3266 for (i = 0; i < X86_TUNE_LAST; ++i)
3267 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3269 #ifndef USE_IX86_FRAME_POINTER
3270 #define USE_IX86_FRAME_POINTER 0
3273 /* Set the default values for switches whose default depends on TARGET_64BIT
3274 in case they weren't overwritten by command line options. */
3279 /* Mach-O doesn't support omitting the frame pointer for now. */
3280 if (flag_omit_frame_pointer == 2)
3281 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
3282 if (flag_asynchronous_unwind_tables == 2)
3283 flag_asynchronous_unwind_tables = 1;
3284 if (flag_pcc_struct_return == 2)
3285 flag_pcc_struct_return = 0;
3291 /* Mach-O doesn't support omitting the frame pointer for now. */
3292 if (flag_omit_frame_pointer == 2)
3293 flag_omit_frame_pointer =
3294 (TARGET_MACHO ? 0 : !(USE_IX86_FRAME_POINTER || optimize_size));
3295 if (flag_asynchronous_unwind_tables == 2)
3296 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3297 if (flag_pcc_struct_return == 2)
3298 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
3302 ix86_cost = &ix86_size_cost;
3304 ix86_cost = processor_target_table[ix86_tune].cost;
3306 /* Arrange to set up i386_stack_locals for all functions. */
3307 init_machine_status = ix86_init_machine_status;
3309 /* Validate -mregparm= value. */
3310 if (ix86_regparm_string)
3313 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3314 i = atoi (ix86_regparm_string);
3315 if (i < 0 || i > REGPARM_MAX)
3316 error ("%sregparm=%d%s is not between 0 and %d",
3317 prefix, i, suffix, REGPARM_MAX);
3322 ix86_regparm = REGPARM_MAX;
3324 /* If the user has provided any of the -malign-* options,
3325 warn and use that value only if -falign-* is not set.
3326 Remove this code in GCC 3.2 or later. */
3327 if (ix86_align_loops_string)
3329 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3330 prefix, suffix, suffix);
3331 if (align_loops == 0)
3333 i = atoi (ix86_align_loops_string);
3334 if (i < 0 || i > MAX_CODE_ALIGN)
3335 error ("%salign-loops=%d%s is not between 0 and %d",
3336 prefix, i, suffix, MAX_CODE_ALIGN);
3338 align_loops = 1 << i;
3342 if (ix86_align_jumps_string)
3344 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3345 prefix, suffix, suffix);
3346 if (align_jumps == 0)
3348 i = atoi (ix86_align_jumps_string);
3349 if (i < 0 || i > MAX_CODE_ALIGN)
3350 error ("%salign-loops=%d%s is not between 0 and %d",
3351 prefix, i, suffix, MAX_CODE_ALIGN);
3353 align_jumps = 1 << i;
3357 if (ix86_align_funcs_string)
3359 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3360 prefix, suffix, suffix);
3361 if (align_functions == 0)
3363 i = atoi (ix86_align_funcs_string);
3364 if (i < 0 || i > MAX_CODE_ALIGN)
3365 error ("%salign-loops=%d%s is not between 0 and %d",
3366 prefix, i, suffix, MAX_CODE_ALIGN);
3368 align_functions = 1 << i;
3372 /* Default align_* from the processor table. */
3373 if (align_loops == 0)
3375 align_loops = processor_target_table[ix86_tune].align_loop;
3376 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3378 if (align_jumps == 0)
3380 align_jumps = processor_target_table[ix86_tune].align_jump;
3381 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3383 if (align_functions == 0)
3385 align_functions = processor_target_table[ix86_tune].align_func;
3388 /* Validate -mbranch-cost= value, or provide default. */
3389 ix86_branch_cost = ix86_cost->branch_cost;
3390 if (ix86_branch_cost_string)
3392 i = atoi (ix86_branch_cost_string);
3394 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3396 ix86_branch_cost = i;
3398 if (ix86_section_threshold_string)
3400 i = atoi (ix86_section_threshold_string);
3402 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3404 ix86_section_threshold = i;
3407 if (ix86_tls_dialect_string)
3409 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3410 ix86_tls_dialect = TLS_DIALECT_GNU;
3411 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3412 ix86_tls_dialect = TLS_DIALECT_GNU2;
3414 error ("bad value (%s) for %stls-dialect=%s %s",
3415 ix86_tls_dialect_string, prefix, suffix, sw);
3418 if (ix87_precision_string)
3420 i = atoi (ix87_precision_string);
3421 if (i != 32 && i != 64 && i != 80)
3422 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3427 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3429 /* Enable by default the SSE and MMX builtins. Do allow the user to
3430 explicitly disable any of these. In particular, disabling SSE and
3431 MMX for kernel code is extremely useful. */
3432 if (!ix86_arch_specified)
3434 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3435 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3438 warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3442 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3444 if (!ix86_arch_specified)
3446 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3448 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3449 when the programmer takes care to keep the stack from being destroyed. */
3450 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3451 target_flags |= MASK_NO_RED_ZONE;
3454 /* Keep nonleaf frame pointers. */
3455 if (flag_omit_frame_pointer)
3456 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3457 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3458 flag_omit_frame_pointer = 1;
3460 /* If we're doing fast math, we don't care about comparison order
3461 wrt NaNs. This lets us use a shorter comparison sequence. */
3462 if (flag_finite_math_only)
3463 target_flags &= ~MASK_IEEE_FP;
3465 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3466 since the insns won't need emulation. */
3467 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3468 target_flags &= ~MASK_NO_FANCY_MATH_387;
3470 /* Likewise, if the target doesn't have a 387, or we've specified
3471 software floating point, don't use 387 inline intrinsics. */
3473 target_flags |= MASK_NO_FANCY_MATH_387;
3475 /* Turn on MMX builtins for -msse. */
3478 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3479 x86_prefetch_sse = true;
3482 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3483 if (TARGET_SSE4_2 || TARGET_ABM)
3484 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3486 /* Validate -mpreferred-stack-boundary= value or default it to
3487 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3488 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3489 if (ix86_preferred_stack_boundary_string)
3491 i = atoi (ix86_preferred_stack_boundary_string);
3492 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3493 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3494 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3496 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
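/* Illustrative sketch, not part of GCC: -mpreferred-stack-boundary=N is
   likewise an exponent, and the stored boundary is in bits, hence the
   (1 << i) * BITS_PER_UNIT computation above.  BITS_PER_UNIT is assumed
   to be 8 for the demo.  */
#if 0
#include <stdio.h>

#define BITS_PER_UNIT 8

int
main (void)
{
  int i = 4;	/* -mpreferred-stack-boundary=4, the 64-bit minimum above */
  int bits = (1 << i) * BITS_PER_UNIT;
  /* Prints "128 bits (16 bytes)", the boundary the x86-64 ABI expects.  */
  printf ("%d bits (%d bytes)\n", bits, bits / BITS_PER_UNIT);
  return 0;
}
#endif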
3499 /* Set the default value for -mstackrealign. */
3500 if (ix86_force_align_arg_pointer == -1)
3501 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3503 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3505 /* Validate -mincoming-stack-boundary= value or default it to
3506 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3507 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3508 if (ix86_incoming_stack_boundary_string)
3510 i = atoi (ix86_incoming_stack_boundary_string);
3511 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3512 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3513 i, TARGET_64BIT ? 4 : 2);
3516 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3517 ix86_incoming_stack_boundary
3518 = ix86_user_incoming_stack_boundary;
3522 /* Accept -msseregparm only if at least SSE support is enabled. */
3523 if (TARGET_SSEREGPARM
3525 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3527 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3528 if (ix86_fpmath_string != 0)
3530 if (! strcmp (ix86_fpmath_string, "387"))
3531 ix86_fpmath = FPMATH_387;
3532 else if (! strcmp (ix86_fpmath_string, "sse"))
3536 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3537 ix86_fpmath = FPMATH_387;
3540 ix86_fpmath = FPMATH_SSE;
3542 else if (! strcmp (ix86_fpmath_string, "387,sse")
3543 || ! strcmp (ix86_fpmath_string, "387+sse")
3544 || ! strcmp (ix86_fpmath_string, "sse,387")
3545 || ! strcmp (ix86_fpmath_string, "sse+387")
3546 || ! strcmp (ix86_fpmath_string, "both"))
3550 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3551 ix86_fpmath = FPMATH_387;
3553 else if (!TARGET_80387)
3555 warning (0, "387 instruction set disabled, using SSE arithmetic");
3556 ix86_fpmath = FPMATH_SSE;
3559 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3562 error ("bad value (%s) for %sfpmath=%s %s",
3563 ix86_fpmath_string, prefix, suffix, sw);
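/* Illustrative sketch, not part of GCC: FPMATH_387 and FPMATH_SSE act as
   combinable bits, which is why "387,sse", "sse+387" and "both" can select
   both units at once.  The enum values below are assumed for the demo.  */
#if 0
#include <stdio.h>
#include <string.h>

enum fpmath_unit { FPMATH_387 = 1, FPMATH_SSE = 2 };	/* assumed encoding */

/* Map an -mfpmath= string to a unit mask; 0 means "bad value".  */
static int
fpmath_from_string (const char *s)
{
  if (!strcmp (s, "387"))
    return FPMATH_387;
  if (!strcmp (s, "sse"))
    return FPMATH_SSE;
  if (!strcmp (s, "387,sse") || !strcmp (s, "387+sse")
      || !strcmp (s, "sse,387") || !strcmp (s, "sse+387")
      || !strcmp (s, "both"))
    return FPMATH_SSE | FPMATH_387;
  return 0;
}

int
main (void)
{
  printf ("%d\n", fpmath_from_string ("both"));	/* prints 3 */
  return 0;
}
#endif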
3566 /* If the i387 is disabled, then do not return values in it. */
3568 target_flags &= ~MASK_FLOAT_RETURNS;
3570 /* Use an external vectorized library when vectorizing intrinsics. */
3571 if (ix86_veclibabi_string)
3573 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3574 ix86_veclib_handler = ix86_veclibabi_svml;
3575 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3576 ix86_veclib_handler = ix86_veclibabi_acml;
3578 error ("unknown vectorization library ABI type (%s) for "
3579 "%sveclibabi=%s %s", ix86_veclibabi_string,
3580 prefix, suffix, sw);
3583 if ((!USE_IX86_FRAME_POINTER
3584 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3585 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3587 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3589 /* ??? Unwind info is not correct around the CFG unless either a frame
3590 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3591 unwind info generation to be aware of the CFG and propagating states around edges. */
3593 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3594 || flag_exceptions || flag_non_call_exceptions)
3595 && flag_omit_frame_pointer
3596 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3598 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3599 warning (0, "unwind tables currently require either a frame pointer "
3600 "or %saccumulate-outgoing-args%s for correctness",
3602 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3605 /* If stack probes are required, the space used for large function
3606 arguments on the stack must also be probed, so enable
3607 -maccumulate-outgoing-args so this happens in the prologue. */
3608 if (TARGET_STACK_PROBE
3609 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3611 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3612 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3613 "for correctness", prefix, suffix);
3614 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3617 /* For sane SSE instruction set generation we need the fcomi instruction.
3618 It is safe to enable all CMOVE instructions. */
3622 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3625 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3626 p = strchr (internal_label_prefix, 'X');
3627 internal_label_prefix_len = p - internal_label_prefix;
3631 /* When the scheduling description is not available, disable the scheduler pass
3632 so it won't slow down compilation and make x87 code slower. */
3633 if (!TARGET_SCHEDULE)
3634 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3636 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3637 set_param_value ("simultaneous-prefetches",
3638 ix86_cost->simultaneous_prefetches);
3639 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3640 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3641 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3642 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3643 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3644 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
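/* Illustrative sketch, not part of GCC: each param above is defaulted from
   the cost tables only when the user did not set it explicitly, which is
   what the !PARAM_SET_P guard expresses.  The struct and names below are
   hypothetical.  */
#if 0
#include <stdio.h>

struct param { int value; int user_set; };

/* Mirrors the !PARAM_SET_P (...) pattern: keep any user override.  */
static void
default_param (struct param *p, int def)
{
  if (!p->user_set)
    p->value = def;
}

int
main (void)
{
  struct param l1 = { 0, 0 }, l2 = { 512, 1 };
  default_param (&l1, 64);	/* takes the CPU-table default */
  default_param (&l2, 1024);	/* keeps the user's 512 */
  printf ("%d %d\n", l1.value, l2.value);	/* prints "64 512" */
  return 0;
}
#endif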
3646 /* Enable software prefetching at -O3 for CPUs where prefetching is helpful. */
3647 if (flag_prefetch_loop_arrays < 0
3650 && software_prefetching_beneficial_p ())
3651 flag_prefetch_loop_arrays = 1;
3653 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3654 can be optimized to ap = __builtin_next_arg (0). */
3656 targetm.expand_builtin_va_start = NULL;
3660 ix86_gen_leave = gen_leave_rex64;
3661 ix86_gen_pop1 = gen_popdi1;
3662 ix86_gen_add3 = gen_adddi3;
3663 ix86_gen_sub3 = gen_subdi3;
3664 ix86_gen_sub3_carry = gen_subdi3_carry;
3665 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3666 ix86_gen_monitor = gen_sse3_monitor64;
3667 ix86_gen_andsp = gen_anddi3;
3668 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3669 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3670 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3674 ix86_gen_leave = gen_leave;
3675 ix86_gen_pop1 = gen_popsi1;
3676 ix86_gen_add3 = gen_addsi3;
3677 ix86_gen_sub3 = gen_subsi3;
3678 ix86_gen_sub3_carry = gen_subsi3_carry;
3679 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3680 ix86_gen_monitor = gen_sse3_monitor;
3681 ix86_gen_andsp = gen_andsi3;
3682 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3683 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3684 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
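/* Illustrative sketch, not part of GCC: the ix86_gen_* hooks above are plain
   function pointers bound once per target word size, so the rest of the back
   end can emit "add", "pop" and friends without re-testing TARGET_64BIT at
   every use.  The emitters below are made up for the demo.  */
#if 0
#include <stdio.h>

static void emit_add32 (void) { puts ("addl"); }
static void emit_add64 (void) { puts ("addq"); }

static void (*gen_add) (void);	/* analogous to ix86_gen_add3 */

/* Bind the generators once, as the two branches above do.  */
static void
select_generators (int target_64bit)
{
  gen_add = target_64bit ? emit_add64 : emit_add32;
}

int
main (void)
{
  select_generators (1);
  gen_add ();			/* prints "addq" */
  return 0;
}
#endif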
3688 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3690 target_flags |= MASK_CLD & ~target_flags_explicit;
3693 if (!TARGET_64BIT && flag_pic)
3695 if (flag_fentry > 0)
3696 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3699 if (flag_fentry < 0)
3701 #if defined(PROFILE_BEFORE_PROLOGUE)
3708 /* Save the initial options in case the user uses function specific options. */
3710 target_option_default_node = target_option_current_node
3711 = build_target_option_node ();
3714 /* Update register usage after having seen the compiler flags. */
3717 ix86_conditional_register_usage (void)
3722 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3724 if (fixed_regs[i] > 1)
3725 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3726 if (call_used_regs[i] > 1)
3727 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3730 /* The PIC register, if it exists, is fixed. */
3731 j = PIC_OFFSET_TABLE_REGNUM;
3732 if (j != INVALID_REGNUM)
3733 fixed_regs[j] = call_used_regs[j] = 1;
3735 /* The MS_ABI changes the set of call-used registers. */
3736 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3738 call_used_regs[SI_REG] = 0;
3739 call_used_regs[DI_REG] = 0;
3740 call_used_regs[XMM6_REG] = 0;
3741 call_used_regs[XMM7_REG] = 0;
3742 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3743 call_used_regs[i] = 0;
3746 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3747 other call-clobbered regs for 64-bit. */
3750 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3752 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3753 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3754 && call_used_regs[i])
3755 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3758 /* If MMX is disabled, squash the registers. */
3760 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3761 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3762 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3764 /* If SSE is disabled, squash the registers. */
3766 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3767 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3768 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3770 /* If the FPU is disabled, squash the registers. */
3771 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3772 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3773 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3774 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3776 /* If 32-bit, squash the 64-bit registers. */
3779 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3781 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3787 /* Save the current options */
3790 ix86_function_specific_save (struct cl_target_option *ptr)
3792 ptr->arch = ix86_arch;
3793 ptr->schedule = ix86_schedule;
3794 ptr->tune = ix86_tune;
3795 ptr->fpmath = ix86_fpmath;
3796 ptr->branch_cost = ix86_branch_cost;
3797 ptr->tune_defaulted = ix86_tune_defaulted;
3798 ptr->arch_specified = ix86_arch_specified;
3799 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3800 ptr->target_flags_explicit = target_flags_explicit;
3802 /* The fields are char but the variables are not; make sure the
3803 values fit in the fields. */
3804 gcc_assert (ptr->arch == ix86_arch);
3805 gcc_assert (ptr->schedule == ix86_schedule);
3806 gcc_assert (ptr->tune == ix86_tune);
3807 gcc_assert (ptr->fpmath == ix86_fpmath);
3808 gcc_assert (ptr->branch_cost == ix86_branch_cost);
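/* Illustrative sketch, not part of GCC: the gcc_assert calls above guard
   against a value being silently truncated when stored into the narrower
   cl_target_option fields.  The struct below is hypothetical.  */
#if 0
#include <assert.h>

struct saved { unsigned char branch_cost; };	/* narrow field */

/* Store, then verify the value round-trips through the narrow field.  */
static void
save_branch_cost (struct saved *s, int branch_cost)
{
  s->branch_cost = (unsigned char) branch_cost;
  assert (s->branch_cost == branch_cost);	/* fires if it didn't fit */
}

int
main (void)
{
  struct saved s;
  save_branch_cost (&s, 5);
  return 0;
}
#endif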
3811 /* Restore the current options */
3814 ix86_function_specific_restore (struct cl_target_option *ptr)
3816 enum processor_type old_tune = ix86_tune;
3817 enum processor_type old_arch = ix86_arch;
3818 unsigned int ix86_arch_mask, ix86_tune_mask;
3821 ix86_arch = (enum processor_type) ptr->arch;
3822 ix86_schedule = (enum attr_cpu) ptr->schedule;
3823 ix86_tune = (enum processor_type) ptr->tune;
3824 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3825 ix86_branch_cost = ptr->branch_cost;
3826 ix86_tune_defaulted = ptr->tune_defaulted;
3827 ix86_arch_specified = ptr->arch_specified;
3828 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3829 target_flags_explicit = ptr->target_flags_explicit;
3831 /* Recreate the arch feature tests if the arch changed */
3832 if (old_arch != ix86_arch)
3834 ix86_arch_mask = 1u << ix86_arch;
3835 for (i = 0; i < X86_ARCH_LAST; ++i)
3836 ix86_arch_features[i]
3837 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3840 /* Recreate the tune optimization tests */
3841 if (old_tune != ix86_tune)
3843 ix86_tune_mask = 1u << ix86_tune;
3844 for (i = 0; i < X86_TUNE_LAST; ++i)
3845 ix86_tune_features[i]
3846 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3850 /* Print the current options */
3853 ix86_function_specific_print (FILE *file, int indent,
3854 struct cl_target_option *ptr)
3857 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3858 NULL, NULL, NULL, false);
3860 fprintf (file, "%*sarch = %d (%s)\n",
3863 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3864 ? cpu_names[ptr->arch]
3867 fprintf (file, "%*stune = %d (%s)\n",
3870 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3871 ? cpu_names[ptr->tune]
3874 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3875 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3876 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3877 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3881 fprintf (file, "%*s%s\n", indent, "", target_string);
3882 free (target_string);
3887 /* Inner function to process the attribute((target(...))); it takes an argument and
3888 sets the current options from that argument. If we have a list, recursively go over the list. */
3892 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3897 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3898 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3899 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3900 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3915 enum ix86_opt_type type;
3920 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3921 IX86_ATTR_ISA ("abm", OPT_mabm),
3922 IX86_ATTR_ISA ("aes", OPT_maes),
3923 IX86_ATTR_ISA ("avx", OPT_mavx),
3924 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3925 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3926 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3927 IX86_ATTR_ISA ("sse", OPT_msse),
3928 IX86_ATTR_ISA ("sse2", OPT_msse2),
3929 IX86_ATTR_ISA ("sse3", OPT_msse3),
3930 IX86_ATTR_ISA ("sse4", OPT_msse4),
3931 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3932 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3933 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3934 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3935 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3936 IX86_ATTR_ISA ("xop", OPT_mxop),
3937 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3938 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3939 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3940 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3942 /* string options */
3943 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3944 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3945 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3948 IX86_ATTR_YES ("cld",
3952 IX86_ATTR_NO ("fancy-math-387",
3953 OPT_mfancy_math_387,
3954 MASK_NO_FANCY_MATH_387),
3956 IX86_ATTR_YES ("ieee-fp",
3960 IX86_ATTR_YES ("inline-all-stringops",
3961 OPT_minline_all_stringops,
3962 MASK_INLINE_ALL_STRINGOPS),
3964 IX86_ATTR_YES ("inline-stringops-dynamically",
3965 OPT_minline_stringops_dynamically,
3966 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3968 IX86_ATTR_NO ("align-stringops",
3969 OPT_mno_align_stringops,
3970 MASK_NO_ALIGN_STRINGOPS),
3972 IX86_ATTR_YES ("recip",
3978 /* If this is a list, recurse to get the options. */
3979 if (TREE_CODE (args) == TREE_LIST)
3983 for (; args; args = TREE_CHAIN (args))
3984 if (TREE_VALUE (args)
3985 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3991 else if (TREE_CODE (args) != STRING_CST)
3994 /* Handle multiple arguments separated by commas. */
3995 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3997 while (next_optstr && *next_optstr != '\0')
3999 char *p = next_optstr;
4001 char *comma = strchr (next_optstr, ',');
4002 const char *opt_string;
4003 size_t len, opt_len;
4008 enum ix86_opt_type type = ix86_opt_unknown;
4014 len = comma - next_optstr;
4015 next_optstr = comma + 1;
4023 /* Recognize no-xxx. */
4024 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4033 /* Find the option. */
4036 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4038 type = attrs[i].type;
4039 opt_len = attrs[i].len;
4040 if (ch == attrs[i].string[0]
4041 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4042 && memcmp (p, attrs[i].string, opt_len) == 0)
4045 mask = attrs[i].mask;
4046 opt_string = attrs[i].string;
4051 /* Process the option. */
4054 error ("attribute(target(\"%s\")) is unknown", orig_p);
4058 else if (type == ix86_opt_isa)
4059 ix86_handle_option (opt, p, opt_set_p);
4061 else if (type == ix86_opt_yes || type == ix86_opt_no)
4063 if (type == ix86_opt_no)
4064 opt_set_p = !opt_set_p;
4067 target_flags |= mask;
4069 target_flags &= ~mask;
4072 else if (type == ix86_opt_str)
4076 error ("option(\"%s\") was already specified", opt_string);
4080 p_strings[opt] = xstrdup (p + opt_len);
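/* Illustrative sketch, not part of GCC: "no-xxx" attribute strings are
   handled above by peeling a literal "no-" prefix and inverting opt_set_p
   before the table lookup.  The helper below is hypothetical.  */
#if 0
#include <stdio.h>
#include <string.h>

/* Return the option name with any "no-" prefix removed; store whether
   the option is being enabled in *SET_P.  */
static const char *
strip_no_prefix (const char *p, int *set_p)
{
  *set_p = 1;
  if (strncmp (p, "no-", 3) == 0)
    {
      *set_p = 0;
      p += 3;
    }
  return p;
}

int
main (void)
{
  int set_p;
  const char *opt = strip_no_prefix ("no-sse4.1", &set_p);
  printf ("%s %d\n", opt, set_p);	/* prints "sse4.1 0" */
  return 0;
}
#endif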
4090 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4093 ix86_valid_target_attribute_tree (tree args)
4095 const char *orig_arch_string = ix86_arch_string;
4096 const char *orig_tune_string = ix86_tune_string;
4097 const char *orig_fpmath_string = ix86_fpmath_string;
4098 int orig_tune_defaulted = ix86_tune_defaulted;
4099 int orig_arch_specified = ix86_arch_specified;
4100 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4103 struct cl_target_option *def
4104 = TREE_TARGET_OPTION (target_option_default_node);
4106 /* Process each of the options on the chain. */
4107 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4110 /* If the changed options are different from the default, rerun override_options,
4111 and then save the options away. The string options are attribute options,
4112 and will be undone when we copy the save structure. */
4113 if (ix86_isa_flags != def->ix86_isa_flags
4114 || target_flags != def->target_flags
4115 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4116 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4117 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4119 /* If we are using the default tune= or arch=, undo the string assigned,
4120 and use the default. */
4121 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4122 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4123 else if (!orig_arch_specified)
4124 ix86_arch_string = NULL;
4126 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4127 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4128 else if (orig_tune_defaulted)
4129 ix86_tune_string = NULL;
4131 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4132 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4133 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4134 else if (!TARGET_64BIT && TARGET_SSE)
4135 ix86_fpmath_string = "sse,387";
4137 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4138 override_options (false);
4140 /* Add any builtin functions with the new isa if any. */
4141 ix86_add_new_builtins (ix86_isa_flags);
4143 /* Save the current options unless we are validating options for #pragma. */
4145 t = build_target_option_node ();
4147 ix86_arch_string = orig_arch_string;
4148 ix86_tune_string = orig_tune_string;
4149 ix86_fpmath_string = orig_fpmath_string;
4151 /* Free up memory allocated to hold the strings */
4152 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4153 if (option_strings[i])
4154 free (option_strings[i]);
4160 /* Hook to validate attribute((target("string"))). */
4163 ix86_valid_target_attribute_p (tree fndecl,
4164 tree ARG_UNUSED (name),
4166 int ARG_UNUSED (flags))
4168 struct cl_target_option cur_target;
4170 tree old_optimize = build_optimization_node ();
4171 tree new_target, new_optimize;
4172 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4174 /* If the function changed the optimization levels as well as setting target
4175 options, start with the optimizations specified. */
4176 if (func_optimize && func_optimize != old_optimize)
4177 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4179 /* The target attributes may also change some optimization flags, so update
4180 the optimization options if necessary. */
4181 cl_target_option_save (&cur_target);
4182 new_target = ix86_valid_target_attribute_tree (args);
4183 new_optimize = build_optimization_node ();
4190 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4192 if (old_optimize != new_optimize)
4193 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4196 cl_target_option_restore (&cur_target);
4198 if (old_optimize != new_optimize)
4199 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4205 /* Hook to determine if one function can safely inline another. */
4208 ix86_can_inline_p (tree caller, tree callee)
4211 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4212 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4214 /* If callee has no option attributes, then it is ok to inline. */
4218 /* If the caller has no option attributes but the callee does, then it is not ok to inline. */
4220 else if (!caller_tree)
4225 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4226 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4228 /* Callee's isa options should be a subset of the caller's, i.e. an SSE4 function
4229 can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4231 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4232 != callee_opts->ix86_isa_flags)
4235 /* See if we have the same non-isa options. */
4236 else if (caller_opts->target_flags != callee_opts->target_flags)
4239 /* See if arch, tune, etc. are the same. */
4240 else if (caller_opts->arch != callee_opts->arch)
4243 else if (caller_opts->tune != callee_opts->tune)
4246 else if (caller_opts->fpmath != callee_opts->fpmath)
4249 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4260 /* Remember the last target of ix86_set_current_function. */
4261 static GTY(()) tree ix86_previous_fndecl;
4263 /* Establish appropriate back-end context for processing the function
4264 FNDECL. The argument might be NULL to indicate processing at top
4265 level, outside of any function scope. */
4267 ix86_set_current_function (tree fndecl)
4269 /* Only change the context if the function changes. This hook is called
4270 several times in the course of compiling a function, and we don't want to
4271 slow things down too much or call target_reinit when it isn't safe. */
4272 if (fndecl && fndecl != ix86_previous_fndecl)
4274 tree old_tree = (ix86_previous_fndecl
4275 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4278 tree new_tree = (fndecl
4279 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4282 ix86_previous_fndecl = fndecl;
4283 if (old_tree == new_tree)
4288 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4294 struct cl_target_option *def
4295 = TREE_TARGET_OPTION (target_option_current_node);
4297 cl_target_option_restore (def);
4304 /* Return true if this goes in large data/bss. */
4307 ix86_in_large_data_p (tree exp)
4309 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4312 /* Functions are never large data. */
4313 if (TREE_CODE (exp) == FUNCTION_DECL)
4316 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4318 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4319 if (strcmp (section, ".ldata") == 0
4320 || strcmp (section, ".lbss") == 0)
4326 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4328 /* If this is an incomplete type with size 0, then we can't put it
4329 in data because it might be too big when completed. */
4330 if (!size || size > ix86_section_threshold)
4337 /* Switch to the appropriate section for output of DECL.
4338 DECL is either a `VAR_DECL' node or a constant of some sort.
4339 RELOC indicates whether forming the initial value of DECL requires
4340 link-time relocations. */
4342 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4346 x86_64_elf_select_section (tree decl, int reloc,
4347 unsigned HOST_WIDE_INT align)
4349 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4350 && ix86_in_large_data_p (decl))
4352 const char *sname = NULL;
4353 unsigned int flags = SECTION_WRITE;
4354 switch (categorize_decl_for_section (decl, reloc))
4359 case SECCAT_DATA_REL:
4360 sname = ".ldata.rel";
4362 case SECCAT_DATA_REL_LOCAL:
4363 sname = ".ldata.rel.local";
4365 case SECCAT_DATA_REL_RO:
4366 sname = ".ldata.rel.ro";
4368 case SECCAT_DATA_REL_RO_LOCAL:
4369 sname = ".ldata.rel.ro.local";
4373 flags |= SECTION_BSS;
4376 case SECCAT_RODATA_MERGE_STR:
4377 case SECCAT_RODATA_MERGE_STR_INIT:
4378 case SECCAT_RODATA_MERGE_CONST:
4382 case SECCAT_SRODATA:
4389 /* We don't split these for the medium model. Place them into
4390 default sections and hope for the best. */
4395 /* We might get called with string constants, but get_named_section
4396 doesn't like them as they are not DECLs. Also, we need to set
4397 flags in that case. */
4399 return get_section (sname, flags, NULL);
4400 return get_named_section (decl, sname, reloc);
4403 return default_elf_select_section (decl, reloc, align);
4406 /* Build up a unique section name, expressed as a
4407 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4408 RELOC indicates whether the initial value of EXP requires
4409 link-time relocations. */
4411 static void ATTRIBUTE_UNUSED
4412 x86_64_elf_unique_section (tree decl, int reloc)
4414 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4415 && ix86_in_large_data_p (decl))
4417 const char *prefix = NULL;
4418 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4419 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4421 switch (categorize_decl_for_section (decl, reloc))
4424 case SECCAT_DATA_REL:
4425 case SECCAT_DATA_REL_LOCAL:
4426 case SECCAT_DATA_REL_RO:
4427 case SECCAT_DATA_REL_RO_LOCAL:
4428 prefix = one_only ? ".ld" : ".ldata";
4431 prefix = one_only ? ".lb" : ".lbss";
4434 case SECCAT_RODATA_MERGE_STR:
4435 case SECCAT_RODATA_MERGE_STR_INIT:
4436 case SECCAT_RODATA_MERGE_CONST:
4437 prefix = one_only ? ".lr" : ".lrodata";
4439 case SECCAT_SRODATA:
4446 /* We don't split these for the medium model. Place them into
4447 default sections and hope for the best. */
4452 const char *name, *linkonce;
4455 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4456 name = targetm.strip_name_encoding (name);
4458 /* If we're using one_only, then there needs to be a .gnu.linkonce
4459 prefix to the section name. */
4460 linkonce = one_only ? ".gnu.linkonce" : "";
4462 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4464 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4468 default_unique_section (decl, reloc);
4471 #ifdef COMMON_ASM_OP
4472 /* This says how to output assembler code to declare an
4473 uninitialized external linkage data object.
4475 For medium model x86-64 we need to use the .largecomm opcode for large objects. */
4478 x86_elf_aligned_common (FILE *file,
4479 const char *name, unsigned HOST_WIDE_INT size,
4482 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4483 && size > (unsigned int)ix86_section_threshold)
4484 fputs (".largecomm\t", file);
4486 fputs (COMMON_ASM_OP, file);
4487 assemble_name (file, name);
4488 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4489 size, align / BITS_PER_UNIT);
4493 /* Utility function for targets to use in implementing
4494 ASM_OUTPUT_ALIGNED_BSS. */
4497 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4498 const char *name, unsigned HOST_WIDE_INT size,
4501 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4502 && size > (unsigned int)ix86_section_threshold)
4503 switch_to_section (get_named_section (decl, ".lbss", 0));
4505 switch_to_section (bss_section);
4506 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4507 #ifdef ASM_DECLARE_OBJECT_NAME
4508 last_assemble_variable_decl = decl;
4509 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4511 /* The standard thing is just to output a label for the object. */
4512 ASM_OUTPUT_LABEL (file, name);
4513 #endif /* ASM_DECLARE_OBJECT_NAME */
4514 ASM_OUTPUT_SKIP (file, size ? size : 1);
4518 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4520 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4521 make the problem of having too few registers even worse. */
4522 #ifdef INSN_SCHEDULING
4524 flag_schedule_insns = 0;
4528 /* The Darwin libraries never set errno, so we might as well
4529 avoid calling them when that's the only reason we would. */
4530 flag_errno_math = 0;
4532 /* The default values of these switches depend on TARGET_64BIT,
4533 which is not known at this moment. Mark these values with 2 and
4534 let the user override them. If no command line option
4535 specifies them, we will set the defaults in override_options. */
4537 flag_omit_frame_pointer = 2;
4539 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4543 flag_pcc_struct_return = 2;
4544 flag_asynchronous_unwind_tables = 2;
4545 flag_vect_cost_model = 1;
4546 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4547 SUBTARGET_OPTIMIZATION_OPTIONS;
4551 /* Decide whether we must probe the stack before any space allocation
4552 on this target. It's essentially TARGET_STACK_PROBE except when
4553 -fstack-check causes the stack to be already probed differently. */
4556 ix86_target_stack_probe (void)
4558 /* Do not probe the stack twice if static stack checking is enabled. */
4559 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4562 return TARGET_STACK_PROBE;
4565 /* Decide whether we can make a sibling call to a function. DECL is the
4566 declaration of the function being targeted by the call and EXP is the
4567 CALL_EXPR representing the call. */
4570 ix86_function_ok_for_sibcall (tree decl, tree exp)
4572 tree type, decl_or_type;
4575 /* If we are generating position-independent code, we cannot sibcall
4576 optimize any indirect call, or a direct call to a global function,
4577 as the PLT requires %ebx be live. */
4578 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4581 /* If we need to align the outgoing stack, then sibcalling would
4582 unalign the stack, which may break the called function. */
4583 if (ix86_minimum_incoming_stack_boundary (true)
4584 < PREFERRED_STACK_BOUNDARY)
4589 decl_or_type = decl;
4590 type = TREE_TYPE (decl);
4594 /* We're looking at the CALL_EXPR, we need the type of the function. */
4595 type = CALL_EXPR_FN (exp); /* pointer expression */
4596 type = TREE_TYPE (type); /* pointer type */
4597 type = TREE_TYPE (type); /* function type */
4598 decl_or_type = type;
4601 /* Check that the return value locations are the same. For instance, if
4602 if we are returning floats on the 80387 register stack, we cannot
4603 make a sibcall from a function that doesn't return a float to a
4604 function that does or, conversely, from a function that does return
4605 a float to a function that doesn't; the necessary stack adjustment
4606 would not be executed. This is also the place we notice
4607 differences in the return value ABI. Note that it is ok for one
4608 of the functions to have void return type as long as the return
4609 value of the other is passed in a register. */
4610 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4611 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4613 if (STACK_REG_P (a) || STACK_REG_P (b))
4615 if (!rtx_equal_p (a, b))
4618 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4620 else if (!rtx_equal_p (a, b))
4625 /* The SYSV ABI has more call-clobbered registers;
4626 disallow sibcalls from MS to SYSV. */
4627 if (cfun->machine->call_abi == MS_ABI
4628 && ix86_function_type_abi (type) == SYSV_ABI)
4633 /* If this call is indirect, we'll need to be able to use a
4634 call-clobbered register for the address of the target function.
4635 Make sure that all such registers are not used for passing
4636 parameters. Note that DLLIMPORT functions are indirect. */
4638 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4640 if (ix86_function_regparm (type, NULL) >= 3)
4642 /* ??? Need to count the actual number of registers to be used,
4643 not the possible number of registers. Fix later. */
4649 /* Otherwise okay. That also includes certain types of indirect calls. */
4653 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4654 and "sseregparm" calling convention attributes;
4655 arguments as in struct attribute_spec.handler. */
4658 ix86_handle_cconv_attribute (tree *node, tree name,
4660 int flags ATTRIBUTE_UNUSED,
4663 if (TREE_CODE (*node) != FUNCTION_TYPE
4664 && TREE_CODE (*node) != METHOD_TYPE
4665 && TREE_CODE (*node) != FIELD_DECL
4666 && TREE_CODE (*node) != TYPE_DECL)
4668 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4670 *no_add_attrs = true;
4674 /* Can combine regparm with all attributes but fastcall. */
4675 if (is_attribute_p ("regparm", name))
4679 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4681 error ("fastcall and regparm attributes are not compatible");
4684 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4686 error ("regparm and thiscall attributes are not compatible");
4689 cst = TREE_VALUE (args);
4690 if (TREE_CODE (cst) != INTEGER_CST)
4692 warning (OPT_Wattributes,
4693 "%qE attribute requires an integer constant argument",
4695 *no_add_attrs = true;
4697 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4699 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4701 *no_add_attrs = true;
4709 /* Do not warn when emulating the MS ABI. */
4710 if ((TREE_CODE (*node) != FUNCTION_TYPE
4711 && TREE_CODE (*node) != METHOD_TYPE)
4712 || ix86_function_type_abi (*node) != MS_ABI)
4713 warning (OPT_Wattributes, "%qE attribute ignored",
4715 *no_add_attrs = true;
4719 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4720 if (is_attribute_p ("fastcall", name))
4722 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4724 error ("fastcall and cdecl attributes are not compatible");
4726 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4728 error ("fastcall and stdcall attributes are not compatible");
4730 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4732 error ("fastcall and regparm attributes are not compatible");
4734 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4736 error ("fastcall and thiscall attributes are not compatible");
4740 /* Can combine stdcall with fastcall (redundant), regparm and sseregparm. */
4742 else if (is_attribute_p ("stdcall", name))
4744 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4746 error ("stdcall and cdecl attributes are not compatible");
4748 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4750 error ("stdcall and fastcall attributes are not compatible");
4752 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4754 error ("stdcall and thiscall attributes are not compatible");
4758 /* Can combine cdecl with regparm and sseregparm. */
4759 else if (is_attribute_p ("cdecl", name))
4761 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4763 error ("stdcall and cdecl attributes are not compatible");
4765 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4767 error ("fastcall and cdecl attributes are not compatible");
4769 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4771 error ("cdecl and thiscall attributes are not compatible");
4774 else if (is_attribute_p ("thiscall", name))
4776 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4777 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4779 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4781 error ("stdcall and thiscall attributes are not compatible");
4783 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4785 error ("fastcall and thiscall attributes are not compatible");
4787 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4789 error ("cdecl and thiscall attributes are not compatible");
4793 /* Can combine sseregparm with all attributes. */
4798 /* Return 0 if the attributes for two types are incompatible, 1 if they
4799 are compatible, and 2 if they are nearly compatible (which causes a
4800 warning to be generated). */
4803 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4805 /* Check for mismatch of non-default calling convention. */
4806 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4808 if (TREE_CODE (type1) != FUNCTION_TYPE
4809 && TREE_CODE (type1) != METHOD_TYPE)
4812 /* Check for mismatched fastcall/regparm types. */
4813 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4814 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4815 || (ix86_function_regparm (type1, NULL)
4816 != ix86_function_regparm (type2, NULL)))
4819 /* Check for mismatched sseregparm types. */
4820 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4821 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4824 /* Check for mismatched thiscall types. */
4825 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4826 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4829 /* Check for mismatched return types (cdecl vs stdcall). */
4830 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4831 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4837 /* Return the regparm value for a function with the indicated TYPE and DECL.
4838 DECL may be NULL when calling function indirectly
4839 or considering a libcall. */
4842 ix86_function_regparm (const_tree type, const_tree decl)
4848 return (ix86_function_type_abi (type) == SYSV_ABI
4849 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4851 regparm = ix86_regparm;
4852 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4855 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4859 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4862 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4865 /* Use register calling convention for local functions when possible. */
4867 && TREE_CODE (decl) == FUNCTION_DECL
4869 && !(profile_flag && !flag_fentry))
4871 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4872 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4875 int local_regparm, globals = 0, regno;
4877 /* Make sure no regparm register is taken by a
4878 fixed register variable. */
4879 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4880 if (fixed_regs[local_regparm])
4883 /* We don't want to use regparm(3) for nested functions as
4884 these use a static chain pointer in the third argument. */
4885 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4888 /* Each fixed register usage increases register pressure,
4889 so fewer registers should be used for argument passing.
4890 This functionality can be overridden by an explicit regparm value. */
4892 for (regno = 0; regno <= DI_REG; regno++)
4893 if (fixed_regs[regno])
4897 = globals < local_regparm ? local_regparm - globals : 0;
4899 if (local_regparm > regparm)
4900 regparm = local_regparm;
4907 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4908 DFmode (2) arguments in SSE registers for a function with the
4909 indicated TYPE and DECL. DECL may be NULL when calling function
4910 indirectly or considering a libcall. Otherwise return 0. */
4913 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4915 gcc_assert (!TARGET_64BIT);
4917 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4918 by the sseregparm attribute. */
4919 if (TARGET_SSEREGPARM
4920 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4927 error ("calling %qD with attribute sseregparm without "
4928 "SSE/SSE2 enabled", decl);
4930 error ("calling %qT with attribute sseregparm without "
4931 "SSE/SSE2 enabled", type);
4939 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4940 (and DFmode for SSE2) arguments in SSE registers. */
4941 if (decl && TARGET_SSE_MATH && optimize
4942 && !(profile_flag && !flag_fentry))
4944 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4945 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4947 return TARGET_SSE2 ? 2 : 1;
4953 /* Return true if EAX is live at the start of the function. Used by
4954 ix86_expand_prologue to determine if we need special help before
4955 calling allocate_stack_worker. */
4958 ix86_eax_live_at_start_p (void)
4960 /* Cheat. Don't bother working forward from ix86_function_regparm
4961 to the function type to whether an actual argument is located in
4962 eax. Instead just look at cfg info, which is still close enough
4963 to correct at this point. This gives false positives for broken
4964 functions that might use uninitialized data that happens to be
4965 allocated in eax, but who cares? */
4966 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4969 /* Value is the number of bytes of arguments automatically
4970 popped when returning from a subroutine call.
4971 FUNDECL is the declaration node of the function (as a tree),
4972 FUNTYPE is the data type of the function (as a tree),
4973 or for a library call it is an identifier node for the subroutine name.
4974 SIZE is the number of bytes of arguments passed on the stack.
4976 On the 80386, the RTD insn may be used to pop them if the number
4977 of args is fixed, but if the number is variable then the caller
4978 must pop them all. RTD can't be used for library calls now
4979 because the library is compiled with the Unix compiler.
4980 Use of RTD is a selectable option, since it is incompatible with
4981 standard Unix calling sequences. If the option is not selected,
4982 the caller must always pop the args.
4984 The attribute stdcall is equivalent to RTD on a per module basis. */
4987 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4991 /* None of the 64-bit ABIs pop arguments. */
4995 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4997 /* Cdecl functions override -mrtd, and never pop the stack. */
4998 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
5000 /* Stdcall and fastcall functions will pop the stack if not variadic. */
5002 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5003 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5004 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5007 if (rtd && ! stdarg_p (funtype))
5011 /* Lose any fake structure return argument if it is passed on the stack. */
5012 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5013 && !KEEP_AGGREGATE_RETURN_POINTER)
5015 int nregs = ix86_function_regparm (funtype, fundecl);
5017 return GET_MODE_SIZE (Pmode);
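/* Illustrative sketch, not part of GCC: the popping rules visible above
   reduce to "the callee pops SIZE bytes for stdcall/fastcall/thiscall, or
   under -mrtd, but only for non-variadic functions".  The boolean inputs
   below stand in for the attribute lookups above.  */
#if 0
#include <stdio.h>

/* Bytes the callee pops on return, given the calling convention flags.  */
static int
callee_pop_bytes (int size, int callee_pops_convention, int rtd, int variadic)
{
  if ((callee_pops_convention || rtd) && !variadic)
    return size;
  return 0;			/* cdecl or variadic: the caller pops */
}

int
main (void)
{
  printf ("%d\n", callee_pop_bytes (12, 1 /* stdcall */, 0, 0));	/* 12 */
  printf ("%d\n", callee_pop_bytes (12, 0 /* cdecl */, 0, 0));	/* 0 */
  return 0;
}
#endif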
5023 /* Argument support functions. */
5025 /* Return true when register may be used to pass function parameters. */
5027 ix86_function_arg_regno_p (int regno)
5030 const int *parm_regs;
5035 return (regno < REGPARM_MAX
5036 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5038 return (regno < REGPARM_MAX
5039 || (TARGET_MMX && MMX_REGNO_P (regno)
5040 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5041 || (TARGET_SSE && SSE_REGNO_P (regno)
5042 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5047 if (SSE_REGNO_P (regno) && TARGET_SSE)
5052 if (TARGET_SSE && SSE_REGNO_P (regno)
5053 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5057 /* TODO: The function should depend on current function ABI but
5058 builtins.c would need updating then. Therefore we use the default ABI. */
5061 /* RAX is used as hidden argument to va_arg functions. */
5062 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5065 if (ix86_abi == MS_ABI)
5066 parm_regs = x86_64_ms_abi_int_parameter_registers;
5068 parm_regs = x86_64_int_parameter_registers;
5069 for (i = 0; i < (ix86_abi == MS_ABI
5070 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5071 if (regno == parm_regs[i])
5076 /* Return true if we do not know how to pass TYPE solely in registers. */
5079 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5081 if (must_pass_in_stack_var_size_or_pad (mode, type))
5084 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5085 The layout_type routine is crafty and tries to trick us into passing
5086 currently unsupported vector types on the stack by using TImode. */
5087 return (!TARGET_64BIT && mode == TImode
5088 && type && TREE_CODE (type) != VECTOR_TYPE);
5091 /* Return the size, in bytes, of the area reserved for arguments passed
5092 in registers for the function represented by fndecl, depending on the ABI used. */
5095 ix86_reg_parm_stack_space (const_tree fndecl)
5097 enum calling_abi call_abi = SYSV_ABI;
5098 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5099 call_abi = ix86_function_abi (fndecl);
5101 call_abi = ix86_function_type_abi (fndecl);
5102 if (call_abi == MS_ABI)
5107 /* Returns value SYSV_ABI or MS_ABI, dependent on fntype, specifying the call ABI used. */
5110 ix86_function_type_abi (const_tree fntype)
5112 if (TARGET_64BIT && fntype != NULL)
5114 enum calling_abi abi = ix86_abi;
5115 if (abi == SYSV_ABI)
5117 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5120 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5128 ix86_function_ms_hook_prologue (const_tree fn)
5130 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5132 if (decl_function_context (fn) != NULL_TREE)
5133 error_at (DECL_SOURCE_LOCATION (fn),
5134 "ms_hook_prologue is not compatible with nested function");
5141 static enum calling_abi
5142 ix86_function_abi (const_tree fndecl)
5146 return ix86_function_type_abi (TREE_TYPE (fndecl));
5149 /* Returns value SYSV_ABI or MS_ABI, dependent on cfun, specifying the call ABI used. */
5152 ix86_cfun_abi (void)
5154 if (! cfun || ! TARGET_64BIT)
5156 return cfun->machine->call_abi;
5159 /* Write the extra assembler code needed to declare a function properly. */
5162 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5165 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5169 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5170 unsigned int filler_cc = 0xcccccccc;
5172 for (i = 0; i < filler_count; i += 4)
5173 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5176 ASM_OUTPUT_LABEL (asm_out_file, fname);
5178 /* Output magic byte marker, if hot-patch attribute is set. */
5183 /* leaq [%rsp + 0], %rsp */
5184 asm_fprintf (asm_out_file, ASM_BYTE
5185 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5189 /* movl.s %edi, %edi
5191 movl.s %esp, %ebp */
5192 asm_fprintf (asm_out_file, ASM_BYTE
5193 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5199 extern void init_regs (void);
5201 /* Implementation of the call ABI switching target hook. The call register
5202 sets specific to FNDECL are set up here. See also CONDITIONAL_REGISTER_USAGE
5203 for more details. */
5205 ix86_call_abi_override (const_tree fndecl)
5207 if (fndecl == NULL_TREE)
5208 cfun->machine->call_abi = ix86_abi;
5210 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5213 /* MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5214 re-initialization of init_regs each time we switch function context since
5215 this is needed only during RTL expansion. */
5217 ix86_maybe_switch_abi (void)
5220 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5224 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5225 for a call to a function whose data type is FNTYPE.
5226 For a library call, FNTYPE is 0. */
5229 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5230 tree fntype, /* tree ptr for function decl */
5231 rtx libname, /* SYMBOL_REF of library name or 0 */
5234 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5235 memset (cum, 0, sizeof (*cum));
5238 cum->call_abi = ix86_function_abi (fndecl);
5240 cum->call_abi = ix86_function_type_abi (fntype);
5241 /* Set up the number of registers to use for passing arguments. */
5243 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5244 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5245 "or subtarget optimization implying it");
5246 cum->nregs = ix86_regparm;
5249 cum->nregs = (cum->call_abi == SYSV_ABI
5250 ? X86_64_REGPARM_MAX
5251 : X86_64_MS_REGPARM_MAX);
5255 cum->sse_nregs = SSE_REGPARM_MAX;
5258 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5259 ? X86_64_SSE_REGPARM_MAX
5260 : X86_64_MS_SSE_REGPARM_MAX);
5264 cum->mmx_nregs = MMX_REGPARM_MAX;
5265 cum->warn_avx = true;
5266 cum->warn_sse = true;
5267 cum->warn_mmx = true;
5269 /* Because types might mismatch between caller and callee, we need to
5270 use the actual type of the function for local calls.
5271 FIXME: cgraph_analyze can be told to actually record if function uses
5272 va_start so for local functions maybe_vaarg can be made aggressive
5274 FIXME: once the type system is fixed, we won't need this code anymore. */
5276 fntype = TREE_TYPE (fndecl);
5277 cum->maybe_vaarg = (fntype
5278 ? (!prototype_p (fntype) || stdarg_p (fntype))
5283 /* If there are variable arguments, then we won't pass anything
5284 in registers in 32-bit mode. */
5285 if (stdarg_p (fntype))
5296 /* Use ecx and edx registers if function has fastcall attribute,
5297 else look for regparm information. */
5300 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5303 cum->fastcall = 1; /* Same first register as in fastcall. */
5305 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5311 cum->nregs = ix86_function_regparm (fntype, fndecl);
5314 /* Set up the number of SSE registers used for passing SFmode
5315 and DFmode arguments. Warn for mismatching ABI. */
5316 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5320 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5321 But in the case of vector types, it is some vector mode.
5323 When we have only some of our vector isa extensions enabled, then there
5324 are some modes for which vector_mode_supported_p is false. For these
5325 modes, the generic vector support in gcc will choose some non-vector mode
5326 in order to implement the type. By computing the natural mode, we'll
5327 select the proper ABI location for the operand and not depend on whatever
5328 the middle-end decides to do with these vector types.
5330 The middle-end can't deal with vector types > 16 bytes. In this
5331 case, we return the original mode and warn about the ABI change if CUM isn't NULL. */
5334 static enum machine_mode
5335 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5337 enum machine_mode mode = TYPE_MODE (type);
5339 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5341 HOST_WIDE_INT size = int_size_in_bytes (type);
5342 if ((size == 8 || size == 16 || size == 32)
5343 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5344 && TYPE_VECTOR_SUBPARTS (type) > 1)
5346 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5348 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5349 mode = MIN_MODE_VECTOR_FLOAT;
5351 mode = MIN_MODE_VECTOR_INT;
5353 /* Get the mode which has this inner mode and number of units. */
5354 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5355 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5356 && GET_MODE_INNER (mode) == innermode)
5358 if (size == 32 && !TARGET_AVX)
5360 static bool warnedavx;
5367 warning (0, "AVX vector argument without AVX "
5368 "enabled changes the ABI");
5370 return TYPE_MODE (type);
5383 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5384 this may not agree with the mode that the type system has chosen for the
5385 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5386 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5389 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5394 if (orig_mode != BLKmode)
5395 tmp = gen_rtx_REG (orig_mode, regno);
5398 tmp = gen_rtx_REG (mode, regno);
5399 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5400 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5406 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5407 of this code is to classify each 8 bytes of an incoming argument by register
5408 class and assign registers accordingly. */
5410 /* Return the union class of CLASS1 and CLASS2.
5411 See the x86-64 PS ABI for details. */
5413 static enum x86_64_reg_class
5414 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5416 /* Rule #1: If both classes are equal, this is the resulting class. */
5417 if (class1 == class2)
5420 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5422 if (class1 == X86_64_NO_CLASS)
5424 if (class2 == X86_64_NO_CLASS)
5427 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5428 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5429 return X86_64_MEMORY_CLASS;
5431 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5432 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5433 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5434 return X86_64_INTEGERSI_CLASS;
5435 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5436 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5437 return X86_64_INTEGER_CLASS;
5439 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used. */
5441 if (class1 == X86_64_X87_CLASS
5442 || class1 == X86_64_X87UP_CLASS
5443 || class1 == X86_64_COMPLEX_X87_CLASS
5444 || class2 == X86_64_X87_CLASS
5445 || class2 == X86_64_X87UP_CLASS
5446 || class2 == X86_64_COMPLEX_X87_CLASS)
5447 return X86_64_MEMORY_CLASS;
5449 /* Rule #6: Otherwise class SSE is used. */
5450 return X86_64_SSE_CLASS;
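/* Illustrative sketch, not part of GCC: a worked example of the merge rules
   above.  This trimmed reimplementation keeps only rules 1-4 and 6 (it drops
   the INTEGERSI/SSESF refinement of rule 4 and the X87 cases of rule 5) and
   uses shortened class names.  */
#if 0
#include <stdio.h>

enum cls { NO_CLASS, INTEGER, SSE, MEMORY };

static enum cls
merge (enum cls a, enum cls b)
{
  if (a == b) return a;				/* rule 1 */
  if (a == NO_CLASS) return b;			/* rule 2 */
  if (b == NO_CLASS) return a;
  if (a == MEMORY || b == MEMORY) return MEMORY;	/* rule 3 */
  if (a == INTEGER || b == INTEGER) return INTEGER;	/* rule 4 */
  return SSE;					/* rule 6 */
}

int
main (void)
{
  printf ("%d\n", merge (INTEGER, SSE) == INTEGER);	/* 1 */
  printf ("%d\n", merge (SSE, MEMORY) == MEMORY);	/* 1 */
  return 0;
}
#endif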
5453 /* Classify the argument of type TYPE and mode MODE.
5454 CLASSES will be filled by the register class used to pass each word
5455 of the operand. The number of words is returned. In case the parameter
5456 should be passed in memory, 0 is returned. As a special case for zero
5457 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5459 BIT_OFFSET is used internally for handling records; it specifies the
5460 offset in bits modulo 256 to avoid overflow cases.
5462 See the x86-64 PS ABI for details.
5466 classify_argument (enum machine_mode mode, const_tree type,
5467 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5469 HOST_WIDE_INT bytes =
5470 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5471 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5473 /* Variable sized entities are always passed/returned in memory. */
5477 if (mode != VOIDmode
5478 && targetm.calls.must_pass_in_stack (mode, type))
5481 if (type && AGGREGATE_TYPE_P (type))
5485 enum x86_64_reg_class subclasses[MAX_CLASSES];
5487 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5491 for (i = 0; i < words; i++)
5492 classes[i] = X86_64_NO_CLASS;
5494 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
5495 signal the memory class, so handle this as a special case. */
5498 classes[0] = X86_64_NO_CLASS;
5502 /* Classify each field of record and merge classes. */
5503 switch (TREE_CODE (type))
5506 /* And now merge the fields of structure. */
5507 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5509 if (TREE_CODE (field) == FIELD_DECL)
5513 if (TREE_TYPE (field) == error_mark_node)
5516 /* Bitfields are always classified as integer. Handle them
5517 early, since later code would consider them to be
5518 misaligned integers. */
5519 if (DECL_BIT_FIELD (field))
5521 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5522 i < ((int_bit_position (field) + (bit_offset % 64))
5523 + tree_low_cst (DECL_SIZE (field), 0)
5526 merge_classes (X86_64_INTEGER_CLASS,
5533 type = TREE_TYPE (field);
5535 /* Flexible array member is ignored. */
5536 if (TYPE_MODE (type) == BLKmode
5537 && TREE_CODE (type) == ARRAY_TYPE
5538 && TYPE_SIZE (type) == NULL_TREE
5539 && TYPE_DOMAIN (type) != NULL_TREE
5540 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5545 if (!warned && warn_psabi)
5548 inform (input_location,
5549 "The ABI of passing struct with"
5550 " a flexible array member has"
5551 " changed in GCC 4.4");
5555 num = classify_argument (TYPE_MODE (type), type,
5557 (int_bit_position (field)
5558 + bit_offset) % 256);
5561 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5562 for (i = 0; i < num && (i + pos) < words; i++)
5564 merge_classes (subclasses[i], classes[i + pos]);
5571 /* Arrays are handled as small records. */
5574 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5575 TREE_TYPE (type), subclasses, bit_offset);
5579 /* The partial classes are now full classes. */
5580 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5581 subclasses[0] = X86_64_SSE_CLASS;
5582 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5583 && !((bit_offset % 64) == 0 && bytes == 4))
5584 subclasses[0] = X86_64_INTEGER_CLASS;
5586 for (i = 0; i < words; i++)
5587 classes[i] = subclasses[i % num];
5592 case QUAL_UNION_TYPE:
/* Unions are similar to RECORD_TYPE but the offset is always 0.  */
5595 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5597 if (TREE_CODE (field) == FIELD_DECL)
5601 if (TREE_TYPE (field) == error_mark_node)
5604 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5605 TREE_TYPE (field), subclasses,
5609 for (i = 0; i < num; i++)
5610 classes[i] = merge_classes (subclasses[i], classes[i]);
/* When size > 16 bytes, if the first one isn't
   X86_64_SSE_CLASS or any other ones aren't
   X86_64_SSEUP_CLASS, everything should be passed in
   memory.  */
5625 if (classes[0] != X86_64_SSE_CLASS)
5628 for (i = 1; i < words; i++)
5629 if (classes[i] != X86_64_SSEUP_CLASS)
5633 /* Final merger cleanup. */
5634 for (i = 0; i < words; i++)
/* If one class is MEMORY, everything should be passed in
   memory.  */
5638 if (classes[i] == X86_64_MEMORY_CLASS)
5641 /* The X86_64_SSEUP_CLASS should be always preceded by
5642 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5643 if (classes[i] == X86_64_SSEUP_CLASS
5644 && classes[i - 1] != X86_64_SSE_CLASS
5645 && classes[i - 1] != X86_64_SSEUP_CLASS)
5647 /* The first one should never be X86_64_SSEUP_CLASS. */
5648 gcc_assert (i != 0);
5649 classes[i] = X86_64_SSE_CLASS;
5652 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5653 everything should be passed in memory. */
5654 if (classes[i] == X86_64_X87UP_CLASS
5655 && (classes[i - 1] != X86_64_X87_CLASS))
5659 /* The first one should never be X86_64_X87UP_CLASS. */
5660 gcc_assert (i != 0);
5661 if (!warned && warn_psabi)
5664 inform (input_location,
5665 "The ABI of passing union with long double"
5666 " has changed in GCC 4.4");
/* Compute the alignment needed.  We align all types to their natural
   boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
5676 if (mode != VOIDmode && mode != BLKmode)
5678 int mode_alignment = GET_MODE_BITSIZE (mode);
5681 mode_alignment = 128;
5682 else if (mode == XCmode)
5683 mode_alignment = 256;
5684 if (COMPLEX_MODE_P (mode))
5685 mode_alignment /= 2;
5686 /* Misaligned fields are always returned in memory. */
5687 if (bit_offset % mode_alignment)
/* For V1xx modes, just use the base mode.  */
5692 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5693 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5694 mode = GET_MODE_INNER (mode);
5696 /* Classification of atomic types. */
5701 classes[0] = X86_64_SSE_CLASS;
5704 classes[0] = X86_64_SSE_CLASS;
5705 classes[1] = X86_64_SSEUP_CLASS;
int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5719 classes[0] = X86_64_INTEGERSI_CLASS;
5722 else if (size <= 64)
5724 classes[0] = X86_64_INTEGER_CLASS;
5727 else if (size <= 64+32)
5729 classes[0] = X86_64_INTEGER_CLASS;
5730 classes[1] = X86_64_INTEGERSI_CLASS;
5733 else if (size <= 64+64)
5735 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5743 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5747 /* OImode shouldn't be used directly. */
5752 if (!(bit_offset % 64))
5753 classes[0] = X86_64_SSESF_CLASS;
5755 classes[0] = X86_64_SSE_CLASS;
5758 classes[0] = X86_64_SSEDF_CLASS;
5761 classes[0] = X86_64_X87_CLASS;
5762 classes[1] = X86_64_X87UP_CLASS;
5765 classes[0] = X86_64_SSE_CLASS;
5766 classes[1] = X86_64_SSEUP_CLASS;
5769 classes[0] = X86_64_SSE_CLASS;
5770 if (!(bit_offset % 64))
5776 if (!warned && warn_psabi)
5779 inform (input_location,
5780 "The ABI of passing structure with complex float"
5781 " member has changed in GCC 4.4");
5783 classes[1] = X86_64_SSESF_CLASS;
5787 classes[0] = X86_64_SSEDF_CLASS;
5788 classes[1] = X86_64_SSEDF_CLASS;
5791 classes[0] = X86_64_COMPLEX_X87_CLASS;
/* This mode is larger than 16 bytes.  */
5802 classes[0] = X86_64_SSE_CLASS;
5803 classes[1] = X86_64_SSEUP_CLASS;
5804 classes[2] = X86_64_SSEUP_CLASS;
5805 classes[3] = X86_64_SSEUP_CLASS;
5813 classes[0] = X86_64_SSE_CLASS;
5814 classes[1] = X86_64_SSEUP_CLASS;
5822 classes[0] = X86_64_SSE_CLASS;
5828 gcc_assert (VECTOR_MODE_P (mode));
5833 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5835 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5836 classes[0] = X86_64_INTEGERSI_CLASS;
5838 classes[0] = X86_64_INTEGER_CLASS;
5839 classes[1] = X86_64_INTEGER_CLASS;
5840 return 1 + (bytes > 8);
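/* Worked example for classify_argument (an illustrative sketch): for
   the struct s from the comment near the top of this section,

       classify_argument (BLKmode, s_type, classes, 0)

   returns 2 with classes[0] == X86_64_SSEDF_CLASS and
   classes[1] == X86_64_INTEGER_CLASS, while an aggregate larger than
   32 bytes yields 0, i.e. memory.  */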
/* Examine the argument and return the number of registers required in
   each class.  Return 0 iff the parameter should be passed in memory.  */
5847 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5848 int *int_nregs, int *sse_nregs)
5850 enum x86_64_reg_class regclass[MAX_CLASSES];
5851 int n = classify_argument (mode, type, regclass, 0);
5857 for (n--; n >= 0; n--)
5858 switch (regclass[n])
5860 case X86_64_INTEGER_CLASS:
5861 case X86_64_INTEGERSI_CLASS:
5864 case X86_64_SSE_CLASS:
5865 case X86_64_SSESF_CLASS:
5866 case X86_64_SSEDF_CLASS:
5869 case X86_64_NO_CLASS:
5870 case X86_64_SSEUP_CLASS:
5872 case X86_64_X87_CLASS:
5873 case X86_64_X87UP_CLASS:
5877 case X86_64_COMPLEX_X87_CLASS:
5878 return in_return ? 2 : 0;
5879 case X86_64_MEMORY_CLASS:
5885 /* Construct container for the argument used by GCC interface. See
5886 FUNCTION_ARG for the detailed description. */
5889 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5890 const_tree type, int in_return, int nintregs, int nsseregs,
5891 const int *intreg, int sse_regno)
5893 /* The following variables hold the static issued_error state. */
5894 static bool issued_sse_arg_error;
5895 static bool issued_sse_ret_error;
5896 static bool issued_x87_ret_error;
5898 enum machine_mode tmpmode;
5900 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5901 enum x86_64_reg_class regclass[MAX_CLASSES];
5905 int needed_sseregs, needed_intregs;
5906 rtx exp[MAX_CLASSES];
5909 n = classify_argument (mode, type, regclass, 0);
5912 if (!examine_argument (mode, type, in_return, &needed_intregs,
5915 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5918 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5919 some less clueful developer tries to use floating-point anyway. */
5920 if (needed_sseregs && !TARGET_SSE)
5924 if (!issued_sse_ret_error)
5926 error ("SSE register return with SSE disabled");
5927 issued_sse_ret_error = true;
5930 else if (!issued_sse_arg_error)
5932 error ("SSE register argument with SSE disabled");
5933 issued_sse_arg_error = true;
5938 /* Likewise, error if the ABI requires us to return values in the
5939 x87 registers and the user specified -mno-80387. */
5940 if (!TARGET_80387 && in_return)
5941 for (i = 0; i < n; i++)
5942 if (regclass[i] == X86_64_X87_CLASS
5943 || regclass[i] == X86_64_X87UP_CLASS
5944 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5946 if (!issued_x87_ret_error)
5948 error ("x87 register return with x87 disabled");
5949 issued_x87_ret_error = true;
/* First construct the simple cases.  Avoid SCmode, since we want to use
   a single register to pass this type.  */
5956 if (n == 1 && mode != SCmode)
5957 switch (regclass[0])
5959 case X86_64_INTEGER_CLASS:
5960 case X86_64_INTEGERSI_CLASS:
5961 return gen_rtx_REG (mode, intreg[0]);
5962 case X86_64_SSE_CLASS:
5963 case X86_64_SSESF_CLASS:
5964 case X86_64_SSEDF_CLASS:
5965 if (mode != BLKmode)
5966 return gen_reg_or_parallel (mode, orig_mode,
5967 SSE_REGNO (sse_regno));
5969 case X86_64_X87_CLASS:
5970 case X86_64_COMPLEX_X87_CLASS:
5971 return gen_rtx_REG (mode, FIRST_STACK_REG);
5972 case X86_64_NO_CLASS:
5973 /* Zero sized array, struct or class. */
5978 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5979 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5980 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5982 && regclass[0] == X86_64_SSE_CLASS
5983 && regclass[1] == X86_64_SSEUP_CLASS
5984 && regclass[2] == X86_64_SSEUP_CLASS
5985 && regclass[3] == X86_64_SSEUP_CLASS
5987 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5990 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5991 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5992 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5993 && regclass[1] == X86_64_INTEGER_CLASS
5994 && (mode == CDImode || mode == TImode || mode == TFmode)
5995 && intreg[0] + 1 == intreg[1])
5996 return gen_rtx_REG (mode, intreg[0]);
5998 /* Otherwise figure out the entries of the PARALLEL. */
5999 for (i = 0; i < n; i++)
6003 switch (regclass[i])
6005 case X86_64_NO_CLASS:
6007 case X86_64_INTEGER_CLASS:
6008 case X86_64_INTEGERSI_CLASS:
6009 /* Merge TImodes on aligned occasions here too. */
6010 if (i * 8 + 8 > bytes)
6011 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6012 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
/* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
6017 if (tmpmode == BLKmode)
6019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6020 gen_rtx_REG (tmpmode, *intreg),
6024 case X86_64_SSESF_CLASS:
6025 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6026 gen_rtx_REG (SFmode,
6027 SSE_REGNO (sse_regno)),
6031 case X86_64_SSEDF_CLASS:
6032 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6033 gen_rtx_REG (DFmode,
6034 SSE_REGNO (sse_regno)),
6038 case X86_64_SSE_CLASS:
6046 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6056 && regclass[1] == X86_64_SSEUP_CLASS
6057 && regclass[2] == X86_64_SSEUP_CLASS
6058 && regclass[3] == X86_64_SSEUP_CLASS);
6065 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6066 gen_rtx_REG (tmpmode,
6067 SSE_REGNO (sse_regno)),
6076 /* Empty aligned struct, union or class. */
6080 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6081 for (i = 0; i < nexps; i++)
6082 XVECEXP (ret, 0, i) = exp [i];
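/* Continuing the example above (illustrative): for
   struct { double d; long l; } the loop builds a PARALLEL roughly of
   the form

       (parallel:BLK [(expr_list (reg:DF xmm0) (const_int 0))
                      (expr_list (reg:DI di) (const_int 8))])

   pairing each eightbyte's register with its byte offset in the
   argument.  */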
6086 /* Update the data in CUM to advance over an argument of mode MODE
6087 and data type TYPE. (TYPE is null for libcalls where that information
6088 may not be available.) */
6091 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6092 const_tree type, HOST_WIDE_INT bytes,
6093 HOST_WIDE_INT words)
6109 cum->words += words;
6110 cum->nregs -= words;
6111 cum->regno += words;
6113 if (cum->nregs <= 0)
6121 /* OImode shouldn't be used directly. */
6125 if (cum->float_in_sse < 2)
6128 if (cum->float_in_sse < 1)
6145 if (!type || !AGGREGATE_TYPE_P (type))
6147 cum->sse_words += words;
6148 cum->sse_nregs -= 1;
6149 cum->sse_regno += 1;
6150 if (cum->sse_nregs <= 0)
6164 if (!type || !AGGREGATE_TYPE_P (type))
6166 cum->mmx_words += words;
6167 cum->mmx_nregs -= 1;
6168 cum->mmx_regno += 1;
6169 if (cum->mmx_nregs <= 0)
6180 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6181 const_tree type, HOST_WIDE_INT words, bool named)
6183 int int_nregs, sse_nregs;
/* Unnamed 256-bit vector mode parameters are passed on the stack.  */
6186 if (!named && VALID_AVX256_REG_MODE (mode))
6189 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6190 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6192 cum->nregs -= int_nregs;
6193 cum->sse_nregs -= sse_nregs;
6194 cum->regno += int_nregs;
6195 cum->sse_regno += sse_nregs;
6199 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6200 cum->words = (cum->words + align - 1) & ~(align - 1);
6201 cum->words += words;
6206 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6207 HOST_WIDE_INT words)
/* Otherwise, this should be passed indirectly.  */
6210 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6212 cum->words += words;
6220 /* Update the data in CUM to advance over an argument of mode MODE and
6221 data type TYPE. (TYPE is null for libcalls where that information
6222 may not be available.) */
6225 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6226 const_tree type, bool named)
6228 HOST_WIDE_INT bytes, words;
6230 if (mode == BLKmode)
6231 bytes = int_size_in_bytes (type);
6233 bytes = GET_MODE_SIZE (mode);
6234 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6237 mode = type_natural_mode (type, NULL);
6239 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6240 function_arg_advance_ms_64 (cum, bytes, words);
6241 else if (TARGET_64BIT)
6242 function_arg_advance_64 (cum, mode, type, words, named);
6244 function_arg_advance_32 (cum, mode, type, bytes, words);
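/* Illustrative effect of the advance routines above: for a SysV x86-64
   call f (int i, double d), advancing past I moves cum->regno from 0 to
   1, and advancing past D moves cum->sse_regno from 0 to 1; once nregs
   or sse_nregs is exhausted, later arguments accumulate in cum->words
   and go to the stack.  */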
6247 /* Define where to put the arguments to a function.
6248 Value is zero to push the argument on the stack,
6249 or a hard register in which to store the argument.
6251 MODE is the argument's machine mode.
6252 TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
6255 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6256 the preceding args and about the function being called.
6257 NAMED is nonzero if this argument is a named parameter
6258 (otherwise it is an extra parameter matching an ellipsis). */
6261 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6262 enum machine_mode orig_mode, const_tree type,
6263 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6265 static bool warnedsse, warnedmmx;
6267 /* Avoid the AL settings for the Unix64 ABI. */
6268 if (mode == VOIDmode)
6284 if (words <= cum->nregs)
6286 int regno = cum->regno;
/* Fastcall allocates the first two DWORD (SImode) or
   smaller arguments to ECX and EDX if it isn't an
   aggregate type.  */
6295 || (type && AGGREGATE_TYPE_P (type)))
/* ECX, not EAX, is the first allocated register.  */
6299 if (regno == AX_REG)
6302 return gen_rtx_REG (mode, regno);
6307 if (cum->float_in_sse < 2)
6310 if (cum->float_in_sse < 1)
6314 /* In 32bit, we pass TImode in xmm registers. */
6321 if (!type || !AGGREGATE_TYPE_P (type))
6323 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
warning (0, "SSE vector argument without SSE enabled "
	 "changes the ABI");
6330 return gen_reg_or_parallel (mode, orig_mode,
6331 cum->sse_regno + FIRST_SSE_REG);
6336 /* OImode shouldn't be used directly. */
6345 if (!type || !AGGREGATE_TYPE_P (type))
6348 return gen_reg_or_parallel (mode, orig_mode,
6349 cum->sse_regno + FIRST_SSE_REG);
6359 if (!type || !AGGREGATE_TYPE_P (type))
6361 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
warning (0, "MMX vector argument without MMX enabled "
	 "changes the ABI");
6368 return gen_reg_or_parallel (mode, orig_mode,
6369 cum->mmx_regno + FIRST_MMX_REG);
6378 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6379 enum machine_mode orig_mode, const_tree type, bool named)
/* Handle a hidden AL argument containing the number of SSE registers
   used by a varargs x86-64 function.  */
6383 if (mode == VOIDmode)
6384 return GEN_INT (cum->maybe_vaarg
6385 ? (cum->sse_nregs < 0
6386 ? X86_64_SSE_REGPARM_MAX
/* Unnamed 256-bit vector mode parameters are passed on the stack.  */
6407 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6409 &x86_64_int_parameter_registers [cum->regno],
6414 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6415 enum machine_mode orig_mode, bool named,
6416 HOST_WIDE_INT bytes)
/* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
   We use the value -2 to specify that the current function call is MS ABI.  */
6422 if (mode == VOIDmode)
6423 return GEN_INT (-2);
6425 /* If we've run out of registers, it goes on the stack. */
6426 if (cum->nregs == 0)
6429 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6431 /* Only floating point modes are passed in anything but integer regs. */
6432 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6435 regno = cum->regno + FIRST_SSE_REG;
6440 /* Unnamed floating parameters are passed in both the
6441 SSE and integer registers. */
6442 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6443 t2 = gen_rtx_REG (mode, regno);
6444 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6445 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6446 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
/* Handle aggregate types passed in registers.  */
6450 if (orig_mode == BLKmode)
6452 if (bytes > 0 && bytes <= 8)
6453 mode = (bytes > 4 ? DImode : SImode);
6454 if (mode == BLKmode)
6458 return gen_reg_or_parallel (mode, orig_mode, regno);
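/* Example of the MS_ABI cases above (illustrative): a named double in
   the second argument slot comes back as (reg:DF xmm1); an unnamed one
   yields the two-element PARALLEL over %xmm1 and %rdx so va_arg callers
   can find it; a 3-byte aggregate never reaches this routine at all,
   since ix86_pass_by_reference below forces it to be passed by
   reference.  */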
/* Return where to put the arguments to a function.
   Return zero to push the argument on the stack, or a hard register
   in which to store the argument.
6464 MODE is the argument's machine mode. TYPE is the data type of the
6465 argument. It is null for libcalls where that information may not be
6466 available. CUM gives information about the preceding args and about
6467 the function being called. NAMED is nonzero if this argument is a
6468 named parameter (otherwise it is an extra parameter matching an
6472 ix86_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode omode,
6473 const_tree type, bool named)
6475 enum machine_mode mode = omode;
6476 HOST_WIDE_INT bytes, words;
6478 if (mode == BLKmode)
6479 bytes = int_size_in_bytes (type);
6481 bytes = GET_MODE_SIZE (mode);
6482 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6484 /* To simplify the code below, represent vector types with a vector mode
6485 even if MMX/SSE are not active. */
6486 if (type && TREE_CODE (type) == VECTOR_TYPE)
6487 mode = type_natural_mode (type, cum);
6489 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6490 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6491 else if (TARGET_64BIT)
6492 return function_arg_64 (cum, mode, omode, type, named);
6494 return function_arg_32 (cum, mode, omode, type, bytes, words);
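/* Putting the three paths together (illustrative): for
   extern void f (int a, double b); the 64-bit SysV path hands back
   (reg:SI di) and (reg:DF xmm0), the MS_ABI path (reg:SI cx) and
   (reg:DF xmm1), and the 32-bit path returns 0 for both, so they are
   pushed on the stack.  */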
6497 /* A C expression that indicates when an argument must be passed by
6498 reference. If nonzero for an argument, a copy of that argument is
6499 made in memory and a pointer to the argument is passed instead of
6500 the argument itself. The pointer is passed in whatever way is
6501 appropriate for passing a pointer to that type. */
6504 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6505 enum machine_mode mode ATTRIBUTE_UNUSED,
6506 const_tree type, bool named ATTRIBUTE_UNUSED)
6508 /* See Windows x64 Software Convention. */
6509 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6511 int msize = (int) GET_MODE_SIZE (mode);
6514 /* Arrays are passed by reference. */
6515 if (TREE_CODE (type) == ARRAY_TYPE)
6518 if (AGGREGATE_TYPE_P (type))
6520 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6521 are passed by reference. */
6522 msize = int_size_in_bytes (type);
6526 /* __m128 is passed by reference. */
6528 case 1: case 2: case 4: case 8:
6534 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
/* Return true when TYPE should be 128-bit aligned for 32-bit argument
   passing.  */
6543 contains_aligned_value_p (const_tree type)
6545 enum machine_mode mode = TYPE_MODE (type);
6546 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6550 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6552 if (TYPE_ALIGN (type) < 128)
6555 if (AGGREGATE_TYPE_P (type))
6557 /* Walk the aggregates recursively. */
6558 switch (TREE_CODE (type))
6562 case QUAL_UNION_TYPE:
6566 /* Walk all the structure fields. */
6567 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6569 if (TREE_CODE (field) == FIELD_DECL
6570 && contains_aligned_value_p (TREE_TYPE (field)))
/* Just for use if some language passes arrays by value.  */
6578 if (contains_aligned_value_p (TREE_TYPE (type)))
6589 /* Gives the alignment boundary, in bits, of an argument with the
6590 specified mode and type. */
6593 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
/* The main variant type is what is used for the call, so convert
   the type to its main variant.  */
6600 type = TYPE_MAIN_VARIANT (type);
6601 align = TYPE_ALIGN (type);
6604 align = GET_MODE_ALIGNMENT (mode);
6605 if (align < PARM_BOUNDARY)
6606 align = PARM_BOUNDARY;
6607 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6608 natural boundaries. */
6609 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
The i386 ABI defines all arguments to be 4-byte aligned.  We have to
   make an exception for SSE modes since these require 128-bit
   alignment.

   The handling here differs from field_alignment.  ICC aligns MMX
6616 arguments to 4 byte boundaries, while structure fields are aligned
6617 to 8 byte boundaries. */
6620 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6621 align = PARM_BOUNDARY;
6625 if (!contains_aligned_value_p (type))
6626 align = PARM_BOUNDARY;
6629 if (align > BIGGEST_ALIGNMENT)
6630 align = BIGGEST_ALIGNMENT;
6634 /* Return true if N is a possible register number of function value. */
6637 ix86_function_value_regno_p (const unsigned int regno)
6644 case FIRST_FLOAT_REG:
/* TODO: The function should depend on the current function's ABI, but
   builtins.c would need updating then.  Therefore we use the
   default ABI here.  */
6650 return TARGET_FLOAT_RETURNS_IN_80387;
6656 if (TARGET_MACHO || TARGET_64BIT)
6664 /* Define how to find the value returned by a function.
6665 VALTYPE is the data type of the value (as a tree).
6666 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6667 otherwise, FUNC is 0. */
6670 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6671 const_tree fntype, const_tree fn)
6675 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6676 we normally prevent this case when mmx is not available. However
6677 some ABIs may require the result to be returned like DImode. */
6678 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6679 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6681 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6682 we prevent this case when sse is not available. However some ABIs
6683 may require the result to be returned like integer TImode. */
6684 else if (mode == TImode
6685 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6686 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6688 /* 32-byte vector modes in %ymm0. */
6689 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6690 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6692 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6693 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6694 regno = FIRST_FLOAT_REG;
6696 /* Most things go in %eax. */
6699 /* Override FP return register with %xmm0 for local functions when
6700 SSE math is enabled or for functions with sseregparm attribute. */
6701 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6703 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6704 if ((sse_level >= 1 && mode == SFmode)
6705 || (sse_level == 2 && mode == DFmode))
6706 regno = FIRST_SSE_REG;
6709 /* OImode shouldn't be used directly. */
6710 gcc_assert (mode != OImode);
6712 return gen_rtx_REG (orig_mode, regno);
6716 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6721 /* Handle libcalls, which don't provide a type node. */
6722 if (valtype == NULL)
6734 return gen_rtx_REG (mode, FIRST_SSE_REG);
6737 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6741 return gen_rtx_REG (mode, AX_REG);
6745 ret = construct_container (mode, orig_mode, valtype, 1,
6746 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6747 x86_64_int_return_registers, 0);
/* For zero sized structures, construct_container returns NULL, but we
   need to keep the rest of the compiler happy by returning a meaningful
   value.  */
6752 ret = gen_rtx_REG (orig_mode, AX_REG);
6758 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6760 unsigned int regno = AX_REG;
6764 switch (GET_MODE_SIZE (mode))
if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6768 && !COMPLEX_MODE_P (mode))
6769 regno = FIRST_SSE_REG;
6773 if (mode == SFmode || mode == DFmode)
6774 regno = FIRST_SSE_REG;
6780 return gen_rtx_REG (orig_mode, regno);
6784 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6785 enum machine_mode orig_mode, enum machine_mode mode)
6787 const_tree fn, fntype;
6790 if (fntype_or_decl && DECL_P (fntype_or_decl))
6791 fn = fntype_or_decl;
6792 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6794 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6795 return function_value_ms_64 (orig_mode, mode);
6796 else if (TARGET_64BIT)
6797 return function_value_64 (orig_mode, mode, valtype);
6799 return function_value_32 (orig_mode, mode, fntype, fn);
6803 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6804 bool outgoing ATTRIBUTE_UNUSED)
6806 enum machine_mode mode, orig_mode;
6808 orig_mode = TYPE_MODE (valtype);
6809 mode = type_natural_mode (valtype, NULL);
6810 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6814 ix86_libcall_value (enum machine_mode mode)
6816 return ix86_function_value_1 (NULL, NULL, mode, mode);
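/* Usage sketch: ix86_libcall_value (DFmode) resolves to (reg:DF xmm0)
   under the 64-bit SysV ABI, likewise to %xmm0 under MS_ABI, and to the
   x87 stack top %st(0) on 32-bit targets that return floats in the
   80387.  */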
6819 /* Return true iff type is returned in memory. */
6821 static int ATTRIBUTE_UNUSED
6822 return_in_memory_32 (const_tree type, enum machine_mode mode)
6826 if (mode == BLKmode)
6829 size = int_size_in_bytes (type);
6831 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6834 if (VECTOR_MODE_P (mode) || mode == TImode)
6836 /* User-created vectors small enough to fit in EAX. */
/* MMX/3dNow values are returned in MM0,
   except when it doesn't exist.  */
6843 return (TARGET_MMX ? 0 : 1);
6845 /* SSE values are returned in XMM0, except when it doesn't exist. */
6847 return (TARGET_SSE ? 0 : 1);
6849 /* AVX values are returned in YMM0, except when it doesn't exist. */
6851 return TARGET_AVX ? 0 : 1;
6860 /* OImode shouldn't be used directly. */
6861 gcc_assert (mode != OImode);
6866 static int ATTRIBUTE_UNUSED
6867 return_in_memory_64 (const_tree type, enum machine_mode mode)
6869 int needed_intregs, needed_sseregs;
6870 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6873 static int ATTRIBUTE_UNUSED
6874 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6876 HOST_WIDE_INT size = int_size_in_bytes (type);
6878 /* __m128 is returned in xmm0. */
6879 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6880 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
/* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes.  */
6884 return (size != 1 && size != 2 && size != 4 && size != 8);
6888 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6890 #ifdef SUBTARGET_RETURN_IN_MEMORY
6891 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6893 const enum machine_mode mode = type_natural_mode (type, NULL);
6897 if (ix86_function_type_abi (fntype) == MS_ABI)
6898 return return_in_memory_ms_64 (type, mode);
6900 return return_in_memory_64 (type, mode);
6903 return return_in_memory_32 (type, mode);
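/* For example: a 24-byte structure is returned in memory by all three
   variants above; a 16-byte __m128 is returned in %xmm0, except by
   return_in_memory_32 when SSE is disabled, in which case it goes to
   memory (and ix86_struct_value_rtx below warns about the ABI
   change).  */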
/* Return true iff TYPE is returned in memory.  This version is used
6908 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6909 but differs notably in that when MMX is available, 8-byte vectors
6910 are returned in memory, rather than in MMX registers. */
6913 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6916 enum machine_mode mode = type_natural_mode (type, NULL);
6919 return return_in_memory_64 (type, mode);
6921 if (mode == BLKmode)
6924 size = int_size_in_bytes (type);
6926 if (VECTOR_MODE_P (mode))
/* Return in memory only if MMX registers *are* available.  This
   seems backwards, but it is consistent with the existing
   Solaris x86 ABI.  */
6936 else if (mode == TImode)
6938 else if (mode == XFmode)
6944 /* When returning SSE vector types, we have a choice of either
6945 (1) being abi incompatible with a -march switch, or
6946 (2) generating an error.
6947 Given no good solution, I think the safest thing is one warning.
6948 The user won't be able to use -Werror, but....
6950 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6951 called in response to actually generating a caller or callee that
6952 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6953 via aggregate_value_p for general type probing from tree-ssa. */
6956 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6958 static bool warnedsse, warnedmmx;
6960 if (!TARGET_64BIT && type)
6962 /* Look at the return type of the function, not the function type. */
6963 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6965 if (!TARGET_SSE && !warnedsse)
6968 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
warning (0, "SSE vector return without SSE enabled "
	 "changes the ABI");
6976 if (!TARGET_MMX && !warnedmmx)
6978 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
warning (0, "MMX vector return without MMX enabled "
	 "changes the ABI");
6991 /* Create the va_list data type. */
/* Return the calling convention specific va_list data type.
   The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI.  */
6997 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6999 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use a plain pointer to the argument area.  */
7002 if (!TARGET_64BIT || abi == MS_ABI)
7003 return build_pointer_type (char_type_node);
7005 record = lang_hooks.types.make_type (RECORD_TYPE);
7006 type_decl = build_decl (BUILTINS_LOCATION,
7007 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7009 f_gpr = build_decl (BUILTINS_LOCATION,
7010 FIELD_DECL, get_identifier ("gp_offset"),
7011 unsigned_type_node);
7012 f_fpr = build_decl (BUILTINS_LOCATION,
7013 FIELD_DECL, get_identifier ("fp_offset"),
7014 unsigned_type_node);
7015 f_ovf = build_decl (BUILTINS_LOCATION,
7016 FIELD_DECL, get_identifier ("overflow_arg_area"),
7018 f_sav = build_decl (BUILTINS_LOCATION,
7019 FIELD_DECL, get_identifier ("reg_save_area"),
7022 va_list_gpr_counter_field = f_gpr;
7023 va_list_fpr_counter_field = f_fpr;
7025 DECL_FIELD_CONTEXT (f_gpr) = record;
7026 DECL_FIELD_CONTEXT (f_fpr) = record;
7027 DECL_FIELD_CONTEXT (f_ovf) = record;
7028 DECL_FIELD_CONTEXT (f_sav) = record;
7030 TREE_CHAIN (record) = type_decl;
7031 TYPE_NAME (record) = type_decl;
7032 TYPE_FIELDS (record) = f_gpr;
7033 DECL_CHAIN (f_gpr) = f_fpr;
7034 DECL_CHAIN (f_fpr) = f_ovf;
7035 DECL_CHAIN (f_ovf) = f_sav;
7037 layout_type (record);
7039 /* The correct type is an array type of one element. */
7040 return build_array_type (record, build_index_type (size_zero_node));
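/* The record laid out above corresponds to the psABI's well-known
   declaration, shown here for reference only:

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];
*/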
/* Set up the builtin va_list data type and, for 64-bit, the additional
   calling convention specific va_list data types.  */
7047 ix86_build_builtin_va_list (void)
7049 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7051 /* Initialize abi specific va_list builtin types. */
7055 if (ix86_abi == MS_ABI)
7057 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7058 if (TREE_CODE (t) != RECORD_TYPE)
7059 t = build_variant_type_copy (t);
7060 sysv_va_list_type_node = t;
7065 if (TREE_CODE (t) != RECORD_TYPE)
7066 t = build_variant_type_copy (t);
7067 sysv_va_list_type_node = t;
7069 if (ix86_abi != MS_ABI)
7071 t = ix86_build_builtin_va_list_abi (MS_ABI);
7072 if (TREE_CODE (t) != RECORD_TYPE)
7073 t = build_variant_type_copy (t);
7074 ms_va_list_type_node = t;
7079 if (TREE_CODE (t) != RECORD_TYPE)
7080 t = build_variant_type_copy (t);
7081 ms_va_list_type_node = t;
7088 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7091 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7097 /* GPR size of varargs save area. */
7098 if (cfun->va_list_gpr_size)
7099 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7101 ix86_varargs_gpr_size = 0;
7103 /* FPR size of varargs save area. We don't need it if we don't pass
7104 anything in SSE registers. */
7105 if (TARGET_SSE && cfun->va_list_fpr_size)
7106 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7108 ix86_varargs_fpr_size = 0;
7110 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7113 save_area = frame_pointer_rtx;
7114 set = get_varargs_alias_set ();
7116 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7117 if (max > X86_64_REGPARM_MAX)
7118 max = X86_64_REGPARM_MAX;
7120 for (i = cum->regno; i < max; i++)
7122 mem = gen_rtx_MEM (Pmode,
7123 plus_constant (save_area, i * UNITS_PER_WORD));
7124 MEM_NOTRAP_P (mem) = 1;
7125 set_mem_alias_set (mem, set);
7126 emit_move_insn (mem, gen_rtx_REG (Pmode,
7127 x86_64_int_parameter_registers[i]));
7130 if (ix86_varargs_fpr_size)
7132 enum machine_mode smode;
/* Now emit code to save SSE registers.  The AX parameter contains the
   number of SSE parameter registers used to call this function, though
   all we actually check here is the zero/non-zero status.  */
7139 label = gen_label_rtx ();
7140 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7141 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7144 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7145 we used movdqa (i.e. TImode) instead? Perhaps even better would
7146 be if we could determine the real mode of the data, via a hook
7147 into pass_stdarg. Ignore all that for now. */
7149 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7150 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7152 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7153 if (max > X86_64_SSE_REGPARM_MAX)
7154 max = X86_64_SSE_REGPARM_MAX;
7156 for (i = cum->sse_regno; i < max; ++i)
7158 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7159 mem = gen_rtx_MEM (smode, mem);
7160 MEM_NOTRAP_P (mem) = 1;
7161 set_mem_alias_set (mem, set);
7162 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7164 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7172 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7174 alias_set_type set = get_varargs_alias_set ();
7177 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7181 mem = gen_rtx_MEM (Pmode,
7182 plus_constant (virtual_incoming_args_rtx,
7183 i * UNITS_PER_WORD));
7184 MEM_NOTRAP_P (mem) = 1;
7185 set_mem_alias_set (mem, set);
7187 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7188 emit_move_insn (mem, reg);
7193 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7194 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7197 CUMULATIVE_ARGS next_cum;
7200 /* This argument doesn't appear to be used anymore. Which is good,
7201 because the old code here didn't suppress rtl generation. */
7202 gcc_assert (!no_rtl);
7207 fntype = TREE_TYPE (current_function_decl);
7209 /* For varargs, we do not want to skip the dummy va_dcl argument.
7210 For stdargs, we do want to skip the last named argument. */
7212 if (stdarg_p (fntype))
7213 ix86_function_arg_advance (&next_cum, mode, type, true);
7215 if (cum->call_abi == MS_ABI)
7216 setup_incoming_varargs_ms_64 (&next_cum);
7218 setup_incoming_varargs_64 (&next_cum);
/* Check whether TYPE is a va_list of the plain "char *" kind.  */
7224 is_va_list_char_pointer (tree type)
7228 /* For 32-bit it is always true. */
7231 canonic = ix86_canonical_va_list_type (type);
7232 return (canonic == ms_va_list_type_node
7233 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7236 /* Implement va_start. */
7239 ix86_va_start (tree valist, rtx nextarg)
7241 HOST_WIDE_INT words, n_gpr, n_fpr;
7242 tree f_gpr, f_fpr, f_ovf, f_sav;
7243 tree gpr, fpr, ovf, sav, t;
/* Only the 64-bit target needs something special.  */
7247 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7249 std_expand_builtin_va_start (valist, nextarg);
7253 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7254 f_fpr = DECL_CHAIN (f_gpr);
7255 f_ovf = DECL_CHAIN (f_fpr);
7256 f_sav = DECL_CHAIN (f_ovf);
7258 valist = build_simple_mem_ref (valist);
7259 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7260 /* The following should be folded into the MEM_REF offset. */
7261 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7263 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7265 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7267 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7270 /* Count number of gp and fp argument registers used. */
7271 words = crtl->args.info.words;
7272 n_gpr = crtl->args.info.regno;
7273 n_fpr = crtl->args.info.sse_regno;
7275 if (cfun->va_list_gpr_size)
7277 type = TREE_TYPE (gpr);
7278 t = build2 (MODIFY_EXPR, type,
7279 gpr, build_int_cst (type, n_gpr * 8));
7280 TREE_SIDE_EFFECTS (t) = 1;
7281 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7284 if (TARGET_SSE && cfun->va_list_fpr_size)
7286 type = TREE_TYPE (fpr);
7287 t = build2 (MODIFY_EXPR, type, fpr,
7288 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7289 TREE_SIDE_EFFECTS (t) = 1;
7290 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7293 /* Find the overflow area. */
7294 type = TREE_TYPE (ovf);
7295 t = make_tree (type, crtl->args.internal_arg_pointer);
7297 t = build2 (POINTER_PLUS_EXPR, type, t,
7298 size_int (words * UNITS_PER_WORD));
7299 t = build2 (MODIFY_EXPR, type, ovf, t);
7300 TREE_SIDE_EFFECTS (t) = 1;
7301 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7303 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
/* Find the register save area.
   The function prologue saves it right above the stack frame.  */
7307 type = TREE_TYPE (sav);
7308 t = make_tree (type, frame_pointer_rtx);
7309 if (!ix86_varargs_gpr_size)
7310 t = build2 (POINTER_PLUS_EXPR, type, t,
7311 size_int (-8 * X86_64_REGPARM_MAX));
7312 t = build2 (MODIFY_EXPR, type, sav, t);
7313 TREE_SIDE_EFFECTS (t) = 1;
7314 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
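/* Illustrative result of the assignments above: in
   void f (int a, ...) with no SSE arguments consumed, va_start leaves
   gp_offset == 8 (one GPR eaten by A), fp_offset == 48
   (8 * X86_64_REGPARM_MAX), overflow_arg_area just past the named
   stack words, and reg_save_area at the prologue's register dump.  */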
7318 /* Implement va_arg. */
7321 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7324 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7325 tree f_gpr, f_fpr, f_ovf, f_sav;
7326 tree gpr, fpr, ovf, sav, t;
7328 tree lab_false, lab_over = NULL_TREE;
7333 enum machine_mode nat_mode;
7334 unsigned int arg_boundary;
/* Only the 64-bit target needs something special.  */
7337 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7338 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7340 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7341 f_fpr = DECL_CHAIN (f_gpr);
7342 f_ovf = DECL_CHAIN (f_fpr);
7343 f_sav = DECL_CHAIN (f_ovf);
7345 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7346 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7347 valist = build_va_arg_indirect_ref (valist);
7348 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7349 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7350 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7352 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7354 type = build_pointer_type (type);
7355 size = int_size_in_bytes (type);
7356 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7358 nat_mode = type_natural_mode (type, NULL);
/* Unnamed 256-bit vector mode parameters are passed on the stack.  */
7368 if (ix86_cfun_abi () == SYSV_ABI)
7375 container = construct_container (nat_mode, TYPE_MODE (type),
7376 type, 0, X86_64_REGPARM_MAX,
7377 X86_64_SSE_REGPARM_MAX, intreg,
7382 /* Pull the value out of the saved registers. */
7384 addr = create_tmp_var (ptr_type_node, "addr");
7388 int needed_intregs, needed_sseregs;
7390 tree int_addr, sse_addr;
7392 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7393 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7395 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7397 need_temp = (!REG_P (container)
7398 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7399 || TYPE_ALIGN (type) > 128));
/* In case we are passing a structure, verify that it is laid out as a
   consecutive block in the register save area.  If not, we need to do
   moves.  */
7403 if (!need_temp && !REG_P (container))
/* Verify that all registers are strictly consecutive.  */
7406 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7410 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7412 rtx slot = XVECEXP (container, 0, i);
7413 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7414 || INTVAL (XEXP (slot, 1)) != i * 16)
7422 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7424 rtx slot = XVECEXP (container, 0, i);
7425 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7426 || INTVAL (XEXP (slot, 1)) != i * 8)
7438 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7439 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7442 /* First ensure that we fit completely in registers. */
7445 t = build_int_cst (TREE_TYPE (gpr),
7446 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7447 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7448 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7449 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7450 gimplify_and_add (t, pre_p);
7454 t = build_int_cst (TREE_TYPE (fpr),
7455 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7456 + X86_64_REGPARM_MAX * 8);
7457 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7458 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7459 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7460 gimplify_and_add (t, pre_p);
7463 /* Compute index to start of area used for integer regs. */
7466 /* int_addr = gpr + sav; */
7467 t = fold_convert (sizetype, gpr);
7468 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7469 gimplify_assign (int_addr, t, pre_p);
7473 /* sse_addr = fpr + sav; */
7474 t = fold_convert (sizetype, fpr);
7475 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7476 gimplify_assign (sse_addr, t, pre_p);
7480 int i, prev_size = 0;
7481 tree temp = create_tmp_var (type, "va_arg_tmp");
7484 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7485 gimplify_assign (addr, t, pre_p);
7487 for (i = 0; i < XVECLEN (container, 0); i++)
7489 rtx slot = XVECEXP (container, 0, i);
7490 rtx reg = XEXP (slot, 0);
7491 enum machine_mode mode = GET_MODE (reg);
7497 tree dest_addr, dest;
7498 int cur_size = GET_MODE_SIZE (mode);
7500 if (prev_size + cur_size > size)
7502 cur_size = size - prev_size;
7503 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7504 if (mode == BLKmode)
7507 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7508 if (mode == GET_MODE (reg))
7509 addr_type = build_pointer_type (piece_type);
7511 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7513 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7516 if (SSE_REGNO_P (REGNO (reg)))
7518 src_addr = sse_addr;
7519 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7523 src_addr = int_addr;
7524 src_offset = REGNO (reg) * 8;
7526 src_addr = fold_convert (addr_type, src_addr);
7527 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7528 size_int (src_offset));
7530 dest_addr = fold_convert (daddr_type, addr);
7531 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7532 size_int (INTVAL (XEXP (slot, 1))));
7533 if (cur_size == GET_MODE_SIZE (mode))
7535 src = build_va_arg_indirect_ref (src_addr);
7536 dest = build_va_arg_indirect_ref (dest_addr);
7538 gimplify_assign (dest, src, pre_p);
7543 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7544 3, dest_addr, src_addr,
7545 size_int (cur_size));
7546 gimplify_and_add (copy, pre_p);
7548 prev_size += cur_size;
7554 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7555 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7556 gimplify_assign (gpr, t, pre_p);
7561 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7562 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7563 gimplify_assign (fpr, t, pre_p);
7566 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7568 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7571 /* ... otherwise out of the overflow area. */
/* When the caller aligns a parameter on the stack, a parameter
   alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
   MAX_SUPPORTED_STACK_ALIGNMENT.  Match the callee with the caller
   here.  */
7577 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7578 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7579 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7581 /* Care for on-stack alignment if needed. */
7582 if (arg_boundary <= 64 || size == 0)
7586 HOST_WIDE_INT align = arg_boundary / 8;
7587 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7588 size_int (align - 1));
7589 t = fold_convert (sizetype, t);
7590 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7592 t = fold_convert (TREE_TYPE (ovf), t);
7595 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7596 gimplify_assign (addr, t, pre_p);
7598 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7599 size_int (rsize * UNITS_PER_WORD));
7600 gimplify_assign (unshare_expr (ovf), t, pre_p);
7603 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7605 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7606 addr = fold_convert (ptrtype, addr);
7609 addr = build_va_arg_indirect_ref (addr);
7610 return build_va_arg_indirect_ref (addr);
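/* Sketch of the GIMPLE emitted above for va_arg (ap, int), i.e.
   needed_intregs == 1, needed_sseregs == 0 (illustrative):

       if (ap.gp_offset >= 48) goto lab_false;
       addr = ap.reg_save_area + ap.gp_offset;
       ap.gp_offset += 8;
       goto lab_over;
     lab_false:
       addr = ap.overflow_arg_area;
       ap.overflow_arg_area += 8;
     lab_over:
       result = *(int *) addr;
*/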
7613 /* Return nonzero if OPNUM's MEM should be matched
7614 in movabs* patterns. */
7617 ix86_check_movabs (rtx insn, int opnum)
7621 set = PATTERN (insn);
7622 if (GET_CODE (set) == PARALLEL)
7623 set = XVECEXP (set, 0, 0);
7624 gcc_assert (GET_CODE (set) == SET);
7625 mem = XEXP (set, opnum);
7626 while (GET_CODE (mem) == SUBREG)
7627 mem = SUBREG_REG (mem);
7628 gcc_assert (MEM_P (mem));
7629 return (volatile_ok || !MEM_VOLATILE_P (mem));
7632 /* Initialize the table of extra 80387 mathematical constants. */
7635 init_ext_80387_constants (void)
7637 static const char * cst[5] =
7639 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7640 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7641 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7642 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7643 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7647 for (i = 0; i < 5; i++)
7649 real_from_string (&ext_80387_constants_table[i], cst[i]);
7650 /* Ensure each constant is rounded to XFmode precision. */
7651 real_convert (&ext_80387_constants_table[i],
7652 XFmode, &ext_80387_constants_table[i]);
7655 ext_80387_constants_init = 1;
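/* Usage sketch: after initialization, ext_80387_constants_table[4]
   holds pi correctly rounded to XFmode, so standard_80387_constant_p
   matches it and the value can be materialized with a single "fldpi"
   instead of a constant pool load.  */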
7658 /* Return true if the constant is something that can be loaded with
7659 a special instruction. */
7662 standard_80387_constant_p (rtx x)
7664 enum machine_mode mode = GET_MODE (x);
7668 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7671 if (x == CONST0_RTX (mode))
7673 if (x == CONST1_RTX (mode))
7676 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7678 /* For XFmode constants, try to find a special 80387 instruction when
7679 optimizing for size or on those CPUs that benefit from them. */
7681 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7685 if (! ext_80387_constants_init)
7686 init_ext_80387_constants ();
7688 for (i = 0; i < 5; i++)
7689 if (real_identical (&r, &ext_80387_constants_table[i]))
/* A load of the constant -0.0 or -1.0 will be split into an
   fldz;fchs or fld1;fchs sequence.  */
7695 if (real_isnegzero (&r))
7697 if (real_identical (&r, &dconstm1))
/* Return the opcode of the special instruction to be used to load
   the constant X.  */
7707 standard_80387_constant_opcode (rtx x)
7709 switch (standard_80387_constant_p (x))
7733 /* Return the CONST_DOUBLE representing the 80387 constant that is
7734 loaded by the specified special instruction. The argument IDX
7735 matches the return value from standard_80387_constant_p. */
7738 standard_80387_constant_rtx (int idx)
7742 if (! ext_80387_constants_init)
7743 init_ext_80387_constants ();
7759 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
/* Return 1 if X is all 0s and 2 if X is all 1s
   in a supported SSE vector mode; otherwise return 0.  */
7767 standard_sse_constant_p (rtx x)
7769 enum machine_mode mode = GET_MODE (x);
7771 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7773 if (vector_all_ones_operand (x, mode))
/* Return the opcode of the special instruction to be used to load
   the constant X.  */
7793 standard_sse_constant_opcode (rtx insn, rtx x)
7795 switch (standard_sse_constant_p (x))
7798 switch (get_attr_mode (insn))
7801 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7803 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7804 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7806 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7808 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7809 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7811 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7813 return "vxorps\t%x0, %x0, %x0";
7815 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7816 return "vxorps\t%x0, %x0, %x0";
7818 return "vxorpd\t%x0, %x0, %x0";
7820 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7821 return "vxorps\t%x0, %x0, %x0";
7823 return "vpxor\t%x0, %x0, %x0";
7828 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
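/* E.g. (illustrative): for a V4SFmode operand, standard_sse_constant_p
   returns 1 on the zero vector and 2 on the all-ones vector, so the
   routine above emits "xorps %xmm0, %xmm0" resp.
   "pcmpeqd %xmm0, %xmm0" rather than a constant pool reference.  */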
/* Return 1 if OP contains a symbol reference.  */
7838 symbolic_reference_mentioned_p (rtx op)
7843 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7846 fmt = GET_RTX_FORMAT (GET_CODE (op));
7847 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7853 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7854 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7858 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7865 /* Return 1 if it is appropriate to emit `ret' instructions in the
7866 body of a function. Do this only if the epilogue is simple, needing a
7867 couple of insns. Prior to reloading, we can't tell how many registers
7868 must be saved, so return 0 then. Return 0 if there is no frame
7869 marker to de-allocate. */
7872 ix86_can_use_return_insn_p (void)
7874 struct ix86_frame frame;
7876 if (! reload_completed || frame_pointer_needed)
7879 /* Don't allow more than 32k pop, since that's all we can do
7880 with one instruction. */
7881 if (crtl->args.pops_args && crtl->args.size >= 32768)
7884 ix86_compute_frame_layout (&frame);
7885 return (frame.stack_pointer_offset == UNITS_PER_WORD
7886 && (frame.nregs + frame.nsseregs) == 0);
7889 /* Value should be nonzero if functions must have frame pointers.
7890 Zero means the frame pointer need not be set up (and parms may
7891 be accessed via the stack pointer) in functions that seem suitable. */
7894 ix86_frame_pointer_required (void)
7896 /* If we accessed previous frames, then the generated code expects
7897 to be able to access the saved ebp value in our frame. */
7898 if (cfun->machine->accesses_prev_frame)
/* Several x86 OSes need a frame pointer for other reasons,
   usually pertaining to setjmp.  */
7903 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7906 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7907 the frame pointer by default. Turn it back on now if we've not
7908 got a leaf function. */
7909 if (TARGET_OMIT_LEAF_FRAME_POINTER
7910 && (!current_function_is_leaf
7911 || ix86_current_function_calls_tls_descriptor))
7914 if (crtl->profile && !flag_fentry)
7920 /* Record that the current function accesses previous call frames. */
7923 ix86_setup_frame_addresses (void)
7925 cfun->machine->accesses_prev_frame = 1;
7928 #ifndef USE_HIDDEN_LINKONCE
7929 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7930 # define USE_HIDDEN_LINKONCE 1
7932 # define USE_HIDDEN_LINKONCE 0
7936 static int pic_labels_used;
/* Fill in the label name that should be used for a pc thunk for
   the given register.  */
7942 get_pc_thunk_name (char name[32], unsigned int regno)
7944 gcc_assert (!TARGET_64BIT);
7946 if (USE_HIDDEN_LINKONCE)
7947 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7949 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7953 /* This function generates code for -fpic that loads %ebx with
7954 the return address of the caller and then returns. */
7957 ix86_code_end (void)
7962 for (regno = 0; regno < 8; ++regno)
7967 if (! ((pic_labels_used >> regno) & 1))
7970 get_pc_thunk_name (name, regno);
7972 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7973 get_identifier (name),
7974 build_function_type (void_type_node, void_list_node));
7975 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7976 NULL_TREE, void_type_node);
7977 TREE_PUBLIC (decl) = 1;
7978 TREE_STATIC (decl) = 1;
7983 switch_to_section (darwin_sections[text_coal_section]);
7984 fputs ("\t.weak_definition\t", asm_out_file);
7985 assemble_name (asm_out_file, name);
7986 fputs ("\n\t.private_extern\t", asm_out_file);
7987 assemble_name (asm_out_file, name);
7988 putc ('\n', asm_out_file);
7989 ASM_OUTPUT_LABEL (asm_out_file, name);
7990 DECL_WEAK (decl) = 1;
7994 if (USE_HIDDEN_LINKONCE)
7996 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7998 targetm.asm_out.unique_section (decl, 0);
7999 switch_to_section (get_named_section (decl, NULL, 0));
8001 targetm.asm_out.globalize_label (asm_out_file, name);
8002 fputs ("\t.hidden\t", asm_out_file);
8003 assemble_name (asm_out_file, name);
8004 putc ('\n', asm_out_file);
8005 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8009 switch_to_section (text_section);
8010 ASM_OUTPUT_LABEL (asm_out_file, name);
8013 DECL_INITIAL (decl) = make_node (BLOCK);
8014 current_function_decl = decl;
8015 init_function_start (decl);
8016 first_function_block_is_cold = false;
8017 /* Make sure unwind info is emitted for the thunk if needed. */
8018 final_start_function (emit_barrier (), asm_out_file, 1);
8020 xops[0] = gen_rtx_REG (Pmode, regno);
8021 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8022 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8023 output_asm_insn ("ret", xops);
8024 final_end_function ();
8025 init_insn_lengths ();
8026 free_after_compilation (cfun);
8028 current_function_decl = NULL;
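/* The complete thunk emitted above for, say, %ebx is just
   (illustrative):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies its own return address (the address of the instruction
   after the call) into the PIC register.  */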
8032 /* Emit code for the SET_GOT patterns. */
8035 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8041 if (TARGET_VXWORKS_RTP && flag_pic)
8043 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8044 xops[2] = gen_rtx_MEM (Pmode,
8045 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8046 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8048 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8049 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8050 an unadorned address. */
8051 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8052 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8053 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8057 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8059 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8061 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8064 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8067 output_asm_insn ("call\t%a2", xops);
8068 #ifdef DWARF2_UNWIND_INFO
/* The call to the next label acts as a push.  */
8070 if (dwarf2out_do_frame ())
8074 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8075 gen_rtx_PLUS (Pmode,
8078 RTX_FRAME_RELATED_P (insn) = 1;
8079 dwarf2out_frame_debug (insn, true);
8086 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8087 is what will be referenced by the Mach-O PIC subsystem. */
8089 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8092 targetm.asm_out.internal_label (asm_out_file, "L",
8093 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8097 output_asm_insn ("pop%z0\t%0", xops);
8098 #ifdef DWARF2_UNWIND_INFO
8099 /* The pop adjusts the stack and clobbers dest, but doesn't restore it
8100 for unwind info purposes. */
8101 if (dwarf2out_do_frame ())
8105 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8106 dwarf2out_frame_debug (insn, true);
8107 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8108 gen_rtx_PLUS (Pmode,
8111 RTX_FRAME_RELATED_P (insn) = 1;
8112 dwarf2out_frame_debug (insn, true);
8121 get_pc_thunk_name (name, REGNO (dest));
8122 pic_labels_used |= 1 << REGNO (dest);
8124 #ifdef DWARF2_UNWIND_INFO
8125 /* Ensure all queued register saves are flushed before the
8127 if (dwarf2out_do_frame ())
8131 insn = emit_barrier ();
8133 dwarf2out_frame_debug (insn, false);
8136 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8137 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8138 output_asm_insn ("call\t%X2", xops);
8139 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8140 is what will be referenced by the Mach-O PIC subsystem. */
8143 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8145 targetm.asm_out.internal_label (asm_out_file, "L",
8146 CODE_LABEL_NUMBER (label));
8153 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8154 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8156 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
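/* A sketch of what the routine above emits on 32-bit ELF when deep
   branch prediction is not in use, with dest == %ebx (the label number
   is illustrative):

       call    .L2
   .L2:
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call instead targets the
   get_pc_thunk helper for the register, keeping the call/return stack
   predictor balanced.  */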
8161 /* Generate a "push" pattern for input ARG. */
8166 struct machine_function *m = cfun->machine;
8168 if (m->fs.cfa_reg == stack_pointer_rtx)
8169 m->fs.cfa_offset += UNITS_PER_WORD;
8170 m->fs.sp_offset += UNITS_PER_WORD;
8172 return gen_rtx_SET (VOIDmode,
8174 gen_rtx_PRE_DEC (Pmode,
8175 stack_pointer_rtx)),
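/* Schematically, the pattern returned above is

     (set (mem (pre_dec (reg sp))) (reg arg))

   which the backend matches as a single push instruction; the
   frame-state updates above keep cfa_offset and sp_offset in sync with
   the stack decrement the push performs.  */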
8179 /* Return >= 0 if there is an unused call-clobbered register available
8180 for the entire function. */
8183 ix86_select_alt_pic_regnum (void)
8185 if (current_function_is_leaf
8187 && !ix86_current_function_calls_tls_descriptor)
8190 /* Can't use the same register for both PIC and DRAP. */
8192 drap = REGNO (crtl->drap_reg);
8195 for (i = 2; i >= 0; --i)
8196 if (i != drap && !df_regs_ever_live_p (i))
8200 return INVALID_REGNUM;
8203 /* Return 1 if we need to save REGNO. */
8205 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8207 if (pic_offset_table_rtx
8208 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8209 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8211 || crtl->calls_eh_return
8212 || crtl->uses_const_pool))
8214 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8219 if (crtl->calls_eh_return && maybe_eh_return)
8224 unsigned test = EH_RETURN_DATA_REGNO (i);
8225 if (test == INVALID_REGNUM)
8232 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8235 return (df_regs_ever_live_p (regno)
8236 && !call_used_regs[regno]
8237 && !fixed_regs[regno]
8238 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8241 /* Return the number of saved general purpose registers. */
8244 ix86_nsaved_regs (void)
8249 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8250 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8255 /* Return the number of saved SSE registers. */
8258 ix86_nsaved_sseregs (void)
8263 if (ix86_cfun_abi () != MS_ABI)
8265 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8266 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8271 /* Given FROM and TO register numbers, say whether this elimination is
8272 allowed. If stack alignment is needed, we can only replace argument
8273 pointer with hard frame pointer, or replace frame pointer with stack
8274 pointer. Otherwise, frame pointer elimination is automatically
8275 handled and all other eliminations are valid. */
8278 ix86_can_eliminate (const int from, const int to)
8280 if (stack_realign_fp)
8281 return ((from == ARG_POINTER_REGNUM
8282 && to == HARD_FRAME_POINTER_REGNUM)
8283 || (from == FRAME_POINTER_REGNUM
8284 && to == STACK_POINTER_REGNUM));
8286 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8289 /* Return the offset between two registers, one to be eliminated, and the other
8290 its replacement, at the start of a routine. */
8293 ix86_initial_elimination_offset (int from, int to)
8295 struct ix86_frame frame;
8296 ix86_compute_frame_layout (&frame);
8298 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8299 return frame.hard_frame_pointer_offset;
8300 else if (from == FRAME_POINTER_REGNUM
8301 && to == HARD_FRAME_POINTER_REGNUM)
8302 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8305 gcc_assert (to == STACK_POINTER_REGNUM);
8307 if (from == ARG_POINTER_REGNUM)
8308 return frame.stack_pointer_offset;
8310 gcc_assert (from == FRAME_POINTER_REGNUM);
8311 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8315 /* In a dynamically-aligned function, we can't know the offset from
8316 stack pointer to frame pointer, so we must ensure that setjmp
8317 eliminates fp against the hard fp (%ebp) rather than trying to
8318 index from %esp up to the top of the frame across a gap that is
8319 of unknown (at compile-time) size. */
8321 ix86_builtin_setjmp_frame_value (void)
8323 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8326 /* Fill structure ix86_frame about frame of currently computed function. */
8329 ix86_compute_frame_layout (struct ix86_frame *frame)
8331 unsigned int stack_alignment_needed;
8332 HOST_WIDE_INT offset;
8333 unsigned int preferred_alignment;
8334 HOST_WIDE_INT size = get_frame_size ();
8335 HOST_WIDE_INT to_allocate;
8337 frame->nregs = ix86_nsaved_regs ();
8338 frame->nsseregs = ix86_nsaved_sseregs ();
8340 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8341 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8343 /* The MS ABI seems to require stack alignment to always be 16, except in
8344 function prologues and leaf functions. */
8345 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8346 && (!current_function_is_leaf || cfun->calls_alloca != 0
8347 || ix86_current_function_calls_tls_descriptor))
8349 preferred_alignment = 16;
8350 stack_alignment_needed = 16;
8351 crtl->preferred_stack_boundary = 128;
8352 crtl->stack_alignment_needed = 128;
8355 gcc_assert (!size || stack_alignment_needed);
8356 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8357 gcc_assert (preferred_alignment <= stack_alignment_needed);
8359 /* During reload iterations the number of registers saved can change.
8360 Recompute the value as needed. Do not recompute when the number of
8361 registers didn't change, as reload makes multiple calls to this function
8362 and does not expect the decision to change within a single iteration. */
8363 if (!optimize_function_for_size_p (cfun)
8364 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8366 int count = frame->nregs;
8367 struct cgraph_node *node = cgraph_node (current_function_decl);
8369 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8370 /* The fast prologue uses move instead of push to save registers. This
8371 is significantly longer, but also executes faster as modern hardware
8372 can execute the moves in parallel, but can't do that for push/pop.
8374 Be careful about choosing which prologue to emit: when the function
8375 takes many instructions to execute, we may as well use the slow version,
8376 likewise when the function is known to be outside a hot spot (the latter
8377 is known with profile feedback only). Weight the size of the function by
8378 the number of registers to save, as it is cheap to use one or two push
8379 instructions but very slow to use many of them. */
8381 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8382 if (node->frequency < NODE_FREQUENCY_NORMAL
8383 || (flag_branch_probabilities
8384 && node->frequency < NODE_FREQUENCY_HOT))
8385 cfun->machine->use_fast_prologue_epilogue = false;
8387 cfun->machine->use_fast_prologue_epilogue
8388 = !expensive_function_p (count);
8390 if (TARGET_PROLOGUE_USING_MOVE
8391 && cfun->machine->use_fast_prologue_epilogue)
8392 frame->save_regs_using_mov = true;
8394 frame->save_regs_using_mov = false;
8396 /* If static stack checking is enabled and done with probes, the registers
8397 need to be saved before allocating the frame. */
8398 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8399 frame->save_regs_using_mov = false;
8401 /* Skip return address. */
8402 offset = UNITS_PER_WORD;
8404 /* Skip pushed static chain. */
8405 if (ix86_static_chain_on_stack)
8406 offset += UNITS_PER_WORD;
8408 /* Skip saved base pointer. */
8409 if (frame_pointer_needed)
8410 offset += UNITS_PER_WORD;
8412 frame->hard_frame_pointer_offset = offset;
8414 /* Register save area */
8415 offset += frame->nregs * UNITS_PER_WORD;
8416 frame->reg_save_offset = offset;
8418 /* Align and set SSE register save area. */
8419 if (frame->nsseregs)
8421 /* The only ABI that has saved SSE registers (Win64) also has a
8422 16-byte aligned default stack, and thus we don't need to be
8423 within the re-aligned local stack frame to save them. */
8424 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8425 offset = (offset + 16 - 1) & -16;
8426 offset += frame->nsseregs * 16;
8428 frame->sse_reg_save_offset = offset;
8430 /* The re-aligned stack starts here. Values before this point are not
8431 directly comparable with values below this point. In order to make
8432 sure that no value happens to be the same before and after, force
8433 the alignment computation below to add a non-zero value. */
8434 if (stack_realign_fp)
8435 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8438 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8439 offset += frame->va_arg_size;
8441 /* Align start of frame for local function. */
8442 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
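/* The (x + a - 1) & -a expression used above is the standard idiom for
   rounding x up to the next multiple of a power-of-two a; e.g. with
   x = 20 and a = 16, (20 + 15) & -16 = 35 & ~15 = 32.  */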
8444 /* Frame pointer points here. */
8445 frame->frame_pointer_offset = offset;
8449 /* Add the outgoing arguments area. It can be skipped if we eliminated
8450 all the function calls as dead code. Skipping is however impossible
8451 when the function calls alloca, since the alloca expander assumes
8452 that the last crtl->outgoing_args_size bytes of the stack frame
8453 are unused. */
8454 if (ACCUMULATE_OUTGOING_ARGS
8455 && (!current_function_is_leaf || cfun->calls_alloca
8456 || ix86_current_function_calls_tls_descriptor))
8458 offset += crtl->outgoing_args_size;
8459 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8462 frame->outgoing_arguments_size = 0;
8464 /* Align stack boundary. Only needed if we're calling another function
8466 if (!current_function_is_leaf || cfun->calls_alloca
8467 || ix86_current_function_calls_tls_descriptor)
8468 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8470 /* We've reached end of stack frame. */
8471 frame->stack_pointer_offset = offset;
8473 /* Size prologue needs to allocate. */
8474 to_allocate = offset - frame->sse_reg_save_offset;
8476 if ((!to_allocate && frame->nregs <= 1)
8477 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8478 frame->save_regs_using_mov = false;
8480 if (ix86_using_red_zone ()
8481 && current_function_sp_is_unchanging
8482 && current_function_is_leaf
8483 && !ix86_current_function_calls_tls_descriptor)
8485 frame->red_zone_size = to_allocate;
8486 if (frame->save_regs_using_mov)
8487 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8488 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8489 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8492 frame->red_zone_size = 0;
8493 frame->stack_pointer_offset -= frame->red_zone_size;
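/* To summarize, the offsets computed above describe a frame of roughly
   this shape (a sketch; bracketed areas may be empty, addresses grow
   downward, and each offset names the distance from the CFA to the
   boundary below the corresponding area):

        return address                          <- CFA
        [pushed static chain]
        [saved frame pointer]
                                                <- hard_frame_pointer_offset
        saved integer registers
                                                <- reg_save_offset
        [16-byte aligned SSE register save]
                                                <- sse_reg_save_offset
        [va_arg register save area]
                                                <- frame_pointer_offset
        local variables
        [outgoing arguments]
                                                <- stack_pointer_offset  */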
8496 /* This is semi-inlined memory_address_length, but simplified
8497 since we know that we're always dealing with reg+offset, and
8498 to avoid having to create and discard all that rtl. */
8501 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8507 /* EBP and R13 cannot be encoded without an offset. */
8508 len = (regno == BP_REG || regno == R13_REG);
8510 else if (IN_RANGE (offset, -128, 127))
8513 /* ESP and R12 must be encoded with a SIB byte. */
8514 if (regno == SP_REG || regno == R12_REG)
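/* A few worked cases of the length computation above, counting
   displacement and SIB bytes beyond the basic ModRM encoding:

       (%eax)      -> 0
       (%ebp)      -> 1   (an explicit zero disp8 is required)
       8(%eax)     -> 1   (disp8)
       8(%esp)     -> 2   (disp8 + SIB)
       256(%eax)   -> 4   (disp32)
       256(%esp)   -> 5   (disp32 + SIB)  */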
8520 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8521 The valid base registers are taken from CFUN->MACHINE->FS. */
8524 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8526 const struct machine_function *m = cfun->machine;
8527 rtx base_reg = NULL;
8528 HOST_WIDE_INT base_offset = 0;
8530 if (m->use_fast_prologue_epilogue)
8532 /* Choose the base register most likely to allow the most scheduling
8533 opportunities. Generally FP is valid throughout the function,
8534 while DRAP must be reloaded within the epilogue. But choose either
8535 over the SP due to increased encoding size. */
8539 base_reg = hard_frame_pointer_rtx;
8540 base_offset = m->fs.fp_offset - cfa_offset;
8542 else if (m->fs.drap_valid)
8544 base_reg = crtl->drap_reg;
8545 base_offset = 0 - cfa_offset;
8547 else if (m->fs.sp_valid)
8549 base_reg = stack_pointer_rtx;
8550 base_offset = m->fs.sp_offset - cfa_offset;
8555 HOST_WIDE_INT toffset;
8558 /* Choose the base register with the smallest address encoding.
8559 With a tie, choose FP > DRAP > SP. */
8562 base_reg = stack_pointer_rtx;
8563 base_offset = m->fs.sp_offset - cfa_offset;
8564 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8566 if (m->fs.drap_valid)
8568 toffset = 0 - cfa_offset;
8569 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8572 base_reg = crtl->drap_reg;
8573 base_offset = toffset;
8579 toffset = m->fs.fp_offset - cfa_offset;
8580 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8583 base_reg = hard_frame_pointer_rtx;
8584 base_offset = toffset;
8589 gcc_assert (base_reg != NULL);
8591 return plus_constant (base_reg, base_offset);
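/* A hypothetical example of the tie-break above: with fp_offset == 8,
   sp_offset == 64 and cfa_offset == 4, the SP form would be 60(%esp)
   (disp8 plus a SIB byte, len 2) while the FP form is 4(%ebp)
   (disp8 only, len 1), so the frame pointer is chosen.  */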
8594 /* Emit code to save registers in the prologue. */
8597 ix86_emit_save_regs (void)
8602 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8603 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8605 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8606 RTX_FRAME_RELATED_P (insn) = 1;
8610 /* Emit a single register save at CFA - CFA_OFFSET. */
8613 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8614 HOST_WIDE_INT cfa_offset)
8616 struct machine_function *m = cfun->machine;
8617 rtx reg = gen_rtx_REG (mode, regno);
8618 rtx mem, addr, base, insn;
8620 addr = choose_baseaddr (cfa_offset);
8621 mem = gen_frame_mem (mode, addr);
8623 /* For SSE saves, we need to indicate the 128-bit alignment. */
8624 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8626 insn = emit_move_insn (mem, reg);
8627 RTX_FRAME_RELATED_P (insn) = 1;
8630 if (GET_CODE (base) == PLUS)
8631 base = XEXP (base, 0);
8632 gcc_checking_assert (REG_P (base));
8634 /* When saving registers into a re-aligned local stack frame, avoid
8635 any tricky guessing by dwarf2out. */
8636 if (m->fs.realigned)
8638 gcc_checking_assert (stack_realign_drap);
8640 if (regno == REGNO (crtl->drap_reg))
8642 /* A bit of a hack. We force the DRAP register to be saved in
8643 the re-aligned stack frame, which provides us with a copy
8644 of the CFA that will last past the prologue. Install it. */
8645 gcc_checking_assert (cfun->machine->fs.fp_valid);
8646 addr = plus_constant (hard_frame_pointer_rtx,
8647 cfun->machine->fs.fp_offset - cfa_offset);
8648 mem = gen_rtx_MEM (mode, addr);
8649 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8653 /* The frame pointer is a stable reference within the
8654 aligned frame. Use it. */
8655 gcc_checking_assert (cfun->machine->fs.fp_valid);
8656 addr = plus_constant (hard_frame_pointer_rtx,
8657 cfun->machine->fs.fp_offset - cfa_offset);
8658 mem = gen_rtx_MEM (mode, addr);
8659 add_reg_note (insn, REG_CFA_EXPRESSION,
8660 gen_rtx_SET (VOIDmode, mem, reg));
8664 /* The memory may not be relative to the current CFA register,
8665 which means that we may need to generate a new pattern for
8666 use by the unwind info. */
8667 else if (base != m->fs.cfa_reg)
8669 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8670 mem = gen_rtx_MEM (mode, addr);
8671 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8675 /* Emit code to save registers using MOV insns.
8676 First register is stored at CFA - CFA_OFFSET. */
8678 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8682 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8683 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8685 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8686 cfa_offset -= UNITS_PER_WORD;
8690 /* Emit code to save SSE registers using MOV insns.
8691 First register is stored at CFA - CFA_OFFSET. */
8693 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8697 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8698 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8700 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8705 static GTY(()) rtx queued_cfa_restores;
8707 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8708 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8709 Don't add the note if the previously saved value will be left untouched
8710 within the stack red-zone until return, as unwinders can find the same value
8711 in the register and on the stack. */
8714 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8716 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8721 add_reg_note (insn, REG_CFA_RESTORE, reg);
8722 RTX_FRAME_RELATED_P (insn) = 1;
8726 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8729 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8732 ix86_add_queued_cfa_restore_notes (rtx insn)
8735 if (!queued_cfa_restores)
8737 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8739 XEXP (last, 1) = REG_NOTES (insn);
8740 REG_NOTES (insn) = queued_cfa_restores;
8741 queued_cfa_restores = NULL_RTX;
8742 RTX_FRAME_RELATED_P (insn) = 1;
8745 /* Expand prologue or epilogue stack adjustment.
8746 The pattern exists to put a dependency on all ebp-based memory accesses.
8747 STYLE should be negative if instructions should be marked as frame related,
8748 zero if the %r11 register is live and cannot be freely used, and positive
8752 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8753 int style, bool set_cfa)
8755 struct machine_function *m = cfun->machine;
8759 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8760 else if (x86_64_immediate_operand (offset, DImode))
8761 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8765 /* r11 is used by indirect sibcall return as well, set before the
8766 epilogue and used after the epilogue. */
8768 tmp = gen_rtx_REG (DImode, R11_REG);
8771 gcc_assert (src != hard_frame_pointer_rtx
8772 && dest != hard_frame_pointer_rtx);
8773 tmp = hard_frame_pointer_rtx;
8775 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8777 RTX_FRAME_RELATED_P (insn) = 1;
8778 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8783 ix86_add_queued_cfa_restore_notes (insn);
8789 gcc_assert (m->fs.cfa_reg == src);
8790 m->fs.cfa_offset += INTVAL (offset);
8791 m->fs.cfa_reg = dest;
8793 r = gen_rtx_PLUS (Pmode, src, offset);
8794 r = gen_rtx_SET (VOIDmode, dest, r);
8795 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8796 RTX_FRAME_RELATED_P (insn) = 1;
8799 RTX_FRAME_RELATED_P (insn) = 1;
8801 if (dest == stack_pointer_rtx)
8803 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8804 bool valid = m->fs.sp_valid;
8806 if (src == hard_frame_pointer_rtx)
8808 valid = m->fs.fp_valid;
8809 ooffset = m->fs.fp_offset;
8811 else if (src == crtl->drap_reg)
8813 valid = m->fs.drap_valid;
8818 /* Else there are two possibilities: SP itself, which we set
8819 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
8820 taken care of by hand along the eh_return path. */
8821 gcc_checking_assert (src == stack_pointer_rtx
8822 || offset == const0_rtx);
8825 m->fs.sp_offset = ooffset - INTVAL (offset);
8826 m->fs.sp_valid = valid;
8830 /* Find an available register to be used as the dynamic realign argument
8831 pointer register. Such a register will be written to in the prologue and
8832 used at the beginning of the body, so it must not be
8833 1. a parameter passing register.
8835 We reuse the static-chain register if it is available. Otherwise, we
8836 use DI for i386 and R13 for x86-64. We chose R13 since it has
8839 Return: the regno of the chosen register. */
8842 find_drap_reg (void)
8844 tree decl = cfun->decl;
8848 /* Use R13 for a nested function or a function that needs a static chain.
8849 Since a function with a tail call may use any caller-saved
8850 register in the epilogue, DRAP must not use a caller-saved
8851 register in that case. */
8852 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8859 /* Use DI for a nested function or a function that needs a static chain.
8860 Since a function with a tail call may use any caller-saved
8861 register in the epilogue, DRAP must not use a caller-saved
8862 register in that case. */
8863 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8866 /* Reuse the static chain register if it isn't used for parameter
8868 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8869 && !lookup_attribute ("fastcall",
8870 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8871 && !lookup_attribute ("thiscall",
8872 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8879 /* Return minimum incoming stack alignment. */
8882 ix86_minimum_incoming_stack_boundary (bool sibcall)
8884 unsigned int incoming_stack_boundary;
8886 /* Prefer the one specified at command line. */
8887 if (ix86_user_incoming_stack_boundary)
8888 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8889 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8890 when -mstackrealign is used, this isn't a sibcall check, and the
8891 estimated stack alignment is 128 bits. */
8894 && ix86_force_align_arg_pointer
8895 && crtl->stack_alignment_estimated == 128)
8896 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8898 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8900 /* Incoming stack alignment can be changed on individual functions
8901 via force_align_arg_pointer attribute. We use the smallest
8902 incoming stack boundary. */
8903 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8904 && lookup_attribute (ix86_force_align_arg_pointer_string,
8905 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8906 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8908 /* The incoming stack frame has to be aligned at least at
8909 parm_stack_boundary. */
8910 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8911 incoming_stack_boundary = crtl->parm_stack_boundary;
8913 /* The stack at the entrance of main is aligned by the runtime. We use
8914 the smallest incoming stack boundary. */
8915 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8916 && DECL_NAME (current_function_decl)
8917 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8918 && DECL_FILE_SCOPE_P (current_function_decl))
8919 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8921 return incoming_stack_boundary;
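/* For instance, if the boundary selected so far is only word-aligned but
   some parameter requires 16-byte alignment (crtl->parm_stack_boundary
   == 128), the function returns 128; conversely, anything above
   MAIN_STACK_BOUNDARY is lowered to it for main, since that is all the
   runtime guarantees.  */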
8924 /* Update incoming stack boundary and estimated stack alignment. */
8927 ix86_update_stack_boundary (void)
8929 ix86_incoming_stack_boundary
8930 = ix86_minimum_incoming_stack_boundary (false);
8932 /* x86_64 varargs need 16-byte stack alignment for the register save
8936 && crtl->stack_alignment_estimated < 128)
8937 crtl->stack_alignment_estimated = 128;
8940 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8941 needed or an rtx for DRAP otherwise. */
8944 ix86_get_drap_rtx (void)
8946 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8947 crtl->need_drap = true;
8949 if (stack_realign_drap)
8951 /* Assign DRAP to vDRAP and return vDRAP. */
8952 unsigned int regno = find_drap_reg ();
8957 arg_ptr = gen_rtx_REG (Pmode, regno);
8958 crtl->drap_reg = arg_ptr;
8961 drap_vreg = copy_to_reg (arg_ptr);
8965 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8968 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8969 RTX_FRAME_RELATED_P (insn) = 1;
8977 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8980 ix86_internal_arg_pointer (void)
8982 return virtual_incoming_args_rtx;
8985 struct scratch_reg {
8990 /* Return a short-lived scratch register for use on function entry.
8991 In 32-bit mode, it is valid only after the registers are saved
8992 in the prologue. This register must be released by means of
8993 release_scratch_register_on_entry once it is dead. */
8996 get_scratch_register_on_entry (struct scratch_reg *sr)
9004 /* We always use R11 in 64-bit mode. */
9009 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9011 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9012 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9013 int regparm = ix86_function_regparm (fntype, decl);
9015 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9017 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9018 for the static chain register. */
9019 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9020 && drap_regno != AX_REG)
9022 else if (regparm < 2 && drap_regno != DX_REG)
9024 /* ecx is the static chain register. */
9025 else if (regparm < 3 && !fastcall_p && !static_chain_p
9026 && drap_regno != CX_REG)
9028 else if (ix86_save_reg (BX_REG, true))
9030 /* esi is the static chain register. */
9031 else if (!(regparm == 3 && static_chain_p)
9032 && ix86_save_reg (SI_REG, true))
9034 else if (ix86_save_reg (DI_REG, true))
9038 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9043 sr->reg = gen_rtx_REG (Pmode, regno);
9046 rtx insn = emit_insn (gen_push (sr->reg));
9047 RTX_FRAME_RELATED_P (insn) = 1;
9051 /* Release a scratch register obtained from the preceding function. */
9054 release_scratch_register_on_entry (struct scratch_reg *sr)
9058 rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
9060 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9061 RTX_FRAME_RELATED_P (insn) = 1;
9062 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9063 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9064 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9068 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9070 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9073 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9075 /* We skip the probe for the first interval + a small dope of 4 words and
9076 probe that many bytes past the specified size to maintain a protection
9077 area at the bottom of the stack. */
9078 const int dope = 4 * UNITS_PER_WORD;
9079 rtx size_rtx = GEN_INT (size);
9081 /* See if we have a constant small number of probes to generate. If so,
9082 that's the easy case. The run-time loop is made up of 11 insns in the
9083 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9084 for n intervals. */
9085 if (size <= 5 * PROBE_INTERVAL)
9087 HOST_WIDE_INT i, adjust;
9088 bool first_probe = true;
9090 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9091 values of N from 1 until it exceeds SIZE. If only one probe is
9092 needed, this will not generate any code. Then adjust and probe
9093 to PROBE_INTERVAL + SIZE. */
9094 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9098 adjust = 2 * PROBE_INTERVAL + dope;
9099 first_probe = false;
9102 adjust = PROBE_INTERVAL;
9104 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9105 plus_constant (stack_pointer_rtx, -adjust)));
9106 emit_stack_probe (stack_pointer_rtx);
9110 adjust = size + PROBE_INTERVAL + dope;
9112 adjust = size + PROBE_INTERVAL - i;
9114 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9115 plus_constant (stack_pointer_rtx, -adjust)));
9116 emit_stack_probe (stack_pointer_rtx);
9118 /* Adjust back to account for the additional first interval. */
9119 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9120 plus_constant (stack_pointer_rtx,
9121 PROBE_INTERVAL + dope)));
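/* A worked instance of the constant case above, taking PROBE_INTERVAL
   == 4096 and size == 10000: the loop emits SP -= 8192 + dope, probe,
   then SP -= 4096, probe; the tail emits SP -= 1808, probe (1808 ==
   10000 + 4096 - 12288); and the final adjustment is SP += 4096 + dope,
   for a net decrement of exactly 10000 bytes, with probes reaching one
   interval past the requested size.  */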
9124 /* Otherwise, do the same as above, but in a loop. Note that we must be
9125 extra careful with variables wrapping around because we might be at
9126 the very top (or the very bottom) of the address space and we have
9127 to be able to handle this case properly; in particular, we use an
9128 equality test for the loop condition. */
9131 HOST_WIDE_INT rounded_size;
9132 struct scratch_reg sr;
9134 get_scratch_register_on_entry (&sr);
9137 /* Step 1: round SIZE to the previous multiple of the interval. */
9139 rounded_size = size & -PROBE_INTERVAL;
9142 /* Step 2: compute initial and final value of the loop counter. */
9144 /* SP = SP_0 + PROBE_INTERVAL. */
9145 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9146 plus_constant (stack_pointer_rtx,
9147 - (PROBE_INTERVAL + dope))));
9149 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9150 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9151 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9152 gen_rtx_PLUS (Pmode, sr.reg,
9153 stack_pointer_rtx)));
9158 while (SP != LAST_ADDR)
9160 SP = SP + PROBE_INTERVAL
9164 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9165 values of N from 1 until it is equal to ROUNDED_SIZE. */
9167 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9170 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9171 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9173 if (size != rounded_size)
9175 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9176 plus_constant (stack_pointer_rtx,
9177 rounded_size - size)));
9178 emit_stack_probe (stack_pointer_rtx);
9181 /* Adjust back to account for the additional first interval. */
9182 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9183 plus_constant (stack_pointer_rtx,
9184 PROBE_INTERVAL + dope)));
9186 release_scratch_register_on_entry (&sr);
9189 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9190 cfun->machine->fs.sp_offset += size;
9192 /* Make sure nothing is scheduled before we are done. */
9193 emit_insn (gen_blockage ());
9196 /* Adjust the stack pointer up to REG while probing it. */
9199 output_adjust_stack_and_probe (rtx reg)
9201 static int labelno = 0;
9202 char loop_lab[32], end_lab[32];
9205 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9206 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9208 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9210 /* Jump to END_LAB if SP == LAST_ADDR. */
9211 xops[0] = stack_pointer_rtx;
9213 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9214 fputs ("\tje\t", asm_out_file);
9215 assemble_name_raw (asm_out_file, end_lab);
9216 fputc ('\n', asm_out_file);
9218 /* SP = SP + PROBE_INTERVAL. */
9219 xops[1] = GEN_INT (PROBE_INTERVAL);
9220 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9223 xops[1] = const0_rtx;
9224 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9226 fprintf (asm_out_file, "\tjmp\t");
9227 assemble_name_raw (asm_out_file, loop_lab);
9228 fputc ('\n', asm_out_file);
9230 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
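/* Assuming the usual 4096-byte probe interval and a 32-bit target, the
   loop printed above reads (label numbers and the scratch register are
   illustrative):

   .LPSRL0:
           cmpl    %eax, %esp
           je      .LPSRE0
           subl    $4096, %esp
           orl     $0, (%esp)
           jmp     .LPSRL0
   .LPSRE0:
*/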
9235 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9236 inclusive. These are offsets from the current stack pointer. */
9239 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9241 /* See if we have a constant small number of probes to generate. If so,
9242 that's the easy case. The run-time loop is made up of 7 insns in the
9243 generic case while the compile-time loop is made up of n insns for n #
9245 if (size <= 7 * PROBE_INTERVAL)
9249 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9250 it exceeds SIZE. If only one probe is needed, this will not
9251 generate any code. Then probe at FIRST + SIZE. */
9252 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9253 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9255 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
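/* E.g. with FIRST == 0 and SIZE == 3 * PROBE_INTERVAL + 100 this emits
   probes at -PROBE_INTERVAL, -2 * PROBE_INTERVAL, -3 * PROBE_INTERVAL
   and finally -(3 * PROBE_INTERVAL + 100), all relative to the current
   stack pointer.  */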
9258 /* Otherwise, do the same as above, but in a loop. Note that we must be
9259 extra careful with variables wrapping around because we might be at
9260 the very top (or the very bottom) of the address space and we have
9261 to be able to handle this case properly; in particular, we use an
9262 equality test for the loop condition. */
9265 HOST_WIDE_INT rounded_size, last;
9266 struct scratch_reg sr;
9268 get_scratch_register_on_entry (&sr);
9271 /* Step 1: round SIZE to the previous multiple of the interval. */
9273 rounded_size = size & -PROBE_INTERVAL;
9276 /* Step 2: compute initial and final value of the loop counter. */
9278 /* TEST_OFFSET = FIRST. */
9279 emit_move_insn (sr.reg, GEN_INT (-first));
9281 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9282 last = first + rounded_size;
9287 while (TEST_ADDR != LAST_ADDR)
9289 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9293 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9294 until it is equal to ROUNDED_SIZE. */
9296 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9299 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9300 that SIZE is equal to ROUNDED_SIZE. */
9302 if (size != rounded_size)
9303 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9306 rounded_size - size));
9308 release_scratch_register_on_entry (&sr);
9311 /* Make sure nothing is scheduled before we are done. */
9312 emit_insn (gen_blockage ());
9315 /* Probe a range of stack addresses from REG to END, inclusive. These are
9316 offsets from the current stack pointer. */
9319 output_probe_stack_range (rtx reg, rtx end)
9321 static int labelno = 0;
9322 char loop_lab[32], end_lab[32];
9325 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9326 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9328 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9330 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9333 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9334 fputs ("\tje\t", asm_out_file);
9335 assemble_name_raw (asm_out_file, end_lab);
9336 fputc ('\n', asm_out_file);
9338 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9339 xops[1] = GEN_INT (PROBE_INTERVAL);
9340 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9342 /* Probe at TEST_ADDR. */
9343 xops[0] = stack_pointer_rtx;
9345 xops[2] = const0_rtx;
9346 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9348 fprintf (asm_out_file, "\tjmp\t");
9349 assemble_name_raw (asm_out_file, loop_lab);
9350 fputc ('\n', asm_out_file);
9352 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9357 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9358 to be generated in correct form. */
9360 ix86_finalize_stack_realign_flags (void)
9362 /* Check whether stack realignment is really needed after reload, and
9363 store the result in cfun. */
9364 unsigned int incoming_stack_boundary
9365 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9366 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9367 unsigned int stack_realign = (incoming_stack_boundary
9368 < (current_function_is_leaf
9369 ? crtl->max_used_stack_slot_alignment
9370 : crtl->stack_alignment_needed));
9372 if (crtl->stack_realign_finalized)
9374 /* After stack_realign_needed is finalized, we can no longer
9376 gcc_assert (crtl->stack_realign_needed == stack_realign);
9380 crtl->stack_realign_needed = stack_realign;
9381 crtl->stack_realign_finalized = true;
9385 /* Expand the prologue into a bunch of separate insns. */
9388 ix86_expand_prologue (void)
9390 struct machine_function *m = cfun->machine;
9393 struct ix86_frame frame;
9394 HOST_WIDE_INT allocate;
9395 bool int_registers_saved;
9397 ix86_finalize_stack_realign_flags ();
9399 /* DRAP should not coexist with stack_realign_fp */
9400 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9402 memset (&m->fs, 0, sizeof (m->fs));
9404 /* Initialize CFA state for before the prologue. */
9405 m->fs.cfa_reg = stack_pointer_rtx;
9406 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9408 /* Track SP offset to the CFA. We continue tracking this after we've
9409 swapped the CFA register away from SP. In the case of re-alignment
9410 this is fudged; we're interested in offsets within the local frame. */
9411 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9412 m->fs.sp_valid = true;
9414 ix86_compute_frame_layout (&frame);
9416 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9418 /* We should have already generated an error for any use of
9419 ms_hook on a nested function. */
9420 gcc_checking_assert (!ix86_static_chain_on_stack);
9422 /* Check if profiling is active and we shall use profiling before
9423 prologue variant. If so sorry. */
9424 if (crtl->profile && flag_fentry != 0)
9425 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9427 /* In ix86_asm_output_function_label we emitted:
9428 8b ff movl.s %edi,%edi
9430 8b ec movl.s %esp,%ebp
9432 This matches the hookable function prologue in Win32 API
9433 functions in Microsoft Windows XP Service Pack 2 and newer.
9434 Wine uses this to enable Windows apps to hook the Win32 API
9435 functions provided by Wine.
9437 What that means is that we've already set up the frame pointer. */
9439 if (frame_pointer_needed
9440 && !(crtl->drap_reg && crtl->stack_realign_needed))
9444 /* We've decided to use the frame pointer already set up.
9445 Describe this to the unwinder by pretending that both
9446 push and mov insns happen right here.
9448 Putting the unwind info here at the end of the ms_hook
9449 is done so that we can make absolutely certain we get
9450 the required byte sequence at the start of the function,
9451 rather than relying on an assembler that can produce
9452 the exact encoding required.
9454 However it does mean (in the unpatched case) that we have
9455 a 1 insn window where the asynchronous unwind info is
9456 incorrect. However, if we placed the unwind info at
9457 its correct location we would have incorrect unwind info
9458 in the patched case. Which is probably all moot since
9459 I don't expect Wine generates dwarf2 unwind info for the
9460 system libraries that use this feature. */
9462 insn = emit_insn (gen_blockage ());
9464 push = gen_push (hard_frame_pointer_rtx);
9465 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9467 RTX_FRAME_RELATED_P (push) = 1;
9468 RTX_FRAME_RELATED_P (mov) = 1;
9470 RTX_FRAME_RELATED_P (insn) = 1;
9471 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9472 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9474 /* Note that gen_push incremented m->fs.cfa_offset, even
9475 though we didn't emit the push insn here. */
9476 m->fs.cfa_reg = hard_frame_pointer_rtx;
9477 m->fs.fp_offset = m->fs.cfa_offset;
9478 m->fs.fp_valid = true;
9482 /* The frame pointer is not needed so pop %ebp again.
9483 This leaves us with a pristine state. */
9484 emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
9488 /* The first insn of a function that accepts its static chain on the
9489 stack is to push the register that would be filled in by a direct
9490 call. This insn will be skipped by the trampoline. */
9491 else if (ix86_static_chain_on_stack)
9493 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9494 emit_insn (gen_blockage ());
9496 /* We don't want to interpret this push insn as a register save,
9497 only as a stack adjustment. The real copy of the register as
9498 a save will be done later, if needed. */
9499 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9500 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9501 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9502 RTX_FRAME_RELATED_P (insn) = 1;
9505 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9506 DRAP is needed and stack realignment is really needed after reload. */
9507 if (stack_realign_drap)
9509 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9511 /* Only need to push the parameter pointer reg if it is call-saved. */
9512 if (!call_used_regs[REGNO (crtl->drap_reg)])
9514 /* Push arg pointer reg */
9515 insn = emit_insn (gen_push (crtl->drap_reg));
9516 RTX_FRAME_RELATED_P (insn) = 1;
9519 /* Grab the argument pointer. */
9520 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9521 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9522 RTX_FRAME_RELATED_P (insn) = 1;
9523 m->fs.cfa_reg = crtl->drap_reg;
9524 m->fs.cfa_offset = 0;
9526 /* Align the stack. */
9527 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9529 GEN_INT (-align_bytes)));
9530 RTX_FRAME_RELATED_P (insn) = 1;
9532 /* Replicate the return address on the stack so that the return
9533 address can be reached via the (argp - 1) slot. This is needed
9534 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9535 expand_builtin_return_addr, etc. */
9536 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9537 t = gen_frame_mem (Pmode, t);
9538 insn = emit_insn (gen_push (t));
9539 RTX_FRAME_RELATED_P (insn) = 1;
9541 /* For the purposes of frame and register save area addressing,
9542 we've started over with a new frame. */
9543 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9544 m->fs.realigned = true;
9547 if (frame_pointer_needed && !m->fs.fp_valid)
9549 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9550 slower on all targets. Also sdb doesn't like it. */
9551 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9552 RTX_FRAME_RELATED_P (insn) = 1;
9554 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9555 RTX_FRAME_RELATED_P (insn) = 1;
9557 if (m->fs.cfa_reg == stack_pointer_rtx)
9558 m->fs.cfa_reg = hard_frame_pointer_rtx;
9559 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9560 m->fs.fp_offset = m->fs.sp_offset;
9561 m->fs.fp_valid = true;
9564 int_registers_saved = (frame.nregs == 0);
9566 if (!int_registers_saved)
9568 /* If saving registers via PUSH, do so now. */
9569 if (!frame.save_regs_using_mov)
9571 ix86_emit_save_regs ();
9572 int_registers_saved = true;
9573 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9576 /* When using the red zone we may start saving registers before allocating
9577 the stack frame, saving one cycle of the prologue. However, avoid
9578 doing this if we have to probe the stack; at least on x86_64 the
9579 stack probe can turn into a call that clobbers a red zone location. */
9580 else if (ix86_using_red_zone ()
9581 && (! TARGET_STACK_PROBE
9582 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9584 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9585 int_registers_saved = true;
9589 if (stack_realign_fp)
9591 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9592 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9594 /* The computation of the size of the re-aligned stack frame means
9595 that we must allocate the size of the register save area before
9596 performing the actual alignment. Otherwise we cannot guarantee
9597 that there's enough storage above the realignment point. */
9598 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9599 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9600 GEN_INT (m->fs.sp_offset
9601 - frame.sse_reg_save_offset),
9604 /* Align the stack. */
9605 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9607 GEN_INT (-align_bytes)));
9609 /* For the purposes of register save area addressing, the stack
9610 pointer is no longer valid. As for the value of sp_offset,
9611 see ix86_compute_frame_layout, which we need to match in order
9612 to pass verification of stack_pointer_offset at the end. */
9613 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9614 m->fs.sp_valid = false;
9617 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9619 /* The stack has already been decremented by the instruction calling us
9620 so we need to probe unconditionally to preserve the protection area. */
9621 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9623 /* We expect the registers to be saved when probes are used. */
9624 gcc_assert (int_registers_saved);
9626 if (STACK_CHECK_MOVING_SP)
9628 ix86_adjust_stack_and_probe (allocate);
9633 HOST_WIDE_INT size = allocate;
9635 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9636 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9638 if (TARGET_STACK_PROBE)
9639 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9641 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9647 else if (!ix86_target_stack_probe ()
9648 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9650 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9651 GEN_INT (-allocate), -1,
9652 m->fs.cfa_reg == stack_pointer_rtx);
9656 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9659 if (cfun->machine->call_abi == MS_ABI)
9662 eax_live = ix86_eax_live_at_start_p ();
9666 emit_insn (gen_push (eax));
9667 allocate -= UNITS_PER_WORD;
9670 emit_move_insn (eax, GEN_INT (allocate));
9672 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9674 if (m->fs.cfa_reg == stack_pointer_rtx)
9676 m->fs.cfa_offset += allocate;
9677 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9678 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9679 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9680 RTX_FRAME_RELATED_P (insn) = 1;
9682 m->fs.sp_offset += allocate;
9686 t = choose_baseaddr (m->fs.sp_offset - allocate);
9687 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9690 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9692 if (!int_registers_saved)
9693 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9695 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9697 pic_reg_used = false;
9698 if (pic_offset_table_rtx
9699 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9702 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9704 if (alt_pic_reg_used != INVALID_REGNUM)
9705 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9707 pic_reg_used = true;
9714 if (ix86_cmodel == CM_LARGE_PIC)
9716 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9717 rtx label = gen_label_rtx ();
9719 LABEL_PRESERVE_P (label) = 1;
9720 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9721 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9722 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9723 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9724 pic_offset_table_rtx, tmp_reg));
9727 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9730 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9733 /* In the pic_reg_used case, make sure that the got load isn't deleted
9734 when mcount needs it. Blockage to avoid call movement across mcount
9735 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9737 if (crtl->profile && !flag_fentry && pic_reg_used)
9738 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9740 if (crtl->drap_reg && !crtl->stack_realign_needed)
9742 /* vDRAP is set up, but after reload it turns out stack realignment
9743 isn't necessary; here we emit prologue code to set up DRAP
9744 without the stack realignment adjustment. */
9745 t = choose_baseaddr (0);
9746 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9749 /* Prevent instructions from being scheduled into the register save push
9750 sequence when access to the red-zone area is done through the frame pointer.
9751 The offset between the frame pointer and the stack pointer is calculated
9752 relative to the value of the stack pointer at the end of the function
9753 prologue, and moving instructions that access the red-zone area via the
9754 frame pointer inside the push sequence violates this assumption. */
9755 if (frame_pointer_needed && frame.red_zone_size)
9756 emit_insn (gen_memory_blockage ());
9758 /* Emit cld instruction if stringops are used in the function. */
9759 if (TARGET_CLD && ix86_current_function_needs_cld)
9760 emit_insn (gen_cld ());
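/* Putting the pieces together, the common frame-pointer case of the
   expander above boils down to the classic sequence (32-bit sketch,
   no realignment, registers saved by pushes):

       pushl   %ebp
       movl    %esp, %ebp
       pushl   <call-saved registers in use>
       subl    $<frame size>, %esp

   with the static chain, DRAP, realignment, probing and PIC setup
   paths layered on only when required.  */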
9763 /* Emit code to restore REG using a POP insn. */
9766 ix86_emit_restore_reg_using_pop (rtx reg)
9768 struct machine_function *m = cfun->machine;
9769 rtx insn = emit_insn (ix86_gen_pop1 (reg));
9771 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9772 m->fs.sp_offset -= UNITS_PER_WORD;
9774 if (m->fs.cfa_reg == crtl->drap_reg
9775 && REGNO (reg) == REGNO (crtl->drap_reg))
9777 /* Previously we'd represented the CFA as an expression
9778 like *(%ebp - 8). We've just popped that value from
9779 the stack, which means we need to reset the CFA to
9780 the drap register. This will remain until we restore
9781 the stack pointer. */
9782 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9783 RTX_FRAME_RELATED_P (insn) = 1;
9785 /* This means that the DRAP register is valid for addressing too. */
9786 m->fs.drap_valid = true;
9790 if (m->fs.cfa_reg == stack_pointer_rtx)
9792 m->fs.cfa_offset -= UNITS_PER_WORD;
9793 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9794 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9795 RTX_FRAME_RELATED_P (insn) = 1;
9798 /* When the frame pointer is the CFA, and we pop it, we are
9799 swapping back to the stack pointer as the CFA. This happens
9800 for stack frames that don't allocate other data, so we assume
9801 the stack pointer is now pointing at the return address, i.e.
9802 the function entry state, which makes the offset be 1 word. */
9803 if (reg == hard_frame_pointer_rtx)
9805 m->fs.fp_valid = false;
9806 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9808 m->fs.cfa_reg = stack_pointer_rtx;
9809 m->fs.cfa_offset -= UNITS_PER_WORD;
9811 add_reg_note (insn, REG_CFA_DEF_CFA,
9812 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9813 GEN_INT (m->fs.cfa_offset)));
9814 RTX_FRAME_RELATED_P (insn) = 1;
9819 /* Emit code to restore saved registers using POP insns. */
9822 ix86_emit_restore_regs_using_pop (void)
9826 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9827 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9828 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9831 /* Emit code and notes for the LEAVE instruction. */
9834 ix86_emit_leave (void)
9836 struct machine_function *m = cfun->machine;
9837 rtx insn = emit_insn (ix86_gen_leave ());
9839 ix86_add_queued_cfa_restore_notes (insn);
9841 gcc_assert (m->fs.fp_valid);
9842 m->fs.sp_valid = true;
9843 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9844 m->fs.fp_valid = false;
9846 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9848 m->fs.cfa_reg = stack_pointer_rtx;
9849 m->fs.cfa_offset = m->fs.sp_offset;
9851 add_reg_note (insn, REG_CFA_DEF_CFA,
9852 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9853 RTX_FRAME_RELATED_P (insn) = 1;
9854 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9859 /* Emit code to restore saved registers using MOV insns.
9860 First register is restored from CFA - CFA_OFFSET. */
9862 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9863 int maybe_eh_return)
9865 struct machine_function *m = cfun->machine;
9868 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9869 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9871 rtx reg = gen_rtx_REG (Pmode, regno);
9874 mem = choose_baseaddr (cfa_offset);
9875 mem = gen_frame_mem (Pmode, mem);
9876 insn = emit_move_insn (reg, mem);
9878 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9880 /* Previously we'd represented the CFA as an expression
9881 like *(%ebp - 8). We've just popped that value from
9882 the stack, which means we need to reset the CFA to
9883 the drap register. This will remain until we restore
9884 the stack pointer. */
9885 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9886 RTX_FRAME_RELATED_P (insn) = 1;
9888 /* This means that the DRAP register is valid for addressing. */
9889 m->fs.drap_valid = true;
9892 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9894 cfa_offset -= UNITS_PER_WORD;
9898 /* Emit code to restore saved SSE registers using MOV insns.
9899 The first register is restored from CFA - CFA_OFFSET. */
9901 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9902 int maybe_eh_return)
9906 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9907 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9909 rtx reg = gen_rtx_REG (V4SFmode, regno);
9912 mem = choose_baseaddr (cfa_offset);
9913 mem = gen_rtx_MEM (V4SFmode, mem);
9914 set_mem_align (mem, 128);
9915 emit_move_insn (reg, mem);
9917 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9923 /* Restore function stack, frame, and registers. */
9926 ix86_expand_epilogue (int style)
9928 struct machine_function *m = cfun->machine;
9929 struct machine_frame_state frame_state_save = m->fs;
9930 struct ix86_frame frame;
9931 bool restore_regs_via_mov;
9934 ix86_finalize_stack_realign_flags ();
9935 ix86_compute_frame_layout (&frame);
9937 m->fs.sp_valid = (!frame_pointer_needed
9938 || (current_function_sp_is_unchanging
9939 && !stack_realign_fp));
9940 gcc_assert (!m->fs.sp_valid
9941 || m->fs.sp_offset == frame.stack_pointer_offset);
9943 /* The FP must be valid if the frame pointer is present. */
9944 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9945 gcc_assert (!m->fs.fp_valid
9946 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9948 /* We must have *some* valid pointer to the stack frame. */
9949 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9951 /* The DRAP is never valid at this point. */
9952 gcc_assert (!m->fs.drap_valid);
9954 /* See the comment about red zone and frame
9955 pointer usage in ix86_expand_prologue. */
9956 if (frame_pointer_needed && frame.red_zone_size)
9957 emit_insn (gen_memory_blockage ());
9959 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9960 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9962 /* Determine the CFA offset of the end of the red-zone. */
9963 m->fs.red_zone_offset = 0;
9964 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9966 /* The red-zone begins below the return address. */
9967 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
9969 /* When the register save area is in the aligned portion of
9970 the stack, determine the maximum runtime displacement that
9971 matches up with the aligned frame. */
9972 if (stack_realign_drap)
9973 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9977 /* Special care must be taken for the normal return case of a function
9978 using eh_return: the eax and edx registers are marked as saved, but
9979 not restored along this path. Adjust the save location to match. */
9980 if (crtl->calls_eh_return && style != 2)
9981 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
9983 /* If we're only restoring one register and sp is not valid, then
9984 use a move instruction to restore the register, since it's
9985 less work than reloading sp and popping the register. */
9986 if (!m->fs.sp_valid && frame.nregs <= 1)
9987 restore_regs_via_mov = true;
9988 /* EH_RETURN requires the use of moves to function properly. */
9989 else if (crtl->calls_eh_return)
9990 restore_regs_via_mov = true;
9991 else if (TARGET_EPILOGUE_USING_MOVE
9992 && cfun->machine->use_fast_prologue_epilogue
9994 || m->fs.sp_offset != frame.reg_save_offset))
9995 restore_regs_via_mov = true;
9996 else if (frame_pointer_needed
9998 && m->fs.sp_offset != frame.reg_save_offset)
9999 restore_regs_via_mov = true;
10000 else if (frame_pointer_needed
10001 && TARGET_USE_LEAVE
10002 && cfun->machine->use_fast_prologue_epilogue
10003 && frame.nregs == 1)
10004 restore_regs_via_mov = true;
10006 restore_regs_via_mov = false;
10008 if (restore_regs_via_mov || frame.nsseregs)
10010 /* Ensure that the entire register save area is addressable via
10011 the stack pointer, if we will restore via sp. */
10013 && m->fs.sp_offset > 0x7fffffff
10014 && !(m->fs.fp_valid || m->fs.drap_valid)
10015 && (frame.nsseregs + frame.nregs) != 0)
10017 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10018 GEN_INT (m->fs.sp_offset
10019 - frame.sse_reg_save_offset),
10021 m->fs.cfa_reg == stack_pointer_rtx);
10025 /* If there are any SSE registers to restore, then we have to do it
10026 via moves, since there's obviously no pop for SSE regs. */
10027 if (frame.nsseregs)
10028 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10031 if (restore_regs_via_mov)
10036 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10038 /* eh_return epilogues need %ecx added to the stack pointer. */
10041 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10043 /* Stack align doesn't work with eh_return. */
10044 gcc_assert (!stack_realign_drap);
10045 /* Neither do regparm nested functions. */
10046 gcc_assert (!ix86_static_chain_on_stack);
10048 if (frame_pointer_needed)
10050 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10051 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10052 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10054 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10055 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10057 /* Note that we use SA as a temporary CFA, as the return
10058 address is at the proper place relative to it. We
10059 pretend this happens at the FP restore insn because
10060 prior to this insn the FP would be stored at the wrong
10061 offset relative to SA, and after this insn we have no
10062 other reasonable register to use for the CFA. We don't
10063 bother resetting the CFA to the SP for the duration of
10064 the return insn. */
10065 add_reg_note (insn, REG_CFA_DEF_CFA,
10066 plus_constant (sa, UNITS_PER_WORD));
10067 ix86_add_queued_cfa_restore_notes (insn);
10068 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10069 RTX_FRAME_RELATED_P (insn) = 1;
10071 m->fs.cfa_reg = sa;
10072 m->fs.cfa_offset = UNITS_PER_WORD;
10073 m->fs.fp_valid = false;
10075 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10076 const0_rtx, style, false);
10077 }
10078 else
10079 {
10080 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10081 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10082 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10083 ix86_add_queued_cfa_restore_notes (insn);
10085 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10086 if (m->fs.cfa_offset != UNITS_PER_WORD)
10087 {
10088 m->fs.cfa_offset = UNITS_PER_WORD;
10089 add_reg_note (insn, REG_CFA_DEF_CFA,
10090 plus_constant (stack_pointer_rtx,
10091 UNITS_PER_WORD));
10092 RTX_FRAME_RELATED_P (insn) = 1;
10093 }
10094 }
10095 m->fs.sp_offset = UNITS_PER_WORD;
10096 }
10097 }
10098 else
10099 {
10100 /* First step is to deallocate the stack frame so that we can
10101 pop the registers. */
10102 if (!m->fs.sp_valid)
10103 {
10104 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10105 GEN_INT (m->fs.fp_offset
10106 - frame.reg_save_offset),
10107 style, false);
10108 }
10109 else if (m->fs.sp_offset != frame.reg_save_offset)
10110 {
10111 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10112 GEN_INT (m->fs.sp_offset
10113 - frame.reg_save_offset),
10114 style,
10115 m->fs.cfa_reg == stack_pointer_rtx);
10116 }
10118 ix86_emit_restore_regs_using_pop ();
10119 }
10121 /* If we used a frame pointer and haven't already got rid of it,
10122 then pop it. */
10123 if (m->fs.fp_valid)
10124 {
10125 /* If the stack pointer is valid and pointing at the frame
10126 pointer store address, then we only need a pop. */
10127 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10128 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10129 /* Leave results in shorter dependency chains on CPUs that are
10130 able to grok it fast. */
10131 else if (TARGET_USE_LEAVE
10132 || optimize_function_for_size_p (cfun)
10133 || !cfun->machine->use_fast_prologue_epilogue)
10134 ix86_emit_leave ();
10135 else
10136 {
10137 pro_epilogue_adjust_stack (stack_pointer_rtx,
10138 hard_frame_pointer_rtx,
10139 const0_rtx, style, !using_drap);
10140 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10141 }
10142 }
10144 if (using_drap)
10145 {
10146 int param_ptr_offset = UNITS_PER_WORD;
10147 rtx insn;
10149 gcc_assert (stack_realign_drap);
10151 if (ix86_static_chain_on_stack)
10152 param_ptr_offset += UNITS_PER_WORD;
10153 if (!call_used_regs[REGNO (crtl->drap_reg)])
10154 param_ptr_offset += UNITS_PER_WORD;
10156 insn = emit_insn (gen_rtx_SET
10157 (VOIDmode, stack_pointer_rtx,
10158 gen_rtx_PLUS (Pmode,
10159 crtl->drap_reg,
10160 GEN_INT (-param_ptr_offset))));
10161 m->fs.cfa_reg = stack_pointer_rtx;
10162 m->fs.cfa_offset = param_ptr_offset;
10163 m->fs.sp_offset = param_ptr_offset;
10164 m->fs.realigned = false;
10166 add_reg_note (insn, REG_CFA_DEF_CFA,
10167 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10168 GEN_INT (param_ptr_offset)));
10169 RTX_FRAME_RELATED_P (insn) = 1;
10171 if (!call_used_regs[REGNO (crtl->drap_reg)])
10172 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10173 }
10175 /* At this point the stack pointer must be valid, and we must have
10176 restored all of the registers. We may not have deallocated the
10177 entire stack frame. We've delayed this until now because it may
10178 be possible to merge the local stack deallocation with the
10179 deallocation forced by ix86_static_chain_on_stack. */
10180 gcc_assert (m->fs.sp_valid);
10181 gcc_assert (!m->fs.fp_valid);
10182 gcc_assert (!m->fs.realigned);
10183 if (m->fs.sp_offset != UNITS_PER_WORD)
10184 {
10185 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10186 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10187 style, true);
10188 }
10190 /* Sibcall epilogues don't want a return instruction. */
10191 if (style == 0)
10192 {
10193 m->fs = frame_state_save;
10194 return;
10195 }
10197 if (crtl->args.pops_args && crtl->args.size)
10198 {
10199 rtx popc = GEN_INT (crtl->args.pops_args);
10201 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10202 address, do an explicit add, and jump indirectly to the caller. */
10204 if (crtl->args.pops_args >= 65536)
10205 {
10206 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10207 rtx insn;
10209 /* There is no "pascal" calling convention in any 64bit ABI. */
10210 gcc_assert (!TARGET_64BIT);
10212 insn = emit_insn (gen_popsi1 (ecx));
10213 m->fs.cfa_offset -= UNITS_PER_WORD;
10214 m->fs.sp_offset -= UNITS_PER_WORD;
10216 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10217 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10218 add_reg_note (insn, REG_CFA_REGISTER,
10219 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10220 RTX_FRAME_RELATED_P (insn) = 1;
10222 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10224 emit_jump_insn (gen_return_indirect_internal (ecx));
10225 }
10226 else
10227 emit_jump_insn (gen_return_pop_internal (popc));
10228 }
10229 else
10230 emit_jump_insn (gen_return_internal ());
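/* Editorial note: for the crtl->args.pops_args >= 65536 path above,
   the emitted sequence is effectively (sketch, AT&T syntax):
       popl %ecx        # return address into %ecx
       addl $N, %esp    # pop the N bytes of arguments explicitly
       jmp *%ecx        # return to the caller
   because the "ret $N" encoding only carries a 16-bit immediate. */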
10232 /* Restore the state back to the state from the prologue,
10233 so that it's correct for the next epilogue. */
10234 m->fs = frame_state_save;
10237 /* Reset from the function's potential modifications. */
10239 static void
10240 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10241 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10243 if (pic_offset_table_rtx)
10244 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10246 /* Mach-O doesn't support labels at the end of objects, so if
10247 it looks like we might want one, insert a NOP. */
10248 {
10249 rtx insn = get_last_insn ();
10250 while (insn
10251 && NOTE_P (insn)
10252 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10253 insn = PREV_INSN (insn);
10254 if (insn
10255 && (LABEL_P (insn)
10256 || (NOTE_P (insn)
10257 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10258 fputs ("\tnop\n", file);
10259 }
10264 /* Extract the parts of an RTL expression that is a valid memory address
10265 for an instruction. Return 0 if the structure of the address is
10266 grossly off. Return -1 if the address contains ASHIFT, so it is not
10267 strictly valid, but still used for computing length of lea instruction. */
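/* Editorial example (not in the original source): the address
   (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
         (const_int 12))
   decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   i.e. the AT&T operand 12(%ebx,%eax,4). */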
10269 int
10270 ix86_decompose_address (rtx addr, struct ix86_address *out)
10271 {
10272 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10273 rtx base_reg, index_reg;
10274 HOST_WIDE_INT scale = 1;
10275 rtx scale_rtx = NULL_RTX;
10276 rtx tmp;
10277 int retval = 1;
10278 enum ix86_address_seg seg = SEG_DEFAULT;
10280 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10281 base = addr;
10282 else if (GET_CODE (addr) == PLUS)
10283 {
10284 rtx addends[4], op;
10285 int n = 0, i;
10287 op = addr;
10288 do
10289 {
10290 if (n >= 4)
10291 return 0;
10292 addends[n++] = XEXP (op, 1);
10293 op = XEXP (op, 0);
10294 }
10295 while (GET_CODE (op) == PLUS);
10296 if (n >= 4)
10297 return 0;
10298 addends[n] = op;
10300 for (i = n; i >= 0; --i)
10301 {
10302 op = addends[i];
10303 switch (GET_CODE (op))
10304 {
10305 case MULT:
10306 if (index)
10307 return 0;
10308 index = XEXP (op, 0);
10309 scale_rtx = XEXP (op, 1);
10310 break;
10312 case ASHIFT:
10313 if (index)
10314 return 0;
10315 index = XEXP (op, 0);
10316 tmp = XEXP (op, 1);
10317 if (!CONST_INT_P (tmp))
10318 return 0;
10319 scale = INTVAL (tmp);
10320 if ((unsigned HOST_WIDE_INT) scale > 3)
10321 return 0;
10322 scale = 1 << scale;
10323 break;
10325 case UNSPEC:
10326 if (XINT (op, 1) == UNSPEC_TP
10327 && TARGET_TLS_DIRECT_SEG_REFS
10328 && seg == SEG_DEFAULT)
10329 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10330 else
10331 return 0;
10332 break;
10334 case REG:
10335 case SUBREG:
10336 if (!base)
10337 base = op;
10338 else if (!index)
10339 index = op;
10340 else
10341 return 0;
10342 break;
10344 case CONST:
10345 case CONST_INT:
10346 case SYMBOL_REF:
10347 case LABEL_REF:
10348 if (disp)
10349 return 0;
10350 disp = op;
10351 break;
10353 default:
10354 return 0;
10355 }
10356 }
10357 }
10358 else if (GET_CODE (addr) == MULT)
10360 index = XEXP (addr, 0); /* index*scale */
10361 scale_rtx = XEXP (addr, 1);
10363 else if (GET_CODE (addr) == ASHIFT)
10365 /* We're called for lea too, which implements ashift on occasion. */
10366 index = XEXP (addr, 0);
10367 tmp = XEXP (addr, 1);
10368 if (!CONST_INT_P (tmp))
10369 return 0;
10370 scale = INTVAL (tmp);
10371 if ((unsigned HOST_WIDE_INT) scale > 3)
10372 return 0;
10373 scale = 1 << scale;
10374 retval = -1;
10375 }
10376 else
10377 disp = addr; /* displacement */
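/* Editorial note: in the ASHIFT branch above, a shift count of 0..3
   is the shift form of an index scale, so e.g.
   (ashift (reg %eax) (const_int 3)) is treated like
   (mult (reg %eax) (const_int 8)), matching the SIB scales 1/2/4/8. */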
10379 /* Extract the integral value of scale. */
10380 if (scale_rtx)
10381 {
10382 if (!CONST_INT_P (scale_rtx))
10383 return 0;
10384 scale = INTVAL (scale_rtx);
10385 }
10387 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10388 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10390 /* Avoid useless 0 displacement. */
10391 if (disp == const0_rtx && (base || index))
10392 disp = NULL_RTX;
10394 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10395 if (base_reg && index_reg && scale == 1
10396 && (index_reg == arg_pointer_rtx
10397 || index_reg == frame_pointer_rtx
10398 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10400 {
10401 tmp = base, base = index, index = tmp;
10402 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10403 }
10405 /* Special case: %ebp cannot be encoded as a base without a displacement.
10406 Similarly %r13. */
10407 if (!disp
10408 && base_reg
10409 && (base_reg == hard_frame_pointer_rtx
10410 || base_reg == frame_pointer_rtx
10411 || base_reg == arg_pointer_rtx
10412 || (REG_P (base_reg)
10413 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10414 || REGNO (base_reg) == R13_REG))))
10415 disp = const0_rtx;
10417 /* Special case: on K6, [%esi] makes the instruction vector decoded.
10418 Avoid this by transforming to [%esi+0].
10419 Reload calls address legitimization without cfun defined, so we need
10420 to test cfun for being non-NULL. */
10421 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10422 && base_reg && !index_reg && !disp
10423 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10424 disp = const0_rtx;
10426 /* Special case: encode reg+reg instead of reg*2. */
10427 if (!base && index && scale == 2)
10428 base = index, base_reg = index_reg, scale = 1;
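/* Editorial note: reg*2 with no base needs a SIB byte plus a 32-bit
   displacement, e.g. "lea 0(,%eax,2), %ebx", whereas the rewritten
   base+index form encodes as the shorter "lea (%eax,%eax), %ebx". */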
10430 /* Special case: scaling cannot be encoded without base or displacement. */
10431 if (!base && !disp && index && scale != 1)
10432 return 0;
10434 out->base = base;
10435 out->index = index;
10436 out->disp = disp;
10437 out->scale = scale;
10438 out->seg = seg;
10440 return retval;
10441 }
10443 /* Return cost of the memory address x.
10444 For i386, it is better to use a complex address than let gcc copy
10445 the address into a reg and make a new pseudo. But not if the address
10446 requires two regs - that would mean more pseudos with longer
10447 lifetimes. */
10448 static int
10449 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10450 {
10451 struct ix86_address parts;
10452 int cost = 1;
10453 int ok = ix86_decompose_address (x, &parts);
10455 gcc_assert (ok);
10457 if (parts.base && GET_CODE (parts.base) == SUBREG)
10458 parts.base = SUBREG_REG (parts.base);
10459 if (parts.index && GET_CODE (parts.index) == SUBREG)
10460 parts.index = SUBREG_REG (parts.index);
10462 /* Attempt to minimize number of registers in the address. */
10463 if ((parts.base
10464 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10465 || (parts.index
10466 && (!REG_P (parts.index)
10467 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10468 cost++;
10470 if (parts.base
10471 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10472 && parts.index
10473 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10474 && parts.base != parts.index)
10475 cost++;
10477 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10478 since its predecode logic can't detect the length of instructions
10479 and it degenerates to vector decoded. Increase cost of such
10480 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10481 to split such addresses or even refuse such addresses at all.
10483 Following addressing modes are affected:
10484 [base+scale*index]
10485 [scale*index+disp]
10486 [base+index]
10488 The first and last case may be avoidable by explicitly coding the zero in
10489 the memory address, but I don't have an AMD-K6 machine handy to check this
10490 theory. */
10492 if (TARGET_K6
10493 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10494 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10495 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10496 cost += 10;
10498 return cost;
10499 }
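/* Editorial example (hypothetical operands): on K6, an address such
   as 4(%esi,%edi,2) keeps the base cost, while (%esi,%edi,2) --
   scaled index with base but no displacement -- matches the first
   pattern above and picks up the +10 penalty. */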
10501 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10502 this is used to form addresses to local data when -fPIC is in
10503 effect. */
10505 static bool
10506 darwin_local_data_pic (rtx disp)
10507 {
10508 return (GET_CODE (disp) == UNSPEC
10509 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10512 /* Determine if a given RTX is a valid constant. We already know this
10513 satisfies CONSTANT_P. */
10515 bool
10516 legitimate_constant_p (rtx x)
10517 {
10518 switch (GET_CODE (x))
10519 {
10520 case CONST:
10521 x = XEXP (x, 0);
10523 if (GET_CODE (x) == PLUS)
10524 {
10525 if (!CONST_INT_P (XEXP (x, 1)))
10526 return false;
10527 x = XEXP (x, 0);
10528 }
10530 if (TARGET_MACHO && darwin_local_data_pic (x))
10531 return true;
10533 /* Only some unspecs are valid as "constants". */
10534 if (GET_CODE (x) == UNSPEC)
10535 switch (XINT (x, 1))
10538 case UNSPEC_GOTOFF:
10539 case UNSPEC_PLTOFF:
10540 return TARGET_64BIT;
10542 case UNSPEC_NTPOFF:
10543 x = XVECEXP (x, 0, 0);
10544 return (GET_CODE (x) == SYMBOL_REF
10545 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10546 case UNSPEC_DTPOFF:
10547 x = XVECEXP (x, 0, 0);
10548 return (GET_CODE (x) == SYMBOL_REF
10549 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10554 /* We must have drilled down to a symbol. */
10555 if (GET_CODE (x) == LABEL_REF)
10557 if (GET_CODE (x) != SYMBOL_REF)
10562 /* TLS symbols are never valid. */
10563 if (SYMBOL_REF_TLS_MODEL (x))
10566 /* DLLIMPORT symbols are never valid. */
10567 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10568 && SYMBOL_REF_DLLIMPORT_P (x))
10573 if (GET_MODE (x) == TImode
10574 && x != CONST0_RTX (TImode)
10580 if (!standard_sse_constant_p (x))
10587 /* Otherwise we handle everything else in the move patterns. */
10591 /* Determine if it's legal to put X into the constant pool. This
10592 is not possible for the address of thread-local symbols, which
10593 is checked above. */
10596 ix86_cannot_force_const_mem (rtx x)
10598 /* We can always put integral constants and vectors in memory. */
10599 switch (GET_CODE (x))
10609 return !legitimate_constant_p (x);
10613 /* Nonzero if the constant value X is a legitimate general operand
10614 when generating PIC code. It is given that flag_pic is on and
10615 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10618 legitimate_pic_operand_p (rtx x)
10622 switch (GET_CODE (x))
10625 inner = XEXP (x, 0);
10626 if (GET_CODE (inner) == PLUS
10627 && CONST_INT_P (XEXP (inner, 1)))
10628 inner = XEXP (inner, 0);
10630 /* Only some unspecs are valid as "constants". */
10631 if (GET_CODE (inner) == UNSPEC)
10632 switch (XINT (inner, 1))
10635 case UNSPEC_GOTOFF:
10636 case UNSPEC_PLTOFF:
10637 return TARGET_64BIT;
10639 x = XVECEXP (inner, 0, 0);
10640 return (GET_CODE (x) == SYMBOL_REF
10641 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10642 case UNSPEC_MACHOPIC_OFFSET:
10643 return legitimate_pic_address_disp_p (x);
10651 return legitimate_pic_address_disp_p (x);
10658 /* Determine if a given CONST RTX is a valid memory displacement
10662 legitimate_pic_address_disp_p (rtx disp)
10666 /* In 64bit mode we can allow direct addresses of symbols and labels
10667 when they are not dynamic symbols. */
10670 rtx op0 = disp, op1;
10672 switch (GET_CODE (disp))
10678 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10680 op0 = XEXP (XEXP (disp, 0), 0);
10681 op1 = XEXP (XEXP (disp, 0), 1);
10682 if (!CONST_INT_P (op1)
10683 || INTVAL (op1) >= 16*1024*1024
10684 || INTVAL (op1) < -16*1024*1024)
10686 if (GET_CODE (op0) == LABEL_REF)
10688 if (GET_CODE (op0) != SYMBOL_REF)
10693 /* TLS references should always be enclosed in UNSPEC. */
10694 if (SYMBOL_REF_TLS_MODEL (op0))
10696 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10697 && ix86_cmodel != CM_LARGE_PIC)
10698 return true;
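/* Editorial note: the 16*1024*1024 bounds above keep symbol+offset
   within +/-16MB, well inside the +/-2GB reach of rip-relative
   addressing, so e.g. sym+0x100000(%rip) is accepted while a 64MB
   offset must be formed through a register instead. */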
10705 if (GET_CODE (disp) != CONST)
10707 disp = XEXP (disp, 0);
10711 /* It is unsafe to allow PLUS expressions here; this limits the allowed
10712 distance of GOT table references. We should not need these anyway. */
10713 if (GET_CODE (disp) != UNSPEC
10714 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10715 && XINT (disp, 1) != UNSPEC_GOTOFF
10716 && XINT (disp, 1) != UNSPEC_PLTOFF))
10719 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10720 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10726 if (GET_CODE (disp) == PLUS)
10728 if (!CONST_INT_P (XEXP (disp, 1)))
10730 disp = XEXP (disp, 0);
10734 if (TARGET_MACHO && darwin_local_data_pic (disp))
10737 if (GET_CODE (disp) != UNSPEC)
10740 switch (XINT (disp, 1))
10745 /* We need to check for both symbols and labels because VxWorks loads
10746 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10747 details. */
10748 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10749 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10750 case UNSPEC_GOTOFF:
10751 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10752 While the ABI also specifies 32bit relocations, we don't produce them in
10753 the small PIC model at all. */
10754 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10755 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10757 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10759 case UNSPEC_GOTTPOFF:
10760 case UNSPEC_GOTNTPOFF:
10761 case UNSPEC_INDNTPOFF:
10764 disp = XVECEXP (disp, 0, 0);
10765 return (GET_CODE (disp) == SYMBOL_REF
10766 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10767 case UNSPEC_NTPOFF:
10768 disp = XVECEXP (disp, 0, 0);
10769 return (GET_CODE (disp) == SYMBOL_REF
10770 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10771 case UNSPEC_DTPOFF:
10772 disp = XVECEXP (disp, 0, 0);
10773 return (GET_CODE (disp) == SYMBOL_REF
10774 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10780 /* Recognizes RTL expressions that are valid memory addresses for an
10781 instruction. The MODE argument is the machine mode for the MEM
10782 expression that wants to use this address.
10784 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10785 convert common non-canonical forms to canonical form so that they will
10786 be recognized. */
10788 static bool
10789 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10790 rtx addr, bool strict)
10791 {
10792 struct ix86_address parts;
10793 rtx base, index, disp;
10794 HOST_WIDE_INT scale;
10796 if (ix86_decompose_address (addr, &parts) <= 0)
10797 /* Decomposition failed. */
10798 return false;
10800 base = parts.base;
10801 index = parts.index;
10802 disp = parts.disp;
10803 scale = parts.scale;
10805 /* Validate base register.
10807 Don't allow SUBREG's that span more than a word here. It can lead to spill
10808 failures when the base is one word out of a two word structure, which is
10809 represented internally as a DImode int. */
10811 if (base)
10812 {
10813 rtx reg;
10815 if (REG_P (base))
10816 reg = base;
10817 else if (GET_CODE (base) == SUBREG
10818 && REG_P (SUBREG_REG (base))
10819 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10820 <= UNITS_PER_WORD)
10821 reg = SUBREG_REG (base);
10822 else
10823 /* Base is not a register. */
10824 return false;
10826 if (GET_MODE (base) != Pmode)
10827 /* Base is not in Pmode. */
10828 return false;
10830 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10831 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10832 /* Base is not valid. */
10833 return false;
10834 }
10836 /* Validate index register.
10838 Don't allow SUBREG's that span more than a word here -- same as above. */
10840 if (index)
10841 {
10842 rtx reg;
10844 if (REG_P (index))
10845 reg = index;
10846 else if (GET_CODE (index) == SUBREG
10847 && REG_P (SUBREG_REG (index))
10848 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10849 <= UNITS_PER_WORD)
10850 reg = SUBREG_REG (index);
10851 else
10852 /* Index is not a register. */
10853 return false;
10855 if (GET_MODE (index) != Pmode)
10856 /* Index is not in Pmode. */
10857 return false;
10859 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10860 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10861 /* Index is not valid. */
10862 return false;
10863 }
10865 /* Validate scale factor. */
10866 if (scale != 1)
10867 {
10868 if (!index)
10869 /* Scale without index. */
10870 return false;
10872 if (scale != 2 && scale != 4 && scale != 8)
10873 /* Scale is not a valid multiplier. */
10874 return false;
10875 }
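/* Editorial note: the 1/2/4/8 set mirrors the two-bit scale field of
   the SIB byte. A multiply by 3, 5 or 9 is instead expressed with
   base == index, e.g. "lea (%eax,%eax,2), %ebx" for 3*%eax. */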
10877 /* Validate displacement. */
10880 if (GET_CODE (disp) == CONST
10881 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10882 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10883 switch (XINT (XEXP (disp, 0), 1))
10885 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10886 used. While the ABI also specifies 32bit relocations, we don't produce
10887 them at all and use IP relative instead. */
10889 case UNSPEC_GOTOFF:
10890 gcc_assert (flag_pic);
10892 goto is_legitimate_pic;
10894 /* 64bit address unspec. */
10897 case UNSPEC_GOTPCREL:
10898 gcc_assert (flag_pic);
10899 goto is_legitimate_pic;
10901 case UNSPEC_GOTTPOFF:
10902 case UNSPEC_GOTNTPOFF:
10903 case UNSPEC_INDNTPOFF:
10904 case UNSPEC_NTPOFF:
10905 case UNSPEC_DTPOFF:
10909 /* Invalid address unspec. */
10913 else if (SYMBOLIC_CONST (disp)
10917 && MACHOPIC_INDIRECT
10918 && !machopic_operand_p (disp)
10924 if (TARGET_64BIT && (index || base))
10926 /* foo@dtpoff(%rX) is ok. */
10927 if (GET_CODE (disp) != CONST
10928 || GET_CODE (XEXP (disp, 0)) != PLUS
10929 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10930 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10931 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10932 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10933 /* Non-constant pic memory reference. */
10936 else if (! legitimate_pic_address_disp_p (disp))
10937 /* Displacement is an invalid pic construct. */
10940 /* This code used to verify that a symbolic pic displacement
10941 includes the pic_offset_table_rtx register.
10943 While this is a good idea, unfortunately these constructs may
10944 be created by the "adds using lea" optimization for incorrect
10953 This code is nonsensical, but results in addressing
10954 GOT table with pic_offset_table_rtx base. We can't
10955 just refuse it easily, since it gets matched by
10956 "addsi3" pattern, that later gets split to lea in the
10957 case output register differs from input. While this
10958 can be handled by separate addsi pattern for this case
10959 that never results in lea, this seems to be easier and
10960 correct fix for crash to disable this test. */
10962 else if (GET_CODE (disp) != LABEL_REF
10963 && !CONST_INT_P (disp)
10964 && (GET_CODE (disp) != CONST
10965 || !legitimate_constant_p (disp))
10966 && (GET_CODE (disp) != SYMBOL_REF
10967 || !legitimate_constant_p (disp)))
10968 /* Displacement is not constant. */
10970 else if (TARGET_64BIT
10971 && !x86_64_immediate_operand (disp, VOIDmode))
10972 /* Displacement is out of range. */
10976 /* Everything looks valid. */
10980 /* Determine if a given RTX is a valid constant address. */
10982 bool
10983 constant_address_p (rtx x)
10984 {
10985 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10986 }
10988 /* Return a unique alias set for the GOT. */
10990 static alias_set_type
10991 ix86_GOT_alias_set (void)
10993 static alias_set_type set = -1;
10994 if (set == -1)
10995 set = new_alias_set ();
10996 return set;
10997 }
10999 /* Return a legitimate reference for ORIG (an address) using the
11000 register REG. If REG is 0, a new pseudo is generated.
11002 There are two types of references that must be handled:
11004 1. Global data references must load the address from the GOT, via
11005 the PIC reg. An insn is emitted to do this load, and the reg is
11008 2. Static data references, constant pool addresses, and code labels
11009 compute the address as an offset from the GOT, whose base is in
11010 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11011 differentiate them from global data objects. The returned
11012 address is the PIC reg + an unspec constant.
11014 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11015 reg also appears in the address. */
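/* Editorial example (32-bit ELF, illustrative only): a global access
   loads the address from the GOT, "movl foo@GOT(%ebx), %eax", while
   a local/static access is a pure offset from the GOT base,
   "leal bar@GOTOFF(%ebx), %eax". */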
11017 static rtx
11018 legitimize_pic_address (rtx orig, rtx reg)
11019 {
11020 rtx addr = orig;
11021 rtx new_rtx = orig;
11025 if (TARGET_MACHO && !TARGET_64BIT)
11028 reg = gen_reg_rtx (Pmode);
11029 /* Use the generic Mach-O PIC machinery. */
11030 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11034 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11036 else if (TARGET_64BIT
11037 && ix86_cmodel != CM_SMALL_PIC
11038 && gotoff_operand (addr, Pmode))
11041 /* This symbol may be referenced via a displacement from the PIC
11042 base address (@GOTOFF). */
11044 if (reload_in_progress)
11045 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11046 if (GET_CODE (addr) == CONST)
11047 addr = XEXP (addr, 0);
11048 if (GET_CODE (addr) == PLUS)
11050 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11052 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11055 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11056 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11058 tmpreg = gen_reg_rtx (Pmode);
11061 emit_move_insn (tmpreg, new_rtx);
11065 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11066 tmpreg, 1, OPTAB_DIRECT);
11069 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11071 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11073 /* This symbol may be referenced via a displacement from the PIC
11074 base address (@GOTOFF). */
11076 if (reload_in_progress)
11077 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11078 if (GET_CODE (addr) == CONST)
11079 addr = XEXP (addr, 0);
11080 if (GET_CODE (addr) == PLUS)
11082 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11084 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11087 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11088 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11089 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11093 emit_move_insn (reg, new_rtx);
11097 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11098 /* We can't use @GOTOFF for text labels on VxWorks;
11099 see gotoff_operand. */
11100 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11102 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11104 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11105 return legitimize_dllimport_symbol (addr, true);
11106 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11107 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11108 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11110 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11111 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11115 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11117 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11118 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11119 new_rtx = gen_const_mem (Pmode, new_rtx);
11120 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11123 reg = gen_reg_rtx (Pmode);
11124 /* Use gen_movsi directly; otherwise the address is loaded
11125 into a register for CSE. We don't want to CSE these addresses;
11126 instead we CSE addresses from the GOT table, so skip this. */
11127 emit_insn (gen_movsi (reg, new_rtx));
11132 /* This symbol must be referenced via a load from the
11133 Global Offset Table (@GOT). */
11135 if (reload_in_progress)
11136 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11137 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11138 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11140 new_rtx = force_reg (Pmode, new_rtx);
11141 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11142 new_rtx = gen_const_mem (Pmode, new_rtx);
11143 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11146 reg = gen_reg_rtx (Pmode);
11147 emit_move_insn (reg, new_rtx);
11153 if (CONST_INT_P (addr)
11154 && !x86_64_immediate_operand (addr, VOIDmode))
11158 emit_move_insn (reg, addr);
11162 new_rtx = force_reg (Pmode, addr);
11164 else if (GET_CODE (addr) == CONST)
11166 addr = XEXP (addr, 0);
11168 /* We must match stuff we generate before. Assume the only
11169 unspecs that can get here are ours. Not that we could do
11170 anything with them anyway.... */
11171 if (GET_CODE (addr) == UNSPEC
11172 || (GET_CODE (addr) == PLUS
11173 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11175 gcc_assert (GET_CODE (addr) == PLUS);
11177 if (GET_CODE (addr) == PLUS)
11179 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11181 /* Check first to see if this is a constant offset from a @GOTOFF
11182 symbol reference. */
11183 if (gotoff_operand (op0, Pmode)
11184 && CONST_INT_P (op1))
11188 if (reload_in_progress)
11189 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11190 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11192 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11193 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11194 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11198 emit_move_insn (reg, new_rtx);
11204 if (INTVAL (op1) < -16*1024*1024
11205 || INTVAL (op1) >= 16*1024*1024)
11207 if (!x86_64_immediate_operand (op1, Pmode))
11208 op1 = force_reg (Pmode, op1);
11209 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11215 base = legitimize_pic_address (XEXP (addr, 0), reg);
11216 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11217 base == reg ? NULL_RTX : reg);
11219 if (CONST_INT_P (new_rtx))
11220 new_rtx = plus_constant (base, INTVAL (new_rtx));
11223 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11225 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11226 new_rtx = XEXP (new_rtx, 1);
11228 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11236 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11239 get_thread_pointer (int to_reg)
11243 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11247 reg = gen_reg_rtx (Pmode);
11248 insn = gen_rtx_SET (VOIDmode, reg, tp);
11249 insn = emit_insn (insn);
11254 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11255 false if we expect this to be used for a memory address and true if
11256 we expect to load the address into a register. */
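/* Editorial summary (illustrative, GNU TLS): the global- and
   local-dynamic models below call ___tls_get_addr at runtime;
   initial-exec loads a fixed offset through x@gottpoff and adds the
   thread pointer (%gs:0 on 32-bit); local-exec folds to
   tp + x@ntpoff with no runtime lookup at all. */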
11259 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11261 rtx dest, base, off, pic, tp;
11266 case TLS_MODEL_GLOBAL_DYNAMIC:
11267 dest = gen_reg_rtx (Pmode);
11268 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11270 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11272 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11275 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11276 insns = get_insns ();
11279 RTL_CONST_CALL_P (insns) = 1;
11280 emit_libcall_block (insns, dest, rax, x);
11282 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11283 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11285 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11287 if (TARGET_GNU2_TLS)
11289 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11291 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11295 case TLS_MODEL_LOCAL_DYNAMIC:
11296 base = gen_reg_rtx (Pmode);
11297 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11299 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11301 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11304 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11305 insns = get_insns ();
11308 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11309 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11310 RTL_CONST_CALL_P (insns) = 1;
11311 emit_libcall_block (insns, base, rax, note);
11313 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11314 emit_insn (gen_tls_local_dynamic_base_64 (base));
11316 emit_insn (gen_tls_local_dynamic_base_32 (base));
11318 if (TARGET_GNU2_TLS)
11320 rtx x = ix86_tls_module_base ();
11322 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11323 gen_rtx_MINUS (Pmode, x, tp));
11326 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11327 off = gen_rtx_CONST (Pmode, off);
11329 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11331 if (TARGET_GNU2_TLS)
11333 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11335 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11340 case TLS_MODEL_INITIAL_EXEC:
11344 type = UNSPEC_GOTNTPOFF;
11348 if (reload_in_progress)
11349 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11350 pic = pic_offset_table_rtx;
11351 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11353 else if (!TARGET_ANY_GNU_TLS)
11355 pic = gen_reg_rtx (Pmode);
11356 emit_insn (gen_set_got (pic));
11357 type = UNSPEC_GOTTPOFF;
11362 type = UNSPEC_INDNTPOFF;
11365 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11366 off = gen_rtx_CONST (Pmode, off);
11368 off = gen_rtx_PLUS (Pmode, pic, off);
11369 off = gen_const_mem (Pmode, off);
11370 set_mem_alias_set (off, ix86_GOT_alias_set ());
11372 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11374 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11375 off = force_reg (Pmode, off);
11376 return gen_rtx_PLUS (Pmode, base, off);
11380 base = get_thread_pointer (true);
11381 dest = gen_reg_rtx (Pmode);
11382 emit_insn (gen_subsi3 (dest, base, off));
11386 case TLS_MODEL_LOCAL_EXEC:
11387 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11388 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11389 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11390 off = gen_rtx_CONST (Pmode, off);
11392 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11394 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11395 return gen_rtx_PLUS (Pmode, base, off);
11399 base = get_thread_pointer (true);
11400 dest = gen_reg_rtx (Pmode);
11401 emit_insn (gen_subsi3 (dest, base, off));
11406 gcc_unreachable ();
11412 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11413 to DECL. */
11415 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11416 htab_t dllimport_map;
11419 get_dllimport_decl (tree decl)
11421 struct tree_map *h, in;
11424 const char *prefix;
11425 size_t namelen, prefixlen;
11430 if (!dllimport_map)
11431 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11433 in.hash = htab_hash_pointer (decl);
11434 in.base.from = decl;
11435 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11436 h = (struct tree_map *) *loc;
11440 *loc = h = ggc_alloc_tree_map ();
11442 h->base.from = decl;
11443 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11444 VAR_DECL, NULL, ptr_type_node);
11445 DECL_ARTIFICIAL (to) = 1;
11446 DECL_IGNORED_P (to) = 1;
11447 DECL_EXTERNAL (to) = 1;
11448 TREE_READONLY (to) = 1;
11450 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11451 name = targetm.strip_name_encoding (name);
11452 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11453 ? "*__imp_" : "*__imp__";
11454 namelen = strlen (name);
11455 prefixlen = strlen (prefix);
11456 imp_name = (char *) alloca (namelen + prefixlen + 1);
11457 memcpy (imp_name, prefix, prefixlen);
11458 memcpy (imp_name + prefixlen, name, namelen + 1);
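/* Editorial example: with user_label_prefix "_" (32-bit mingw) a
   plain symbol "foo" becomes "*__imp__foo"; fastcall symbols and
   targets with an empty prefix get the single-underscore
   "*__imp_foo" form instead. */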
11460 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11461 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11462 SET_SYMBOL_REF_DECL (rtl, to);
11463 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11465 rtl = gen_const_mem (Pmode, rtl);
11466 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11468 SET_DECL_RTL (to, rtl);
11469 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11474 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11475 true if we require the result to be a register. */
11477 static rtx
11478 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11479 {
11480 rtx x;
11481 tree imp_decl;
11483 gcc_assert (SYMBOL_REF_DECL (symbol));
11484 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11486 x = DECL_RTL (imp_decl);
11488 x = force_reg (Pmode, x);
11492 /* Try machine-dependent ways of modifying an illegitimate address
11493 to be legitimate. If we find one, return the new, valid address.
11494 This macro is used in only one place: `memory_address' in explow.c.
11496 OLDX is the address as it was before break_out_memory_refs was called.
11497 In some cases it is useful to look at this to decide what needs to be done.
11499 It is always safe for this macro to do nothing. It exists to recognize
11500 opportunities to optimize the output.
11502 For the 80386, we handle X+REG by loading X into a register R and
11503 using R+REG. R will go in a general reg and indexing will be used.
11504 However, if REG is a broken-out memory address or multiplication,
11505 nothing needs to be done because REG can certainly go in a general reg.
11507 When -fpic is used, special handling is needed for symbolic references.
11508 See comments by legitimize_pic_address in i386.c for details. */
11511 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11512 enum machine_mode mode)
11513 {
11514 int changed = 0;
11515 unsigned log;
11517 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11518 if (log)
11519 return legitimize_tls_address (x, (enum tls_model) log, false);
11520 if (GET_CODE (x) == CONST
11521 && GET_CODE (XEXP (x, 0)) == PLUS
11522 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11523 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11525 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11526 (enum tls_model) log, false);
11527 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11530 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11532 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11533 return legitimize_dllimport_symbol (x, true);
11534 if (GET_CODE (x) == CONST
11535 && GET_CODE (XEXP (x, 0)) == PLUS
11536 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11537 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11539 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11540 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11544 if (flag_pic && SYMBOLIC_CONST (x))
11545 return legitimize_pic_address (x, 0);
11547 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
11548 if (GET_CODE (x) == ASHIFT
11549 && CONST_INT_P (XEXP (x, 1))
11550 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11553 log = INTVAL (XEXP (x, 1));
11554 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11555 GEN_INT (1 << log));
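/* Editorial note: e.g. (ashift (reg) (const_int 2)) is rewritten to
   (mult (reg) (const_int 4)), the canonical form that
   ix86_decompose_address accepts as an index with scale 4. */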
11558 if (GET_CODE (x) == PLUS)
11560 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11562 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11563 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11564 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11567 log = INTVAL (XEXP (XEXP (x, 0), 1));
11568 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11569 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11570 GEN_INT (1 << log));
11573 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11574 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11575 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11578 log = INTVAL (XEXP (XEXP (x, 1), 1));
11579 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11580 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11581 GEN_INT (1 << log));
11584 /* Put multiply first if it isn't already. */
11585 if (GET_CODE (XEXP (x, 1)) == MULT)
11586 {
11587 rtx tmp = XEXP (x, 0);
11588 XEXP (x, 0) = XEXP (x, 1);
11589 XEXP (x, 1) = tmp;
11590 changed = 1;
11591 }
11593 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11594 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11595 created by virtual register instantiation, register elimination, and
11596 similar optimizations. */
11597 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11598 {
11599 changed = 1;
11600 x = gen_rtx_PLUS (Pmode,
11601 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11602 XEXP (XEXP (x, 1), 0)),
11603 XEXP (XEXP (x, 1), 1));
11604 }
11606 /* Canonicalize
11607 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11608 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11609 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11610 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11611 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11612 && CONSTANT_P (XEXP (x, 1)))
11613 {
11614 rtx constant;
11615 rtx other = NULL_RTX;
11617 if (CONST_INT_P (XEXP (x, 1)))
11618 {
11619 constant = XEXP (x, 1);
11620 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11621 }
11622 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11623 {
11624 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11625 other = XEXP (x, 1);
11626 }
11627 else
11628 constant = 0;
11630 if (constant)
11631 {
11632 changed = 1;
11633 x = gen_rtx_PLUS (Pmode,
11634 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11635 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11636 plus_constant (other, INTVAL (constant)));
11637 }
11638 }
11640 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11641 return x;
11643 if (GET_CODE (XEXP (x, 0)) == MULT)
11644 {
11645 changed = 1;
11646 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11647 }
11649 if (GET_CODE (XEXP (x, 1)) == MULT)
11650 {
11651 changed = 1;
11652 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11653 }
11655 if (changed
11656 && REG_P (XEXP (x, 1))
11657 && REG_P (XEXP (x, 0)))
11658 return x;
11660 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11661 {
11662 changed = 1;
11663 x = legitimize_pic_address (x, 0);
11664 }
11666 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11667 return x;
11669 if (REG_P (XEXP (x, 0)))
11670 {
11671 rtx temp = gen_reg_rtx (Pmode);
11672 rtx val = force_operand (XEXP (x, 1), temp);
11673 if (val != temp)
11674 emit_move_insn (temp, val);
11676 XEXP (x, 1) = temp;
11677 return x;
11678 }
11680 else if (REG_P (XEXP (x, 1)))
11681 {
11682 rtx temp = gen_reg_rtx (Pmode);
11683 rtx val = force_operand (XEXP (x, 0), temp);
11684 if (val != temp)
11685 emit_move_insn (temp, val);
11687 XEXP (x, 0) = temp;
11688 return x;
11689 }
11690 }
11692 return x;
11693 }
11695 /* Print an integer constant expression in assembler syntax. Addition
11696 and subtraction are the only arithmetic that may appear in these
11697 expressions. FILE is the stdio stream to write to, X is the rtx, and
11698 CODE is the operand print code from the output string. */
11701 output_pic_addr_const (FILE *file, rtx x, int code)
11705 switch (GET_CODE (x))
11708 gcc_assert (flag_pic);
11713 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11714 output_addr_const (file, x);
11717 const char *name = XSTR (x, 0);
11719 /* Mark the decl as referenced so that cgraph will
11720 output the function. */
11721 if (SYMBOL_REF_DECL (x))
11722 mark_decl_referenced (SYMBOL_REF_DECL (x));
11725 if (MACHOPIC_INDIRECT
11726 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11727 name = machopic_indirection_name (x, /*stub_p=*/true);
11729 assemble_name (file, name);
11731 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11732 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11733 fputs ("@PLT", file);
11740 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11741 assemble_name (asm_out_file, buf);
11745 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11749 /* This used to output parentheses around the expression,
11750 but that does not work on the 386 (either ATT or BSD assembler). */
11751 output_pic_addr_const (file, XEXP (x, 0), code);
11755 if (GET_MODE (x) == VOIDmode)
11757 /* We can use %d if the number is <32 bits and positive. */
11758 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11759 fprintf (file, "0x%lx%08lx",
11760 (unsigned long) CONST_DOUBLE_HIGH (x),
11761 (unsigned long) CONST_DOUBLE_LOW (x));
11763 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11766 /* We can't handle floating point constants;
11767 TARGET_PRINT_OPERAND must handle them. */
11768 output_operand_lossage ("floating constant misused");
11772 /* Some assemblers need integer constants to appear first. */
11773 if (CONST_INT_P (XEXP (x, 0)))
11775 output_pic_addr_const (file, XEXP (x, 0), code);
11777 output_pic_addr_const (file, XEXP (x, 1), code);
11781 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11782 output_pic_addr_const (file, XEXP (x, 1), code);
11784 output_pic_addr_const (file, XEXP (x, 0), code);
11790 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11791 output_pic_addr_const (file, XEXP (x, 0), code);
11793 output_pic_addr_const (file, XEXP (x, 1), code);
11795 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11799 gcc_assert (XVECLEN (x, 0) == 1);
11800 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11801 switch (XINT (x, 1))
11804 fputs ("@GOT", file);
11806 case UNSPEC_GOTOFF:
11807 fputs ("@GOTOFF", file);
11809 case UNSPEC_PLTOFF:
11810 fputs ("@PLTOFF", file);
11812 case UNSPEC_GOTPCREL:
11813 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11814 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11816 case UNSPEC_GOTTPOFF:
11817 /* FIXME: This might be @TPOFF in Sun ld too. */
11818 fputs ("@gottpoff", file);
11821 fputs ("@tpoff", file);
11823 case UNSPEC_NTPOFF:
11825 fputs ("@tpoff", file);
11827 fputs ("@ntpoff", file);
11829 case UNSPEC_DTPOFF:
11830 fputs ("@dtpoff", file);
11832 case UNSPEC_GOTNTPOFF:
11834 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11835 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11837 fputs ("@gotntpoff", file);
11839 case UNSPEC_INDNTPOFF:
11840 fputs ("@indntpoff", file);
11843 case UNSPEC_MACHOPIC_OFFSET:
11845 machopic_output_function_base_name (file);
11849 output_operand_lossage ("invalid UNSPEC as operand");
11855 output_operand_lossage ("invalid expression as operand");
11859 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11860 We need to emit DTP-relative relocations. */
11862 static void ATTRIBUTE_UNUSED
11863 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11865 fputs (ASM_LONG, file);
11866 output_addr_const (file, x);
11867 fputs ("@dtpoff", file);
11873 fputs (", 0", file);
11876 gcc_unreachable ();
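/* Editorial sketch of the emitted assembly: ".long foo@dtpoff" for
   size 4; for size 8 this 32-bit-relocation path pads with a zero
   high word, ".long foo@dtpoff, 0". */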
11880 /* Return true if X is a representation of the PIC register. This copes
11881 with calls from ix86_find_base_term, where the register might have
11882 been replaced by a cselib value. */
11885 ix86_pic_register_p (rtx x)
11887 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11888 return (pic_offset_table_rtx
11889 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11891 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11894 /* In the name of slightly smaller debug output, and to cater to
11895 general assembler lossage, recognize PIC+GOTOFF and turn it back
11896 into a direct symbol reference.
11898 On Darwin, this is necessary to avoid a crash, because Darwin
11899 has a different PIC label for each routine but the DWARF debugging
11900 information is not associated with any particular routine, so it's
11901 necessary to remove references to the PIC label from RTL stored by
11902 the DWARF output code. */
11905 ix86_delegitimize_address (rtx x)
11907 rtx orig_x = delegitimize_mem_from_attrs (x);
11908 /* addend is NULL or some rtx if x is something+GOTOFF where
11909 something doesn't include the PIC register. */
11910 rtx addend = NULL_RTX;
11911 /* reg_addend is NULL or a multiple of some register. */
11912 rtx reg_addend = NULL_RTX;
11913 /* const_addend is NULL or a const_int. */
11914 rtx const_addend = NULL_RTX;
11915 /* This is the result, or NULL. */
11916 rtx result = NULL_RTX;
11925 if (GET_CODE (x) != CONST
11926 || GET_CODE (XEXP (x, 0)) != UNSPEC
11927 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11928 || !MEM_P (orig_x))
11930 x = XVECEXP (XEXP (x, 0), 0, 0);
11931 if (GET_MODE (orig_x) != Pmode)
11932 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11936 if (GET_CODE (x) != PLUS
11937 || GET_CODE (XEXP (x, 1)) != CONST)
11940 if (ix86_pic_register_p (XEXP (x, 0)))
11941 /* %ebx + GOT/GOTOFF */
11943 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11945 /* %ebx + %reg * scale + GOT/GOTOFF */
11946 reg_addend = XEXP (x, 0);
11947 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11948 reg_addend = XEXP (reg_addend, 1);
11949 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11950 reg_addend = XEXP (reg_addend, 0);
11953 reg_addend = NULL_RTX;
11954 addend = XEXP (x, 0);
11958 addend = XEXP (x, 0);
11960 x = XEXP (XEXP (x, 1), 0);
11961 if (GET_CODE (x) == PLUS
11962 && CONST_INT_P (XEXP (x, 1)))
11964 const_addend = XEXP (x, 1);
11968 if (GET_CODE (x) == UNSPEC
11969 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11970 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11971 result = XVECEXP (x, 0, 0);
11973 if (TARGET_MACHO && darwin_local_data_pic (x)
11974 && !MEM_P (orig_x))
11975 result = XVECEXP (x, 0, 0);
11981 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11983 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11986 /* If the rest of original X doesn't involve the PIC register, add
11987 addend and subtract pic_offset_table_rtx. This can happen e.g.
11989 leal (%ebx, %ecx, 4), %ecx
11991 movl foo@GOTOFF(%ecx), %edx
11992 in which case we return (%ecx - %ebx) + foo. */
11993 if (pic_offset_table_rtx)
11994 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11995 pic_offset_table_rtx),
12000 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12001 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12005 /* If X is a machine specific address (i.e. a symbol or label being
12006 referenced as a displacement from the GOT implemented using an
12007 UNSPEC), then return the base term. Otherwise return X. */
12010 ix86_find_base_term (rtx x)
12016 if (GET_CODE (x) != CONST)
12018 term = XEXP (x, 0);
12019 if (GET_CODE (term) == PLUS
12020 && (CONST_INT_P (XEXP (term, 1))
12021 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12022 term = XEXP (term, 0);
12023 if (GET_CODE (term) != UNSPEC
12024 || XINT (term, 1) != UNSPEC_GOTPCREL)
12027 return XVECEXP (term, 0, 0);
12030 return ix86_delegitimize_address (x);
12034 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12035 int fp, FILE *file)
12037 const char *suffix;
12039 if (mode == CCFPmode || mode == CCFPUmode)
12041 code = ix86_fp_compare_code_to_integer (code);
12045 code = reverse_condition (code);
12096 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12100 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12101 Those same assemblers have the same but opposite lossage on cmov. */
12102 if (mode == CCmode)
12103 suffix = fp ? "nbe" : "a";
12104 else if (mode == CCCmode)
12107 gcc_unreachable ();
12123 gcc_unreachable ();
12127 gcc_assert (mode == CCmode || mode == CCCmode);
12144 gcc_unreachable ();
12148 /* ??? As above. */
12149 gcc_assert (mode == CCmode || mode == CCCmode);
12150 suffix = fp ? "nb" : "ae";
12153 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12157 /* ??? As above. */
12158 if (mode == CCmode)
12160 else if (mode == CCCmode)
12161 suffix = fp ? "nb" : "ae";
12163 gcc_unreachable ();
12166 suffix = fp ? "u" : "p";
12169 suffix = fp ? "nu" : "np";
12172 gcc_unreachable ();
12174 fputs (suffix, file);
12177 /* Print the name of register X to FILE based on its machine mode and number.
12178 If CODE is 'w', pretend the mode is HImode.
12179 If CODE is 'b', pretend the mode is QImode.
12180 If CODE is 'k', pretend the mode is SImode.
12181 If CODE is 'q', pretend the mode is DImode.
12182 If CODE is 'x', pretend the mode is V4SFmode.
12183 If CODE is 't', pretend the mode is V8SFmode.
12184 If CODE is 'h', pretend the reg is the 'high' byte register.
12185 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12186 If CODE is 'd', duplicate the operand for AVX instruction.
12187 */
12189 void
12190 print_reg (rtx x, int code, FILE *file)
12191 {
12192 const char *reg;
12193 bool duplicated = code == 'd' && TARGET_AVX;
12195 gcc_assert (x == pc_rtx
12196 || (REGNO (x) != ARG_POINTER_REGNUM
12197 && REGNO (x) != FRAME_POINTER_REGNUM
12198 && REGNO (x) != FLAGS_REG
12199 && REGNO (x) != FPSR_REG
12200 && REGNO (x) != FPCR_REG));
12202 if (ASSEMBLER_DIALECT == ASM_ATT)
12207 gcc_assert (TARGET_64BIT);
12208 fputs ("rip", file);
12212 if (code == 'w' || MMX_REG_P (x))
12214 else if (code == 'b')
12216 else if (code == 'k')
12218 else if (code == 'q')
12220 else if (code == 'y')
12222 else if (code == 'h')
12224 else if (code == 'x')
12226 else if (code == 't')
12229 code = GET_MODE_SIZE (GET_MODE (x));
12231 /* Irritatingly, AMD extended registers use a different naming convention
12232 from the normal registers. */
12233 if (REX_INT_REG_P (x))
12234 {
12235 gcc_assert (TARGET_64BIT);
12236 switch (code)
12237 {
12238 case 0:
12239 error ("extended registers have no high halves");
12240 break;
12241 case 1:
12242 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12243 break;
12244 case 2:
12245 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12246 break;
12247 case 4:
12248 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12249 break;
12250 case 8:
12251 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12252 break;
12253 default:
12254 error ("unsupported operand size for extended register");
12255 break;
12256 }
12257 return;
12258 }
12264 if (STACK_TOP_P (x))
12273 if (! ANY_FP_REG_P (x))
12274 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12279 reg = hi_reg_name[REGNO (x)];
12282 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12284 reg = qi_reg_name[REGNO (x)];
12287 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12289 reg = qi_high_reg_name[REGNO (x)];
12294 gcc_assert (!duplicated);
12296 fputs (hi_reg_name[REGNO (x)] + 1, file);
12301 gcc_unreachable ();
12307 if (ASSEMBLER_DIALECT == ASM_ATT)
12308 fprintf (file, ", %%%s", reg);
12310 fprintf (file, ", %s", reg);
12314 /* Locate some local-dynamic symbol still in use by this function
12315 so that we can print its name in some tls_local_dynamic_base
12316 pattern. */
12318 static int
12319 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12323 if (GET_CODE (x) == SYMBOL_REF
12324 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12326 cfun->machine->some_ld_name = XSTR (x, 0);
12333 static const char *
12334 get_some_local_dynamic_name (void)
12338 if (cfun->machine->some_ld_name)
12339 return cfun->machine->some_ld_name;
12341 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12342 if (NONDEBUG_INSN_P (insn)
12343 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12344 return cfun->machine->some_ld_name;
12349 /* Meaning of CODE:
12350 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12351 C -- print opcode suffix for set/cmov insn.
12352 c -- like C, but print reversed condition
12353 F,f -- likewise, but for floating-point.
12354 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12356 R -- print the prefix for register names.
12357 z -- print the opcode suffix for the size of the current operand.
12358 Z -- likewise, with special suffixes for x87 instructions.
12359 * -- print a star (in certain assembler syntax)
12360 A -- print an absolute memory reference.
12361 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12362 s -- print a shift double count, followed by the assembler's argument
12363 delimiter.
12364 b -- print the QImode name of the register for the indicated operand.
12365 %b0 would print %al if operands[0] is reg 0.
12366 w -- likewise, print the HImode name of the register.
12367 k -- likewise, print the SImode name of the register.
12368 q -- likewise, print the DImode name of the register.
12369 x -- likewise, print the V4SFmode name of the register.
12370 t -- likewise, print the V8SFmode name of the register.
12371 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12372 y -- print "st(0)" instead of "st" as a register.
12373 d -- print duplicated register operand for AVX instruction.
12374 D -- print condition for SSE cmp instruction.
12375 P -- if PIC, print an @PLT suffix.
12376 X -- don't print any sort of PIC '@' suffix for a symbol.
12377 & -- print some in-use local-dynamic symbol name.
12378 H -- print a memory address offset by 8; used for sse high-parts
12379 Y -- print condition for XOP pcom* instruction.
12380 + -- print a branch hint as 'cs' or 'ds' prefix
12381 ; -- print a semicolon (after prefixes due to a bug in older gas).
12382 */
12384 static void
12385 ix86_print_operand (FILE *file, rtx x, int code)
12392 if (ASSEMBLER_DIALECT == ASM_ATT)
12398 const char *name = get_some_local_dynamic_name ();
12400 output_operand_lossage ("'%%&' used without any "
12401 "local dynamic TLS references");
12403 assemble_name (file, name);
12408 switch (ASSEMBLER_DIALECT)
12415 /* Intel syntax. For absolute addresses, registers should not
12416 be surrounded by braces. */
12420 ix86_print_operand (file, x, 0);
12427 gcc_unreachable ();
12430 ix86_print_operand (file, x, 0);
12435 if (ASSEMBLER_DIALECT == ASM_ATT)
12440 if (ASSEMBLER_DIALECT == ASM_ATT)
12445 if (ASSEMBLER_DIALECT == ASM_ATT)
12450 if (ASSEMBLER_DIALECT == ASM_ATT)
12455 if (ASSEMBLER_DIALECT == ASM_ATT)
12460 if (ASSEMBLER_DIALECT == ASM_ATT)
12465 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12467 /* Opcodes don't get size suffixes if using Intel opcodes. */
12468 if (ASSEMBLER_DIALECT == ASM_INTEL)
12471 switch (GET_MODE_SIZE (GET_MODE (x)))
12490 output_operand_lossage
12491 ("invalid operand size for operand code '%c'", code);
12496 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12498 (0, "non-integer operand used with operand code '%c'", code);
12502 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12503 if (ASSEMBLER_DIALECT == ASM_INTEL)
12506 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12508 switch (GET_MODE_SIZE (GET_MODE (x)))
12511 #ifdef HAVE_AS_IX86_FILDS
12521 #ifdef HAVE_AS_IX86_FILDQ
12524 fputs ("ll", file);
12532 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12534 /* 387 opcodes don't get size suffixes
12535 if the operands are registers. */
12536 if (STACK_REG_P (x))
12539 switch (GET_MODE_SIZE (GET_MODE (x)))
12560 output_operand_lossage
12561 ("invalid operand type used with operand code '%c'", code);
12565 output_operand_lossage
12566 ("invalid operand size for operand code '%c'", code);
12583 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12585 ix86_print_operand (file, x, 0);
12586 fputs (", ", file);
12591 /* A little bit of braindamage here. The SSE compare instructions
12592 use completely different names for the comparisons than the
12593 fp conditional moves do. */
12596 switch (GET_CODE (x))
12599 fputs ("eq", file);
12602 fputs ("eq_us", file);
12605 fputs ("lt", file);
12608 fputs ("nge", file);
12611 fputs ("le", file);
12614 fputs ("ngt", file);
12617 fputs ("unord", file);
12620 fputs ("neq", file);
12623 fputs ("neq_oq", file);
12626 fputs ("ge", file);
12629 fputs ("nlt", file);
12632 fputs ("gt", file);
12635 fputs ("nle", file);
12638 fputs ("ord", file);
12641 output_operand_lossage ("operand is not a condition code, "
12642 "invalid operand code 'D'");
12648 switch (GET_CODE (x))
12652 fputs ("eq", file);
12656 fputs ("lt", file);
12660 fputs ("le", file);
12663 fputs ("unord", file);
12667 fputs ("neq", file);
12671 fputs ("nlt", file);
12675 fputs ("nle", file);
12678 fputs ("ord", file);
12681 output_operand_lossage ("operand is not a condition code, "
12682 "invalid operand code 'D'");
12688 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12689 if (ASSEMBLER_DIALECT == ASM_ATT)
12691 switch (GET_MODE (x))
12693 case HImode: putc ('w', file); break;
12695 case SFmode: putc ('l', file); break;
12697 case DFmode: putc ('q', file); break;
12698 default: gcc_unreachable ();
12705 if (!COMPARISON_P (x))
12707 output_operand_lossage ("operand is neither a constant nor a "
12708 "condition code, invalid operand code "
12712 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12715 if (!COMPARISON_P (x))
12717 output_operand_lossage ("operand is neither a constant nor a "
12718 "condition code, invalid operand code "
12722 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12723 if (ASSEMBLER_DIALECT == ASM_ATT)
12726 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12729 /* Like above, but with the condition reversed. */
12731 /* Check to see if argument to %c is really a constant
12732 and not a condition code which needs to be reversed. */
12733 if (!COMPARISON_P (x))
12735 output_operand_lossage ("operand is neither a constant nor a "
12736 "condition code, invalid operand "
12740 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12743 if (!COMPARISON_P (x))
12745 output_operand_lossage ("operand is neither a constant nor a "
12746 "condition code, invalid operand "
12750 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12751 if (ASSEMBLER_DIALECT == ASM_ATT)
12754 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12758 /* It doesn't actually matter what mode we use here, as we're
12759 only going to use this for printing. */
12760 x = adjust_address_nv (x, DImode, 8);
12768 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12771 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12774 int pred_val = INTVAL (XEXP (x, 0));
12776 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12777 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12779 int taken = pred_val > REG_BR_PROB_BASE / 2;
12780 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12782 /* Emit hints only where the default branch prediction
12783 heuristics would fail. */
12784 if (taken != cputaken)
12786 /* We use 3e (DS) prefix for taken branches and
12787 2e (CS) prefix for not taken branches. */
12789 fputs ("ds ; ", file);
12791 fputs ("cs ; ", file);
12799 switch (GET_CODE (x))
12802 fputs ("neq", file);
12805 fputs ("eq", file);
12809 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12813 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12817 fputs ("le", file);
12821 fputs ("lt", file);
12824 fputs ("unord", file);
12827 fputs ("ord", file);
12830 fputs ("ueq", file);
12833 fputs ("nlt", file);
12836 fputs ("nle", file);
12839 fputs ("ule", file);
12842 fputs ("ult", file);
12845 fputs ("une", file);
12848 output_operand_lossage ("operand is not a condition code, "
12849 "invalid operand code 'Y'");
12855 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12861 output_operand_lossage ("invalid operand code '%c'", code);
12866 print_reg (x, code, file);
12868 else if (MEM_P (x))
12870 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12871 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12872 && GET_MODE (x) != BLKmode)
12875 switch (GET_MODE_SIZE (GET_MODE (x)))
12877 case 1: size = "BYTE"; break;
12878 case 2: size = "WORD"; break;
12879 case 4: size = "DWORD"; break;
12880 case 8: size = "QWORD"; break;
12881 case 12: size = "TBYTE"; break;
12883 if (GET_MODE (x) == XFmode)
12888 case 32: size = "YMMWORD"; break;
12890 gcc_unreachable ();
12893 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12896 else if (code == 'w')
12898 else if (code == 'k')
12901 fputs (size, file);
12902 fputs (" PTR ", file);
12906 /* Avoid (%rip) for call operands. */
12907 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12908 && !CONST_INT_P (x))
12909 output_addr_const (file, x);
12910 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12911 output_operand_lossage ("invalid constraints for operand");
12913 output_address (x);
12916 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12921 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12922 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12924 if (ASSEMBLER_DIALECT == ASM_ATT)
12926 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12928 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
12930 fprintf (file, "0x%08x", (unsigned int) l);
12933 /* These float cases don't actually occur as immediate operands. */
12934 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12938 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12939 fputs (dstr, file);
12942 else if (GET_CODE (x) == CONST_DOUBLE
12943 && GET_MODE (x) == XFmode)
12947 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12948 fputs (dstr, file);
12953 /* We have patterns that allow zero sets of memory, for instance.
12954 In 64-bit mode, we should probably support all 8-byte vectors,
12955 since we can in fact encode that into an immediate. */
12956 if (GET_CODE (x) == CONST_VECTOR)
12958 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12964 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12966 if (ASSEMBLER_DIALECT == ASM_ATT)
12969 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12970 || GET_CODE (x) == LABEL_REF)
12972 if (ASSEMBLER_DIALECT == ASM_ATT)
12975 fputs ("OFFSET FLAT:", file);
12978 if (CONST_INT_P (x))
12979 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12981 output_pic_addr_const (file, x, code);
12983 output_addr_const (file, x);
12988 ix86_print_operand_punct_valid_p (unsigned char code)
12990 return (code == '*' || code == '+' || code == '&' || code == ';');
12993 /* Print a memory operand whose address is ADDR. */
12996 ix86_print_operand_address (FILE *file, rtx addr)
12998 struct ix86_address parts;
12999 rtx base, index, disp;
13001 int ok = ix86_decompose_address (addr, &parts);
13006 index = parts.index;
13008 scale = parts.scale;
13016 if (ASSEMBLER_DIALECT == ASM_ATT)
13018 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13021 gcc_unreachable ();
13024 /* Use the one-byte-shorter RIP-relative addressing in 64-bit mode. */
13025 if (TARGET_64BIT && !base && !index)
13029 if (GET_CODE (disp) == CONST
13030 && GET_CODE (XEXP (disp, 0)) == PLUS
13031 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13032 symbol = XEXP (XEXP (disp, 0), 0);
13034 if (GET_CODE (symbol) == LABEL_REF
13035 || (GET_CODE (symbol) == SYMBOL_REF
13036 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13039 if (!base && !index)
13041 /* A displacement-only address requires special attention. */
13043 if (CONST_INT_P (disp))
13045 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13046 fputs ("ds:", file);
13047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13050 output_pic_addr_const (file, disp, 0);
13052 output_addr_const (file, disp);
13056 if (ASSEMBLER_DIALECT == ASM_ATT)
13061 output_pic_addr_const (file, disp, 0);
13062 else if (GET_CODE (disp) == LABEL_REF)
13063 output_asm_label (disp);
13065 output_addr_const (file, disp);
13070 print_reg (base, 0, file);
13074 print_reg (index, 0, file);
13076 fprintf (file, ",%d", scale);
13082 rtx offset = NULL_RTX;
13086 /* Pull out the offset of a symbol; print any symbol itself. */
13087 if (GET_CODE (disp) == CONST
13088 && GET_CODE (XEXP (disp, 0)) == PLUS
13089 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13091 offset = XEXP (XEXP (disp, 0), 1);
13092 disp = gen_rtx_CONST (VOIDmode,
13093 XEXP (XEXP (disp, 0), 0));
13097 output_pic_addr_const (file, disp, 0);
13098 else if (GET_CODE (disp) == LABEL_REF)
13099 output_asm_label (disp);
13100 else if (CONST_INT_P (disp))
13103 output_addr_const (file, disp);
13109 print_reg (base, 0, file);
13112 if (INTVAL (offset) >= 0)
13114 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13118 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13125 print_reg (index, 0, file);
13127 fprintf (file, "*%d", scale);
13134 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13137 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13141 if (GET_CODE (x) != UNSPEC)
13144 op = XVECEXP (x, 0, 0);
13145 switch (XINT (x, 1))
13147 case UNSPEC_GOTTPOFF:
13148 output_addr_const (file, op);
13149 /* FIXME: This might be @TPOFF in Sun ld. */
13150 fputs ("@gottpoff", file);
13153 output_addr_const (file, op);
13154 fputs ("@tpoff", file);
13156 case UNSPEC_NTPOFF:
13157 output_addr_const (file, op);
13159 fputs ("@tpoff", file);
13161 fputs ("@ntpoff", file);
13163 case UNSPEC_DTPOFF:
13164 output_addr_const (file, op);
13165 fputs ("@dtpoff", file);
13167 case UNSPEC_GOTNTPOFF:
13168 output_addr_const (file, op);
13170 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13171 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13173 fputs ("@gotntpoff", file);
13175 case UNSPEC_INDNTPOFF:
13176 output_addr_const (file, op);
13177 fputs ("@indntpoff", file);
13180 case UNSPEC_MACHOPIC_OFFSET:
13181 output_addr_const (file, op);
13183 machopic_output_function_base_name (file);
13194 /* Split one or more DImode RTL references into pairs of SImode
13195 references. The RTL can be REG, offsettable MEM, integer constant, or
13196 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13197 split and "num" is its length. lo_half and hi_half are output arrays
13198 that parallel "operands". */
13201 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13205 rtx op = operands[num];
13207 /* simplify_subreg refuses to split volatile memory addresses,
13208 but we still have to handle them. */
13211 lo_half[num] = adjust_address (op, SImode, 0);
13212 hi_half[num] = adjust_address (op, SImode, 4);
13216 lo_half[num] = simplify_gen_subreg (SImode, op,
13217 GET_MODE (op) == VOIDmode
13218 ? DImode : GET_MODE (op), 0);
13219 hi_half[num] = simplify_gen_subreg (SImode, op,
13220 GET_MODE (op) == VOIDmode
13221 ? DImode : GET_MODE (op), 4);
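  /* For example (x86 is little-endian): the DImode constant
     0x0000000100000002 splits into lo_half (const_int 0x2) and hi_half
     (const_int 0x1); a DImode MEM splits into two SImode MEMs at byte
     offsets 0 and 4, as above.  */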
13225 /* Split one or more TImode RTL references into pairs of DImode
13226 references. The RTL can be REG, offsettable MEM, integer constant, or
13227 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
13228 split and "num" is its length. lo_half and hi_half are output arrays
13229 that parallel "operands". */
13232 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13236 rtx op = operands[num];
13238 /* simplify_subreg refuses to split volatile memory addresses, but we
13239 still have to handle them. */
13242 lo_half[num] = adjust_address (op, DImode, 0);
13243 hi_half[num] = adjust_address (op, DImode, 8);
13247 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13248 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13253 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13254 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13255 is the expression of the binary operation. The output may either be
13256 emitted here, or returned to the caller, like all output_* functions.
13258 There is no guarantee that the operands are the same mode, as they
13259 might be within FLOAT or FLOAT_EXTEND expressions. */
13261 #ifndef SYSV386_COMPAT
13262 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13263 wants to fix the assemblers because that causes incompatibility
13264 with gcc. No-one wants to fix gcc because that causes
13265 incompatibility with assemblers... You can use the option of
13266 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13267 #define SYSV386_COMPAT 1
13271 output_387_binary_op (rtx insn, rtx *operands)
13273 static char buf[40];
13276 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13278 #ifdef ENABLE_CHECKING
13279 /* Even if we do not want to check the inputs, this documents the input
13280 constraints, which helps in understanding the following code. */
13281 if (STACK_REG_P (operands[0])
13282 && ((REG_P (operands[1])
13283 && REGNO (operands[0]) == REGNO (operands[1])
13284 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13285 || (REG_P (operands[2])
13286 && REGNO (operands[0]) == REGNO (operands[2])
13287 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13288 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13291 gcc_assert (is_sse);
13294 switch (GET_CODE (operands[3]))
13297 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13298 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13306 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13307 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13315 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13316 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13324 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13325 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13333 gcc_unreachable ();
13340 strcpy (buf, ssep);
13341 if (GET_MODE (operands[0]) == SFmode)
13342 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13344 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13348 strcpy (buf, ssep + 1);
13349 if (GET_MODE (operands[0]) == SFmode)
13350 strcat (buf, "ss\t{%2, %0|%0, %2}");
13352 strcat (buf, "sd\t{%2, %0|%0, %2}");
13358 switch (GET_CODE (operands[3]))
13362 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13364 rtx temp = operands[2];
13365 operands[2] = operands[1];
13366 operands[1] = temp;
13369 /* We now know operands[0] == operands[1]. */
13371 if (MEM_P (operands[2]))
13377 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13379 if (STACK_TOP_P (operands[0]))
13380 /* How is it that we are storing to a dead operand[2]?
13381 Well, presumably operands[1] is dead too. We can't
13382 store the result to st(0) as st(0) gets popped on this
13383 instruction. Instead store to operands[2] (which I
13384 think has to be st(1)). st(1) will be popped later.
13385 gcc <= 2.8.1 didn't have this check and generated
13386 assembly code that the Unixware assembler rejected. */
13387 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13389 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13393 if (STACK_TOP_P (operands[0]))
13394 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13396 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13401 if (MEM_P (operands[1]))
13407 if (MEM_P (operands[2]))
13413 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13416 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13417 derived assemblers, confusingly reverse the direction of
13418 the operation for fsub{r} and fdiv{r} when the
13419 destination register is not st(0). The Intel assembler
13420 doesn't have this brain damage. Read !SYSV386_COMPAT to
13421 figure out what the hardware really does. */
13422 if (STACK_TOP_P (operands[0]))
13423 p = "{p\t%0, %2|rp\t%2, %0}";
13425 p = "{rp\t%2, %0|p\t%0, %2}";
13427 if (STACK_TOP_P (operands[0]))
13428 /* As above for fmul/fadd, we can't store to st(0). */
13429 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13431 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13436 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13439 if (STACK_TOP_P (operands[0]))
13440 p = "{rp\t%0, %1|p\t%1, %0}";
13442 p = "{p\t%1, %0|rp\t%0, %1}";
13444 if (STACK_TOP_P (operands[0]))
13445 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13447 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13452 if (STACK_TOP_P (operands[0]))
13454 if (STACK_TOP_P (operands[1]))
13455 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13457 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13460 else if (STACK_TOP_P (operands[1]))
13463 p = "{\t%1, %0|r\t%0, %1}";
13465 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13471 p = "{r\t%2, %0|\t%0, %2}";
13473 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13479 gcc_unreachable ();
13486 /* Return needed mode for entity in optimize_mode_switching pass. */
13489 ix86_mode_needed (int entity, rtx insn)
13491 enum attr_i387_cw mode;
13493 /* The mode UNINITIALIZED is used to store the control word after a
13494 function call or ASM pattern. The mode ANY specifies that the
13495 function has no requirements on the control word and makes no
13496 changes to the bits we are interested in. */
13499 || (NONJUMP_INSN_P (insn)
13500 && (asm_noperands (PATTERN (insn)) >= 0
13501 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13502 return I387_CW_UNINITIALIZED;
13504 if (recog_memoized (insn) < 0)
13505 return I387_CW_ANY;
13507 mode = get_attr_i387_cw (insn);
13512 if (mode == I387_CW_TRUNC)
13517 if (mode == I387_CW_FLOOR)
13522 if (mode == I387_CW_CEIL)
13527 if (mode == I387_CW_MASK_PM)
13532 gcc_unreachable ();
13535 return I387_CW_ANY;
13538 /* Output code to initialize control word copies used by trunc?f?i and
13539 rounding patterns. CURRENT_MODE is set to current control word,
13540 while NEW_MODE is set to new control word. */
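/* The bit twiddling below follows the x87 control word layout: bits 11:10
   are the rounding-control field (00 = to nearest, 01 = round down,
   10 = round up, 11 = truncate) and bit 5 is the precision-exception
   mask.  So or-ing in 0x0c00 selects truncation, clearing the field and
   or-ing in 0x0400 or 0x0800 selects floor or ceil, and or-ing in 0x0020
   masks the precision exception.  */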
13543 emit_i387_cw_initialization (int mode)
13545 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13548 enum ix86_stack_slot slot;
13550 rtx reg = gen_reg_rtx (HImode);
13552 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13553 emit_move_insn (reg, copy_rtx (stored_mode));
13555 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13556 || optimize_function_for_size_p (cfun))
13560 case I387_CW_TRUNC:
13561 /* round toward zero (truncate) */
13562 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13563 slot = SLOT_CW_TRUNC;
13566 case I387_CW_FLOOR:
13567 /* round down toward -oo */
13568 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13569 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13570 slot = SLOT_CW_FLOOR;
13574 /* round up toward +oo */
13575 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13576 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13577 slot = SLOT_CW_CEIL;
13580 case I387_CW_MASK_PM:
13581 /* mask precision exception for nearbyint() */
13582 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13583 slot = SLOT_CW_MASK_PM;
13587 gcc_unreachable ();
13594 case I387_CW_TRUNC:
13595 /* round toward zero (truncate) */
13596 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13597 slot = SLOT_CW_TRUNC;
13600 case I387_CW_FLOOR:
13601 /* round down toward -oo */
13602 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13603 slot = SLOT_CW_FLOOR;
13607 /* round up toward +oo */
13608 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13609 slot = SLOT_CW_CEIL;
13612 case I387_CW_MASK_PM:
13613 /* mask precision exception for nearbyint() */
13614 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13615 slot = SLOT_CW_MASK_PM;
13619 gcc_unreachable ();
13623 gcc_assert (slot < MAX_386_STACK_LOCALS);
13625 new_mode = assign_386_stack_local (HImode, slot);
13626 emit_move_insn (new_mode, reg);
13629 /* Output code for INSN to convert a float to a signed int. OPERANDS
13630 are the insn operands. The output may be [HSD]Imode and the input
13631 operand may be [SDX]Fmode. */
13634 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13636 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13637 int dimode_p = GET_MODE (operands[0]) == DImode;
13638 int round_mode = get_attr_i387_cw (insn);
13640 /* Jump through a hoop or two for DImode, since the hardware has no
13641 non-popping instruction. We used to do this a different way, but
13642 that was somewhat fragile and broke with post-reload splitters. */
13643 if ((dimode_p || fisttp) && !stack_top_dies)
13644 output_asm_insn ("fld\t%y1", operands);
13646 gcc_assert (STACK_TOP_P (operands[1]));
13647 gcc_assert (MEM_P (operands[0]));
13648 gcc_assert (GET_MODE (operands[1]) != TFmode);
13651 output_asm_insn ("fisttp%Z0\t%0", operands);
13654 if (round_mode != I387_CW_ANY)
13655 output_asm_insn ("fldcw\t%3", operands);
13656 if (stack_top_dies || dimode_p)
13657 output_asm_insn ("fistp%Z0\t%0", operands);
13659 output_asm_insn ("fist%Z0\t%0", operands);
13660 if (round_mode != I387_CW_ANY)
13661 output_asm_insn ("fldcw\t%2", operands);
13667 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13668 have the values zero or one, indicates the ffreep insn's operand
13669 from the OPERANDS array. */
13671 static const char *
13672 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13674 if (TARGET_USE_FFREEP)
13675 #ifdef HAVE_AS_IX86_FFREEP
13676 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13679 static char retval[32];
13680 int regno = REGNO (operands[opno]);
13682 gcc_assert (FP_REGNO_P (regno));
13684 regno -= FIRST_STACK_REG;
13686 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
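      /* The two bytes stored above are the raw encoding of
	 ffreep %st(N): opcode 0xdf, ModRM byte 0xc0 + N, emitted as the
	 little-endian word 0xcNdf (e.g. 0xc0df for %st(0)).  */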
13691 return opno ? "fstp\t%y1" : "fstp\t%y0";
13695 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13696 should be used. UNORDERED_P is true when fucom should be used. */
13699 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13701 int stack_top_dies;
13702 rtx cmp_op0, cmp_op1;
13703 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13707 cmp_op0 = operands[0];
13708 cmp_op1 = operands[1];
13712 cmp_op0 = operands[1];
13713 cmp_op1 = operands[2];
13718 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13719 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13720 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13721 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13723 if (GET_MODE (operands[0]) == SFmode)
13725 return &ucomiss[TARGET_AVX ? 0 : 1];
13727 return &comiss[TARGET_AVX ? 0 : 1];
13730 return &ucomisd[TARGET_AVX ? 0 : 1];
13732 return &comisd[TARGET_AVX ? 0 : 1];
13735 gcc_assert (STACK_TOP_P (cmp_op0));
13737 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13739 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13741 if (stack_top_dies)
13743 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13744 return output_387_ffreep (operands, 1);
13747 return "ftst\n\tfnstsw\t%0";
13750 if (STACK_REG_P (cmp_op1)
13752 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13753 && REGNO (cmp_op1) != FIRST_STACK_REG)
13755 /* If the top of the 387 stack dies, and the other operand is
13756 also a stack register that dies, then this must be a
13757 `fcompp' float compare. */
13761 /* There is no double popping fcomi variant. Fortunately,
13762 eflags is immune from the fstp's cc clobbering. */
13764 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13766 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13767 return output_387_ffreep (operands, 0);
13772 return "fucompp\n\tfnstsw\t%0";
13774 return "fcompp\n\tfnstsw\t%0";
13779 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13781 static const char * const alt[16] =
13783 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13784 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13785 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13786 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13788 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13789 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13793 "fcomi\t{%y1, %0|%0, %y1}",
13794 "fcomip\t{%y1, %0|%0, %y1}",
13795 "fucomi\t{%y1, %0|%0, %y1}",
13796 "fucomip\t{%y1, %0|%0, %y1}",
13807 mask = eflags_p << 3;
13808 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13809 mask |= unordered_p << 1;
13810 mask |= stack_top_dies;
13812 gcc_assert (mask < 16);
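  /* For example, eflags_p = 1 with a floating-point operand,
     unordered_p = 1 and a dying stack top gives mask 8 + 2 + 1 = 11,
     selecting "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */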
13821 ix86_output_addr_vec_elt (FILE *file, int value)
13823 const char *directive = ASM_LONG;
13827 directive = ASM_QUAD;
13829 gcc_assert (!TARGET_64BIT);
13832 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13836 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13838 const char *directive = ASM_LONG;
13841 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13842 directive = ASM_QUAD;
13844 gcc_assert (!TARGET_64BIT);
13846 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13847 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13848 fprintf (file, "%s%s%d-%s%d\n",
13849 directive, LPREFIX, value, LPREFIX, rel);
13850 else if (HAVE_AS_GOTOFF_IN_DATA)
13851 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13853 else if (TARGET_MACHO)
13855 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13856 machopic_output_function_base_name (file);
13861 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13862 GOT_SYMBOL_NAME, LPREFIX, value);
13865 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13869 ix86_expand_clear (rtx dest)
13873 /* We play register width games, which are only valid after reload. */
13874 gcc_assert (reload_completed);
13876 /* Avoid HImode and its attendant prefix byte. */
13877 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13878 dest = gen_rtx_REG (SImode, REGNO (dest));
13879 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13881 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13882 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13884 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13885 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
13891 /* X is an unchanging MEM. If it is a constant pool reference, return
13892 the constant pool rtx, else NULL. */
13895 maybe_get_pool_constant (rtx x)
13897 x = ix86_delegitimize_address (XEXP (x, 0));
13899 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13900 return get_pool_constant (x);
13906 ix86_expand_move (enum machine_mode mode, rtx operands[])
13909 enum tls_model model;
13914 if (GET_CODE (op1) == SYMBOL_REF)
13916 model = SYMBOL_REF_TLS_MODEL (op1);
13919 op1 = legitimize_tls_address (op1, model, true);
13920 op1 = force_operand (op1, op0);
13924 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13925 && SYMBOL_REF_DLLIMPORT_P (op1))
13926 op1 = legitimize_dllimport_symbol (op1, false);
13928 else if (GET_CODE (op1) == CONST
13929 && GET_CODE (XEXP (op1, 0)) == PLUS
13930 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13932 rtx addend = XEXP (XEXP (op1, 0), 1);
13933 rtx symbol = XEXP (XEXP (op1, 0), 0);
13936 model = SYMBOL_REF_TLS_MODEL (symbol);
13938 tmp = legitimize_tls_address (symbol, model, true);
13939 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13940 && SYMBOL_REF_DLLIMPORT_P (symbol))
13941 tmp = legitimize_dllimport_symbol (symbol, true);
13945 tmp = force_operand (tmp, NULL);
13946 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13947 op0, 1, OPTAB_DIRECT);
13953 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13955 if (TARGET_MACHO && !TARGET_64BIT)
13960 rtx temp = ((reload_in_progress
13961 || ((op0 && REG_P (op0))
13963 ? op0 : gen_reg_rtx (Pmode));
13964 op1 = machopic_indirect_data_reference (op1, temp);
13965 op1 = machopic_legitimize_pic_address (op1, mode,
13966 temp == op1 ? 0 : temp);
13968 else if (MACHOPIC_INDIRECT)
13969 op1 = machopic_indirect_data_reference (op1, 0);
13977 op1 = force_reg (Pmode, op1);
13978 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13980 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13981 op1 = legitimize_pic_address (op1, reg);
13990 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13991 || !push_operand (op0, mode))
13993 op1 = force_reg (mode, op1);
13995 if (push_operand (op0, mode)
13996 && ! general_no_elim_operand (op1, mode))
13997 op1 = copy_to_mode_reg (mode, op1);
13999 /* Force large constants in 64-bit compilation into registers
14000 to get them CSEed. */
14001 if (can_create_pseudo_p ()
14002 && (mode == DImode) && TARGET_64BIT
14003 && immediate_operand (op1, mode)
14004 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14005 && !register_operand (op0, mode)
14007 op1 = copy_to_mode_reg (mode, op1);
14009 if (can_create_pseudo_p ()
14010 && FLOAT_MODE_P (mode)
14011 && GET_CODE (op1) == CONST_DOUBLE)
14013 /* If we are loading a floating point constant to a register,
14014 force the value to memory now, since we'll get better code
14015 out of the back end. */
14017 op1 = validize_mem (force_const_mem (mode, op1));
14018 if (!register_operand (op0, mode))
14020 rtx temp = gen_reg_rtx (mode);
14021 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14022 emit_move_insn (op0, temp);
14028 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14032 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14034 rtx op0 = operands[0], op1 = operands[1];
14035 unsigned int align = GET_MODE_ALIGNMENT (mode);
14037 /* Force constants other than zero into memory. We do not know how
14038 the instructions used to build constants modify the upper 64 bits
14039 of the register; once we have that information, we may be able
14040 to handle some of them more efficiently. */
14041 if (can_create_pseudo_p ()
14042 && register_operand (op0, mode)
14043 && (CONSTANT_P (op1)
14044 || (GET_CODE (op1) == SUBREG
14045 && CONSTANT_P (SUBREG_REG (op1))))
14046 && !standard_sse_constant_p (op1))
14047 op1 = validize_mem (force_const_mem (mode, op1));
14049 /* We need to check memory alignment for SSE mode, since attributes
14050 can make operands unaligned. */
14051 if (can_create_pseudo_p ()
14052 && SSE_REG_MODE_P (mode)
14053 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14054 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14058 /* ix86_expand_vector_move_misalign() does not like constants ... */
14059 if (CONSTANT_P (op1)
14060 || (GET_CODE (op1) == SUBREG
14061 && CONSTANT_P (SUBREG_REG (op1))))
14062 op1 = validize_mem (force_const_mem (mode, op1));
14064 /* ... nor both arguments in memory. */
14065 if (!register_operand (op0, mode)
14066 && !register_operand (op1, mode))
14067 op1 = force_reg (mode, op1);
14069 tmp[0] = op0; tmp[1] = op1;
14070 ix86_expand_vector_move_misalign (mode, tmp);
14074 /* Make operand1 a register if it isn't already. */
14075 if (can_create_pseudo_p ()
14076 && !register_operand (op0, mode)
14077 && !register_operand (op1, mode))
14079 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14083 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14086 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14087 straight to ix86_expand_vector_move. */
14088 /* Code generation for scalar reg-reg moves of single and double precision data:
14089 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14093 if (x86_sse_partial_reg_dependency == true)
14098 Code generation for scalar loads of double precision data:
14099 if (x86_sse_split_regs == true)
14100 movlpd mem, reg (gas syntax)
14104 Code generation for unaligned packed loads of single precision data
14105 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14106 if (x86_sse_unaligned_move_optimal)
14109 if (x86_sse_partial_reg_dependency == true)
14121 Code generation for unaligned packed loads of double precision data
14122 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14123 if (x86_sse_unaligned_move_optimal)
14126 if (x86_sse_split_regs == true)
14139 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14148 switch (GET_MODE_CLASS (mode))
14150 case MODE_VECTOR_INT:
14152 switch (GET_MODE_SIZE (mode))
14155 /* If we're optimizing for size, movups is the smallest. */
14156 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14158 op0 = gen_lowpart (V4SFmode, op0);
14159 op1 = gen_lowpart (V4SFmode, op1);
14160 emit_insn (gen_avx_movups (op0, op1));
14163 op0 = gen_lowpart (V16QImode, op0);
14164 op1 = gen_lowpart (V16QImode, op1);
14165 emit_insn (gen_avx_movdqu (op0, op1));
14168 op0 = gen_lowpart (V32QImode, op0);
14169 op1 = gen_lowpart (V32QImode, op1);
14170 emit_insn (gen_avx_movdqu256 (op0, op1));
14173 gcc_unreachable ();
14176 case MODE_VECTOR_FLOAT:
14177 op0 = gen_lowpart (mode, op0);
14178 op1 = gen_lowpart (mode, op1);
14183 emit_insn (gen_avx_movups (op0, op1));
14186 emit_insn (gen_avx_movups256 (op0, op1));
14189 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14191 op0 = gen_lowpart (V4SFmode, op0);
14192 op1 = gen_lowpart (V4SFmode, op1);
14193 emit_insn (gen_avx_movups (op0, op1));
14196 emit_insn (gen_avx_movupd (op0, op1));
14199 emit_insn (gen_avx_movupd256 (op0, op1));
14202 gcc_unreachable ();
14207 gcc_unreachable ();
14215 /* If we're optimizing for size, movups is the smallest. */
14216 if (optimize_insn_for_size_p ()
14217 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14219 op0 = gen_lowpart (V4SFmode, op0);
14220 op1 = gen_lowpart (V4SFmode, op1);
14221 emit_insn (gen_sse_movups (op0, op1));
14225 /* ??? If we have typed data, then it would appear that using
14226 movdqu is the only way to get unaligned data loaded with
14228 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14230 op0 = gen_lowpart (V16QImode, op0);
14231 op1 = gen_lowpart (V16QImode, op1);
14232 emit_insn (gen_sse2_movdqu (op0, op1));
14236 if (TARGET_SSE2 && mode == V2DFmode)
14240 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14242 op0 = gen_lowpart (V2DFmode, op0);
14243 op1 = gen_lowpart (V2DFmode, op1);
14244 emit_insn (gen_sse2_movupd (op0, op1));
14248 /* When SSE registers are split into halves, we can avoid
14249 writing to the top half twice. */
14250 if (TARGET_SSE_SPLIT_REGS)
14252 emit_clobber (op0);
14257 /* ??? Not sure about the best option for the Intel chips.
14258 The following would seem to satisfy; the register is
14259 entirely cleared, breaking the dependency chain. We
14260 then store to the upper half, with a dependency depth
14261 of one. A rumor has it that Intel recommends two movsd
14262 followed by an unpacklpd, but this is unconfirmed. And
14263 given that the dependency depth of the unpacklpd would
14264 still be one, I'm not sure why this would be better. */
14265 zero = CONST0_RTX (V2DFmode);
14268 m = adjust_address (op1, DFmode, 0);
14269 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14270 m = adjust_address (op1, DFmode, 8);
14271 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14275 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14277 op0 = gen_lowpart (V4SFmode, op0);
14278 op1 = gen_lowpart (V4SFmode, op1);
14279 emit_insn (gen_sse_movups (op0, op1));
14283 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14284 emit_move_insn (op0, CONST0_RTX (mode));
14286 emit_clobber (op0);
14288 if (mode != V4SFmode)
14289 op0 = gen_lowpart (V4SFmode, op0);
14290 m = adjust_address (op1, V2SFmode, 0);
14291 emit_insn (gen_sse_loadlps (op0, op0, m));
14292 m = adjust_address (op1, V2SFmode, 8);
14293 emit_insn (gen_sse_loadhps (op0, op0, m));
14296 else if (MEM_P (op0))
14298 /* If we're optimizing for size, movups is the smallest. */
14299 if (optimize_insn_for_size_p ()
14300 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14302 op0 = gen_lowpart (V4SFmode, op0);
14303 op1 = gen_lowpart (V4SFmode, op1);
14304 emit_insn (gen_sse_movups (op0, op1));
14308 /* ??? Similar to above, only less clear because of quote
14309 typeless stores unquote. */
14310 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14311 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14313 op0 = gen_lowpart (V16QImode, op0);
14314 op1 = gen_lowpart (V16QImode, op1);
14315 emit_insn (gen_sse2_movdqu (op0, op1));
14319 if (TARGET_SSE2 && mode == V2DFmode)
14321 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14323 op0 = gen_lowpart (V2DFmode, op0);
14324 op1 = gen_lowpart (V2DFmode, op1);
14325 emit_insn (gen_sse2_movupd (op0, op1));
14329 m = adjust_address (op0, DFmode, 0);
14330 emit_insn (gen_sse2_storelpd (m, op1));
14331 m = adjust_address (op0, DFmode, 8);
14332 emit_insn (gen_sse2_storehpd (m, op1));
14337 if (mode != V4SFmode)
14338 op1 = gen_lowpart (V4SFmode, op1);
14340 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14342 op0 = gen_lowpart (V4SFmode, op0);
14343 emit_insn (gen_sse_movups (op0, op1));
14347 m = adjust_address (op0, V2SFmode, 0);
14348 emit_insn (gen_sse_storelps (m, op1));
14349 m = adjust_address (op0, V2SFmode, 8);
14350 emit_insn (gen_sse_storehps (m, op1));
14355 gcc_unreachable ();
14358 /* Expand a push in MODE. This is some mode for which we do not support
14359 proper push instructions, at least from the registers that we expect
14360 the value to live in. */
14363 ix86_expand_push (enum machine_mode mode, rtx x)
14367 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14368 GEN_INT (-GET_MODE_SIZE (mode)),
14369 stack_pointer_rtx, 1, OPTAB_DIRECT);
14370 if (tmp != stack_pointer_rtx)
14371 emit_move_insn (stack_pointer_rtx, tmp);
14373 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
14375 /* When we push an operand onto the stack, it has to be aligned at least
14376 at the function argument boundary. However, since we don't have
14377 the argument type, we can't determine the actual argument
14379 emit_move_insn (tmp, x);
14382 /* Helper function of ix86_fixup_binary_operands to canonicalize
14383 operand order. Returns true if the operands should be swapped. */
14386 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14389 rtx dst = operands[0];
14390 rtx src1 = operands[1];
14391 rtx src2 = operands[2];
14393 /* If the operation is not commutative, we can't do anything. */
14394 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14397 /* Highest priority is that src1 should match dst. */
14398 if (rtx_equal_p (dst, src1))
14400 if (rtx_equal_p (dst, src2))
14403 /* Next highest priority is that immediate constants come second. */
14404 if (immediate_operand (src2, mode))
14406 if (immediate_operand (src1, mode))
14409 /* Lowest priority is that memory references should come second. */
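  /* For example, given dst = mem1 + dst, swapping src1 and src2 makes
     src1 match dst, so the insn can use the two-address
     "add mem, reg" form instead of needing an extra move.  */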
14419 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14420 destination to use for the operation. If different from the true
14421 destination in operands[0], a copy operation will be required. */
14424 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14427 rtx dst = operands[0];
14428 rtx src1 = operands[1];
14429 rtx src2 = operands[2];
14431 /* Canonicalize operand order. */
14432 if (ix86_swap_binary_operands_p (code, mode, operands))
14436 /* It is invalid to swap operands of different modes. */
14437 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14444 /* Both source operands cannot be in memory. */
14445 if (MEM_P (src1) && MEM_P (src2))
14447 /* Optimization: Only read from memory once. */
14448 if (rtx_equal_p (src1, src2))
14450 src2 = force_reg (mode, src2);
14454 src2 = force_reg (mode, src2);
14457 /* If the destination is memory, and we do not have matching source
14458 operands, do things in registers. */
14459 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14460 dst = gen_reg_rtx (mode);
14462 /* Source 1 cannot be a constant. */
14463 if (CONSTANT_P (src1))
14464 src1 = force_reg (mode, src1);
14466 /* Source 1 cannot be a non-matching memory. */
14467 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14468 src1 = force_reg (mode, src1);
14470 operands[1] = src1;
14471 operands[2] = src2;
14475 /* Similarly, but assume that the destination has already been
14476 set up properly. */
14479 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14480 enum machine_mode mode, rtx operands[])
14482 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14483 gcc_assert (dst == operands[0]);
14486 /* Attempt to expand a binary operator. Make the expansion closer to the
14487 actual machine than just general_operand, which would allow 3 separate
14488 memory references (one output, two inputs) in a single insn. */
14491 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14494 rtx src1, src2, dst, op, clob;
14496 dst = ix86_fixup_binary_operands (code, mode, operands);
14497 src1 = operands[1];
14498 src2 = operands[2];
14500 /* Emit the instruction. */
14502 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14503 if (reload_in_progress)
14505 /* Reload doesn't know about the flags register, and doesn't know that
14506 it doesn't want to clobber it. We can only do this with PLUS. */
14507 gcc_assert (code == PLUS);
14512 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14513 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
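  /* For instance, expanding an SImode PLUS emits RTL of the shape
       (parallel [(set (reg:SI 0) (plus:SI (reg:SI 0) (reg:SI 1)))
		  (clobber (reg:CC FLAGS_REG))])
     since these ALU instructions always overwrite the flags.  */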
14516 /* Fix up the destination if needed. */
14517 if (dst != operands[0])
14518 emit_move_insn (operands[0], dst);
14521 /* Return TRUE or FALSE depending on whether the binary operator meets the
14522 appropriate constraints. */
14525 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14528 rtx dst = operands[0];
14529 rtx src1 = operands[1];
14530 rtx src2 = operands[2];
14532 /* Both source operands cannot be in memory. */
14533 if (MEM_P (src1) && MEM_P (src2))
14536 /* Canonicalize operand order for commutative operators. */
14537 if (ix86_swap_binary_operands_p (code, mode, operands))
14544 /* If the destination is memory, we must have a matching source operand. */
14545 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14548 /* Source 1 cannot be a constant. */
14549 if (CONSTANT_P (src1))
14552 /* Source 1 cannot be a non-matching memory. */
14553 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14559 /* Attempt to expand a unary operator. Make the expansion closer to the
14560 actual machine than just general_operand, which would allow 2 separate
14561 memory references (one output, one input) in a single insn. */
14564 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14567 int matching_memory;
14568 rtx src, dst, op, clob;
14573 /* If the destination is memory, and we do not have matching source
14574 operands, do things in registers. */
14575 matching_memory = 0;
14578 if (rtx_equal_p (dst, src))
14579 matching_memory = 1;
14581 dst = gen_reg_rtx (mode);
14584 /* When source operand is memory, destination must match. */
14585 if (MEM_P (src) && !matching_memory)
14586 src = force_reg (mode, src);
14588 /* Emit the instruction. */
14590 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14591 if (reload_in_progress || code == NOT)
14593 /* Reload doesn't know about the flags register, and doesn't know that
14594 it doesn't want to clobber it. */
14595 gcc_assert (code == NOT);
14600 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14601 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14604 /* Fix up the destination if needed. */
14605 if (dst != operands[0])
14606 emit_move_insn (operands[0], dst);
14609 #define LEA_SEARCH_THRESHOLD 12
14611 /* Search backward for a non-agu definition of register number REGNO1
14612 or register number REGNO2 in INSN's basic block until we either
14613 1. pass LEA_SEARCH_THRESHOLD instructions, or
14614 2. reach the BB boundary, or
14615 3. reach an agu definition.
14616 Returns the distance between the non-agu definition point and INSN.
14617 If there is no definition point, returns -1. */
14620 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14623 basic_block bb = BLOCK_FOR_INSN (insn);
14626 enum attr_type insn_type;
14628 if (insn != BB_HEAD (bb))
14630 rtx prev = PREV_INSN (insn);
14631 while (prev && distance < LEA_SEARCH_THRESHOLD)
14633 if (NONDEBUG_INSN_P (prev))
14636 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14637 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14638 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14639 && (regno1 == DF_REF_REGNO (*def_rec)
14640 || regno2 == DF_REF_REGNO (*def_rec)))
14642 insn_type = get_attr_type (prev);
14643 if (insn_type != TYPE_LEA)
14647 if (prev == BB_HEAD (bb))
14649 prev = PREV_INSN (prev);
14653 if (distance < LEA_SEARCH_THRESHOLD)
14657 bool simple_loop = false;
14659 FOR_EACH_EDGE (e, ei, bb->preds)
14662 simple_loop = true;
14668 rtx prev = BB_END (bb);
14671 && distance < LEA_SEARCH_THRESHOLD)
14673 if (NONDEBUG_INSN_P (prev))
14676 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14677 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14678 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14679 && (regno1 == DF_REF_REGNO (*def_rec)
14680 || regno2 == DF_REF_REGNO (*def_rec)))
14682 insn_type = get_attr_type (prev);
14683 if (insn_type != TYPE_LEA)
14687 prev = PREV_INSN (prev);
14695 /* get_attr_type may modify recog data. We want to make sure
14696 that recog data is valid for instruction INSN, on which
14697 distance_non_agu_define is called. INSN is unchanged here. */
14698 extract_insn_cached (insn);
14702 /* Return the distance between INSN and the next insn that uses
14703 register number REGNO0 in a memory address. Return -1 if no such
14704 use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
14707 distance_agu_use (unsigned int regno0, rtx insn)
14709 basic_block bb = BLOCK_FOR_INSN (insn);
14714 if (insn != BB_END (bb))
14716 rtx next = NEXT_INSN (insn);
14717 while (next && distance < LEA_SEARCH_THRESHOLD)
14719 if (NONDEBUG_INSN_P (next))
14723 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14724 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14725 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14726 && regno0 == DF_REF_REGNO (*use_rec))
14728 /* Return DISTANCE if OP0 is used in a memory
14729 address in NEXT. */
14733 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14734 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14735 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14736 && regno0 == DF_REF_REGNO (*def_rec))
14738 /* Return -1 if OP0 is set in NEXT. */
14742 if (next == BB_END (bb))
14744 next = NEXT_INSN (next);
14748 if (distance < LEA_SEARCH_THRESHOLD)
14752 bool simple_loop = false;
14754 FOR_EACH_EDGE (e, ei, bb->succs)
14757 simple_loop = true;
14763 rtx next = BB_HEAD (bb);
14766 && distance < LEA_SEARCH_THRESHOLD)
14768 if (NONDEBUG_INSN_P (next))
14772 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14773 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14774 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14775 && regno0 == DF_REF_REGNO (*use_rec))
14777 /* Return DISTANCE if OP0 is used in a memory
14778 address in NEXT. */
14782 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14783 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14784 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14785 && regno0 == DF_REF_REGNO (*def_rec))
14787 /* Return -1 if OP0 is set in NEXT. */
14792 next = NEXT_INSN (next);
14800 /* Define this macro to tune LEA priority vs. ADD; it takes effect when
14801 there is a choice between emitting LEA or ADD.
14802 Negative value: ADD is preferred over LEA
14804 Positive value: LEA is preferred over ADD */
14805 #define IX86_LEA_PRIORITY 2
14807 /* Return true if it is ok to optimize an ADD operation to a LEA
14808 operation to avoid flag register consumption. For processors
14809 like ATOM, if the destination register of the LEA holds an actual
14810 address which will be used soon, LEA is better; otherwise ADD
14814 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14815 rtx insn, rtx operands[])
14817 unsigned int regno0 = true_regnum (operands[0]);
14818 unsigned int regno1 = true_regnum (operands[1]);
14819 unsigned int regno2;
14821 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14822 return regno0 != regno1;
14824 regno2 = true_regnum (operands[2]);
14826 /* If a = b + c, (a != b && a != c), we must use the lea form. */
14827 if (regno0 != regno1 && regno0 != regno2)
14831 int dist_define, dist_use;
14832 dist_define = distance_non_agu_define (regno1, regno2, insn);
14833 if (dist_define <= 0)
14836 /* If this insn has both backward non-agu dependence and forward
14837 agu dependence, the one with the shorter distance takes effect. */
14838 dist_use = distance_agu_use (regno0, insn);
14840 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
14847 /* Return true if destination reg of SET_BODY is shift count of
14851 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14857 /* Retrieve destination of SET_BODY. */
14858 switch (GET_CODE (set_body))
14861 set_dest = SET_DEST (set_body);
14862 if (!set_dest || !REG_P (set_dest))
14866 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14867 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14875 /* Retrieve shift count of USE_BODY. */
14876 switch (GET_CODE (use_body))
14879 shift_rtx = XEXP (use_body, 1);
14882 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14883 if (ix86_dep_by_shift_count_body (set_body,
14884 XVECEXP (use_body, 0, i)))
14892 && (GET_CODE (shift_rtx) == ASHIFT
14893 || GET_CODE (shift_rtx) == LSHIFTRT
14894 || GET_CODE (shift_rtx) == ASHIFTRT
14895 || GET_CODE (shift_rtx) == ROTATE
14896 || GET_CODE (shift_rtx) == ROTATERT))
14898 rtx shift_count = XEXP (shift_rtx, 1);
14900 /* Return true if shift count is dest of SET_BODY. */
14901 if (REG_P (shift_count)
14902 && true_regnum (set_dest) == true_regnum (shift_count))
14909 /* Return true if destination reg of SET_INSN is shift count of
14913 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14915 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14916 PATTERN (use_insn));
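/* For example, with SET_INSN "movl %edx, %ecx" and USE_INSN
   "sall %cl, %eax", the shift count %cl is the destination of the
   set, so this returns true.  */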
14919 /* Return TRUE or FALSE depending on whether the unary operator meets the
14920 appropriate constraints. */
14923 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14924 enum machine_mode mode ATTRIBUTE_UNUSED,
14925 rtx operands[2] ATTRIBUTE_UNUSED)
14927 /* If one of operands is memory, source and destination must match. */
14928 if ((MEM_P (operands[0])
14929 || MEM_P (operands[1]))
14930 && ! rtx_equal_p (operands[0], operands[1]))
14935 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14936 are ok, keeping in mind the possible movddup alternative. */
14939 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14941 if (MEM_P (operands[0]))
14942 return rtx_equal_p (operands[0], operands[1 + high]);
14943 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14944 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14948 /* Post-reload splitter for converting an SF or DFmode value in an
14949 SSE register into an unsigned SImode. */
14952 ix86_split_convert_uns_si_sse (rtx operands[])
14954 enum machine_mode vecmode;
14955 rtx value, large, zero_or_two31, input, two31, x;
14957 large = operands[1];
14958 zero_or_two31 = operands[2];
14959 input = operands[3];
14960 two31 = operands[4];
14961 vecmode = GET_MODE (large);
14962 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14964 /* Load up the value into the low element. We must ensure that the other
14965 elements are valid floats -- zero is the easiest such value. */
14968 if (vecmode == V4SFmode)
14969 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14971 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14975 input = gen_rtx_REG (vecmode, REGNO (input));
14976 emit_move_insn (value, CONST0_RTX (vecmode));
14977 if (vecmode == V4SFmode)
14978 emit_insn (gen_sse_movss (value, value, input));
14980 emit_insn (gen_sse2_movsd (value, value, input));
14983 emit_move_insn (large, two31);
14984 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
14986 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
14987 emit_insn (gen_rtx_SET (VOIDmode, large, x));
14989 x = gen_rtx_AND (vecmode, zero_or_two31, large);
14990 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14992 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14993 emit_insn (gen_rtx_SET (VOIDmode, value, x));
14995 large = gen_rtx_REG (V4SImode, REGNO (large));
14996 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14998 x = gen_rtx_REG (V4SImode, REGNO (value));
14999 if (vecmode == V4SFmode)
15000 emit_insn (gen_sse2_cvttps2dq (x, value));
15002 emit_insn (gen_sse2_cvttpd2dq (x, value));
15005 emit_insn (gen_xorv4si3 (value, value, large));
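  /* Worked example for a V4SFmode lane: input 3.0e9 is >= 2^31, so
     2^31 is subtracted, giving 852516352.0; cvttps2dq then produces
     852516352, and xor-ing with the 1 << 31 lane of LARGE restores
     3000000000.  Lanes below 2^31 subtract zero and xor with zero.  */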
15008 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15009 Expects the 64-bit DImode to be supplied in a pair of integral
15010 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15011 -mfpmath=sse, !optimize_size only. */
15014 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15016 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15017 rtx int_xmm, fp_xmm;
15018 rtx biases, exponents;
15021 int_xmm = gen_reg_rtx (V4SImode);
15022 if (TARGET_INTER_UNIT_MOVES)
15023 emit_insn (gen_movdi_to_sse (int_xmm, input));
15024 else if (TARGET_SSE_SPLIT_REGS)
15026 emit_clobber (int_xmm);
15027 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15031 x = gen_reg_rtx (V2DImode);
15032 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15033 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15036 x = gen_rtx_CONST_VECTOR (V4SImode,
15037 gen_rtvec (4, GEN_INT (0x43300000UL),
15038 GEN_INT (0x45300000UL),
15039 const0_rtx, const0_rtx));
15040 exponents = validize_mem (force_const_mem (V4SImode, x));
15042 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15043 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15045 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15046 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15047 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15048 (0x1.0p84 + double(fp_value_hi_xmm)).
15049 Note these exponents differ by 32. */
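  /* Worked example: for input hi = 1, lo = 5, the two doubles built
     above are (2^52 + 5) and (2^84 + 1*2^32).  Subtracting the biases
     2^52 and 2^84 leaves 5.0 and 4294967296.0, whose sum 4294967301.0
     is the exact value of the 64-bit input (rounding can occur only in
     the final add, for large inputs).  */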
15051 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15053 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15054 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15055 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15056 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15057 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15058 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15059 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15060 biases = validize_mem (force_const_mem (V2DFmode, biases));
15061 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15063 /* Add the upper and lower DFmode values together. */
15065 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15068 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15069 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15070 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15073 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15076 /* Not used, but eases macroization of patterns. */
15078 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15079 rtx input ATTRIBUTE_UNUSED)
15081 gcc_unreachable ();
15084 /* Convert an unsigned SImode value into a DFmode. Only currently used
15085 for SSE, but applicable anywhere. */
15088 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15090 REAL_VALUE_TYPE TWO31r;
15093 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15094 NULL, 1, OPTAB_DIRECT);
15096 fp = gen_reg_rtx (DFmode);
15097 emit_insn (gen_floatsidf2 (fp, x));
15099 real_ldexp (&TWO31r, &dconst1, 31);
15100 x = const_double_from_real_value (TWO31r, DFmode);
15102 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15104 emit_move_insn (target, x);
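  /* Worked example: input 0xffffffff.  The wrapping add of -2^31 gives
     0x7fffffff = 2147483647, which floatsidf converts exactly; adding
     back 2^31 as 2147483648.0 yields 4294967295.0.  The bias trick
     works because every biased value fits in a signed SImode.  */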
15107 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15108 32-bit mode; otherwise we have a direct convert instruction. */
15111 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15113 REAL_VALUE_TYPE TWO32r;
15114 rtx fp_lo, fp_hi, x;
15116 fp_lo = gen_reg_rtx (DFmode);
15117 fp_hi = gen_reg_rtx (DFmode);
15119 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15121 real_ldexp (&TWO32r, &dconst1, 32);
15122 x = const_double_from_real_value (TWO32r, DFmode);
15123 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15125 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15127 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15130 emit_move_insn (target, x);
15133 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15134 For x86_32, -mfpmath=sse, !optimize_size only. */
15136 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15138 REAL_VALUE_TYPE ONE16r;
15139 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15141 real_ldexp (&ONE16r, &dconst1, 16);
15142 x = const_double_from_real_value (ONE16r, SFmode);
15143 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15144 NULL, 0, OPTAB_DIRECT);
15145 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15146 NULL, 0, OPTAB_DIRECT);
15147 fp_hi = gen_reg_rtx (SFmode);
15148 fp_lo = gen_reg_rtx (SFmode);
15149 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15150 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15151 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15153 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15155 if (!rtx_equal_p (target, fp_hi))
15156 emit_move_insn (target, fp_hi);
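  /* Worked example: input 0x12345678 splits into hi = 0x1234 and
     lo = 0x5678.  hi * 2^16 = 305397760.0 is exact in SFmode (hi fits
     in 13 bits and the scaling is a power of two), so the only rounding
     happens in the final add, producing the correctly rounded float
     of 305419896.  */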
15159 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15160 then replicate the value for all elements of the vector
15164 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15171 v = gen_rtvec (4, value, value, value, value);
15172 return gen_rtx_CONST_VECTOR (V4SImode, v);
15176 v = gen_rtvec (2, value, value);
15177 return gen_rtx_CONST_VECTOR (V2DImode, v);
15181 v = gen_rtvec (4, value, value, value, value);
15183 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15184 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15185 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15189 v = gen_rtvec (2, value, value);
15191 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15192 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15195 gcc_unreachable ();
15199 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15200 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15201 for an SSE register. If VECT is true, then replicate the mask for
15202 all elements of the vector register. If INVERT is true, then create
15203 a mask excluding the sign bit. */
15206 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15208 enum machine_mode vec_mode, imode;
15209 HOST_WIDE_INT hi, lo;
15214 /* Find the sign bit, sign extended to 2*HWI. */
15220 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15221 lo = 0x80000000, hi = lo < 0;
15227 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15228 if (HOST_BITS_PER_WIDE_INT >= 64)
15229 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15231 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15236 vec_mode = VOIDmode;
15237 if (HOST_BITS_PER_WIDE_INT >= 64)
15240 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15247 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15251 lo = ~lo, hi = ~hi;
15257 mask = immed_double_const (lo, hi, imode);
15259 vec = gen_rtvec (2, v, mask);
15260 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15261 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15268 gcc_unreachable ();
15272 lo = ~lo, hi = ~hi;
15274 /* Force this value into the low part of a fp vector constant. */
15275 mask = immed_double_const (lo, hi, imode);
15276 mask = gen_lowpart (mode, mask);
15278 if (vec_mode == VOIDmode)
15279 return force_reg (mode, mask);
15281 v = ix86_build_const_vector (mode, vect, mask);
15282 return force_reg (vec_mode, v);
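/* The masks produced above, written out as integers of the element
   mode (sign bit set, complemented when INVERT):

     SFmode:  0x80000000             inverted: 0x7fffffff
     DFmode:  0x8000000000000000     inverted: 0x7fffffffffffffff
     TFmode:  1 << 127               inverted: ~(1 << 127)

   replicated across all vector elements when VECT is set.  */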
15285 /* Generate code for floating point ABS or NEG. */
15288 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15291 rtx mask, set, use, clob, dst, src;
15292 bool use_sse = false;
15293 bool vector_mode = VECTOR_MODE_P (mode);
15294 enum machine_mode elt_mode = mode;
15298 elt_mode = GET_MODE_INNER (mode);
15301 else if (mode == TFmode)
15303 else if (TARGET_SSE_MATH)
15304 use_sse = SSE_FLOAT_MODE_P (mode);
15306 /* NEG and ABS performed with SSE use bitwise mask operations.
15307 Create the appropriate mask now. */
15309 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15318 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15319 set = gen_rtx_SET (VOIDmode, dst, set);
15324 set = gen_rtx_fmt_e (code, mode, src);
15325 set = gen_rtx_SET (VOIDmode, dst, set);
15328 use = gen_rtx_USE (VOIDmode, mask);
15329 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15330 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15331 gen_rtvec (3, set, use, clob)));
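/* In scalar terms the emitted mask operation is simply

     NEG:  dst = src ^ signbit_mask    flip the sign bit
     ABS:  dst = src & ~signbit_mask   clear the sign bit

   which is why ix86_build_signbit_mask is called with INVERT set
   exactly when CODE is ABS.  */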
15338 /* Expand a copysign operation. Special case operand 0 being a constant. */
15341 ix86_expand_copysign (rtx operands[])
15343 enum machine_mode mode;
15344 rtx dest, op0, op1, mask, nmask;
15346 dest = operands[0];
15350 mode = GET_MODE (dest);
15352 if (GET_CODE (op0) == CONST_DOUBLE)
15354 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15356 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15357 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15359 if (mode == SFmode || mode == DFmode)
15361 enum machine_mode vmode;
15363 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15365 if (op0 == CONST0_RTX (mode))
15366 op0 = CONST0_RTX (vmode);
15369 rtx v = ix86_build_const_vector (mode, false, op0);
15371 op0 = force_reg (vmode, v);
15374 else if (op0 != CONST0_RTX (mode))
15375 op0 = force_reg (mode, op0);
15377 mask = ix86_build_signbit_mask (mode, 0, 0);
15379 if (mode == SFmode)
15380 copysign_insn = gen_copysignsf3_const;
15381 else if (mode == DFmode)
15382 copysign_insn = gen_copysigndf3_const;
15384 copysign_insn = gen_copysigntf3_const;
15386 emit_insn (copysign_insn (dest, op0, op1, mask));
15390 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15392 nmask = ix86_build_signbit_mask (mode, 0, 1);
15393 mask = ix86_build_signbit_mask (mode, 0, 0);
15395 if (mode == SFmode)
15396 copysign_insn = gen_copysignsf3_var;
15397 else if (mode == DFmode)
15398 copysign_insn = gen_copysigndf3_var;
15400 copysign_insn = gen_copysigntf3_var;
15402 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15406 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15407 be a constant, and so has already been expanded into a vector constant. */
15410 ix86_split_copysign_const (rtx operands[])
15412 enum machine_mode mode, vmode;
15413 rtx dest, op0, mask, x;
15415 dest = operands[0];
15417 mask = operands[3];
15419 mode = GET_MODE (dest);
15420 vmode = GET_MODE (mask);
15422 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15423 x = gen_rtx_AND (vmode, dest, mask);
15424 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15426 if (op0 != CONST0_RTX (vmode))
15428 x = gen_rtx_IOR (vmode, dest, op0);
15429 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15433 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15434 so we have to do two masks. */
15437 ix86_split_copysign_var (rtx operands[])
15439 enum machine_mode mode, vmode;
15440 rtx dest, scratch, op0, op1, mask, nmask, x;
15442 dest = operands[0];
15443 scratch = operands[1];
15446 nmask = operands[4];
15447 mask = operands[5];
15449 mode = GET_MODE (dest);
15450 vmode = GET_MODE (mask);
15452 if (rtx_equal_p (op0, op1))
15454 /* Shouldn't happen often (it's useless, obviously), but when it does
15455 we'd generate incorrect code if we continue below. */
15456 emit_move_insn (dest, op0);
15460 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15462 gcc_assert (REGNO (op1) == REGNO (scratch));
15464 x = gen_rtx_AND (vmode, scratch, mask);
15465 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15468 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15469 x = gen_rtx_NOT (vmode, dest);
15470 x = gen_rtx_AND (vmode, x, op0);
15471 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15475 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15477 x = gen_rtx_AND (vmode, scratch, mask);
15479 else /* alternative 2,4 */
15481 gcc_assert (REGNO (mask) == REGNO (scratch));
15482 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15483 x = gen_rtx_AND (vmode, scratch, op1);
15485 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15487 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15489 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15490 x = gen_rtx_AND (vmode, dest, nmask);
15492 else /* alternative 3,4 */
15494 gcc_assert (REGNO (nmask) == REGNO (dest));
15496 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15497 x = gen_rtx_AND (vmode, dest, op0);
15499 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15502 x = gen_rtx_IOR (vmode, dest, scratch);
15503 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
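/* Both split routines above implement the classic bit-mask copysign,

     dest = (op0 & ~signbit_mask) | (op1 & signbit_mask)

   i.e. magnitude from op0 and sign from op1.  The constant variant
   has op0's masked magnitude precomputed as a vector constant and can
   drop the IOR entirely when that magnitude is +0.0.  */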
15506 /* Return TRUE or FALSE depending on whether the first SET in INSN
15507 has source and destination with matching CC modes, and whether the
15508 CC mode is at least as constrained as REQ_MODE. */
15511 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15514 enum machine_mode set_mode;
15516 set = PATTERN (insn);
15517 if (GET_CODE (set) == PARALLEL)
15518 set = XVECEXP (set, 0, 0);
15519 gcc_assert (GET_CODE (set) == SET);
15520 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15522 set_mode = GET_MODE (SET_DEST (set));
15526 if (req_mode != CCNOmode
15527 && (req_mode != CCmode
15528 || XEXP (SET_SRC (set), 1) != const0_rtx))
15532 if (req_mode == CCGCmode)
15536 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15540 if (req_mode == CCZmode)
15551 gcc_unreachable ();
15554 return (GET_MODE (SET_SRC (set)) == set_mode);
15557 /* Generate insn patterns to do an integer compare of OPERANDS. */
15560 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15562 enum machine_mode cmpmode;
15565 cmpmode = SELECT_CC_MODE (code, op0, op1);
15566 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15568 /* This is very simple, but making the interface the same as in the
15569 FP case makes the rest of the code easier. */
15570 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15571 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15573 /* Return the test that should be put into the flags user, i.e.
15574 the bcc, scc, or cmov instruction. */
15575 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15578 /* Figure out whether to use ordered or unordered fp comparisons.
15579 Return the appropriate mode to use. */
15582 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15584 /* ??? In order to make all comparisons reversible, we do all comparisons
15585 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15586 between all forms of trapping and nontrapping comparisons, we can make
15587 inequality comparisons trapping again, since that results in better code
15588 when using FCOM based compares. */
15589 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15593 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15595 enum machine_mode mode = GET_MODE (op0);
15597 if (SCALAR_FLOAT_MODE_P (mode))
15599 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15600 return ix86_fp_compare_mode (code);
15605 /* Only zero flag is needed. */
15606 case EQ: /* ZF=0 */
15607 case NE: /* ZF!=0 */
15609 /* Codes needing carry flag. */
15610 case GEU: /* CF=0 */
15611 case LTU: /* CF=1 */
15612 /* Detect overflow checks. They need just the carry flag. */
15613 if (GET_CODE (op0) == PLUS
15614 && rtx_equal_p (op1, XEXP (op0, 0)))
15618 case GTU: /* CF=0 & ZF=0 */
15619 case LEU: /* CF=1 | ZF=1 */
15620 /* Detect overflow checks. They need just the carry flag. */
15621 if (GET_CODE (op0) == MINUS
15622 && rtx_equal_p (op1, XEXP (op0, 0)))
15626 /* Codes possibly doable with only the sign flag when
15627 comparing against zero. */
15628 case GE: /* SF=OF or SF=0 */
15629 case LT: /* SF<>OF or SF=1 */
15630 if (op1 == const0_rtx)
15633 /* For the other cases the carry flag is not required. */
15635 /* Codes doable only with sign flag when comparing
15636 against zero, but we lack a jump instruction for it,
15637 so we need to use relational tests against overflow,
15638 which therefore needs to be zero. */
15639 case GT: /* ZF=0 & SF=OF */
15640 case LE: /* ZF=1 | SF<>OF */
15641 if (op1 == const0_rtx)
15645 /* The strcmp pattern does (use flags), and combine may ask us for the proper mode. */
15650 gcc_unreachable ();
15654 /* Return the fixed registers used for condition codes. */
15657 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15664 /* If two condition code modes are compatible, return a condition code
15665 mode which is compatible with both. Otherwise, return VOIDmode. */
15668 static enum machine_mode
15669 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15674 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15677 if ((m1 == CCGCmode && m2 == CCGOCmode)
15678 || (m1 == CCGOCmode && m2 == CCGCmode))
15684 gcc_unreachable ();
15714 /* These are only compatible with themselves, which we already know. */
15721 /* Return a comparison we can do that is equivalent to
15722 swap_condition (code), apart possibly from orderedness.
15723 But, never change orderedness if TARGET_IEEE_FP, returning
15724 UNKNOWN in that case if necessary. */
15726 static enum rtx_code
15727 ix86_fp_swap_condition (enum rtx_code code)
15731 case GT: /* GTU - CF=0 & ZF=0 */
15732 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15733 case GE: /* GEU - CF=0 */
15734 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15735 case UNLT: /* LTU - CF=1 */
15736 return TARGET_IEEE_FP ? UNKNOWN : GT;
15737 case UNLE: /* LEU - CF=1 | ZF=1 */
15738 return TARGET_IEEE_FP ? UNKNOWN : GE;
15740 return swap_condition (code);
15744 /* Return cost of comparison CODE using the best strategy for performance.
15745 All following functions use the number of instructions as a cost metric.
15746 In the future this should be tweaked to compute bytes for optimize_size and
15747 take into account performance of various instructions on various CPUs. */
15750 ix86_fp_comparison_cost (enum rtx_code code)
15754 /* The cost of code using bit-twiddling on %ah. */
15771 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15775 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15778 gcc_unreachable ();
15781 switch (ix86_fp_comparison_strategy (code))
15783 case IX86_FPCMP_COMI:
15784 return arith_cost > 4 ? 3 : 2;
15785 case IX86_FPCMP_SAHF:
15786 return arith_cost > 4 ? 4 : 3;
15792 /* Return the strategy to use for floating-point comparison. We assume that
15793 fcomi is always preferable where available, since that is also true when looking at size
15794 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15796 enum ix86_fpcmp_strategy
15797 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15799 /* Do fcomi/sahf based test when profitable. */
15802 return IX86_FPCMP_COMI;
15804 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15805 return IX86_FPCMP_SAHF;
15807 return IX86_FPCMP_ARITH;
15810 /* Swap, force into registers, or otherwise massage the two operands
15811 to a fp comparison. The operands are updated in place; the new
15812 comparison code is returned. */
15814 static enum rtx_code
15815 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15817 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15818 rtx op0 = *pop0, op1 = *pop1;
15819 enum machine_mode op_mode = GET_MODE (op0);
15820 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15822 /* All of the unordered compare instructions only work on registers.
15823 The same is true of the fcomi compare instructions. The XFmode
15824 compare instructions require registers except when comparing
15825 against zero or when converting operand 1 from fixed point to floating point. */
15829 && (fpcmp_mode == CCFPUmode
15830 || (op_mode == XFmode
15831 && ! (standard_80387_constant_p (op0) == 1
15832 || standard_80387_constant_p (op1) == 1)
15833 && GET_CODE (op1) != FLOAT)
15834 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15836 op0 = force_reg (op_mode, op0);
15837 op1 = force_reg (op_mode, op1);
15841 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15842 things around if they appear profitable, otherwise force op0
15843 into a register. */
15845 if (standard_80387_constant_p (op0) == 0
15847 && ! (standard_80387_constant_p (op1) == 0
15850 enum rtx_code new_code = ix86_fp_swap_condition (code);
15851 if (new_code != UNKNOWN)
15854 tmp = op0, op0 = op1, op1 = tmp;
15860 op0 = force_reg (op_mode, op0);
15862 if (CONSTANT_P (op1))
15864 int tmp = standard_80387_constant_p (op1);
15866 op1 = validize_mem (force_const_mem (op_mode, op1));
15870 op1 = force_reg (op_mode, op1);
15873 op1 = force_reg (op_mode, op1);
15877 /* Try to rearrange the comparison to make it cheaper. */
15878 if (ix86_fp_comparison_cost (code)
15879 > ix86_fp_comparison_cost (swap_condition (code))
15880 && (REG_P (op1) || can_create_pseudo_p ()))
15883 tmp = op0, op0 = op1, op1 = tmp;
15884 code = swap_condition (code);
15886 op0 = force_reg (op_mode, op0);
15894 /* Convert comparison codes we use to represent FP comparison to integer
15895 code that will result in a proper branch. Return UNKNOWN if no such code is available. */
15899 ix86_fp_compare_code_to_integer (enum rtx_code code)
15928 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15931 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15933 enum machine_mode fpcmp_mode, intcmp_mode;
15936 fpcmp_mode = ix86_fp_compare_mode (code);
15937 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15939 /* Do fcomi/sahf based test when profitable. */
15940 switch (ix86_fp_comparison_strategy (code))
15942 case IX86_FPCMP_COMI:
15943 intcmp_mode = fpcmp_mode;
15944 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15945 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15950 case IX86_FPCMP_SAHF:
15951 intcmp_mode = fpcmp_mode;
15952 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15953 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15957 scratch = gen_reg_rtx (HImode);
15958 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15959 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15962 case IX86_FPCMP_ARITH:
15963 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15964 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15965 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15967 scratch = gen_reg_rtx (HImode);
15968 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15970 /* In the unordered case, we have to check C2 for NaN's, which
15971 doesn't happen to work out to anything nice combination-wise.
15972 So do some bit twiddling on the value we've got in AH to come
15973 up with an appropriate set of condition codes. */
15975 intcmp_mode = CCNOmode;
15980 if (code == GT || !TARGET_IEEE_FP)
15982 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15987 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15988 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15989 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
15990 intcmp_mode = CCmode;
15996 if (code == LT && TARGET_IEEE_FP)
15998 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15999 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16000 intcmp_mode = CCmode;
16005 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16011 if (code == GE || !TARGET_IEEE_FP)
16013 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16018 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16019 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16025 if (code == LE && TARGET_IEEE_FP)
16027 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16028 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16029 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16030 intcmp_mode = CCmode;
16035 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16041 if (code == EQ && TARGET_IEEE_FP)
16043 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16044 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16045 intcmp_mode = CCmode;
16050 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16056 if (code == NE && TARGET_IEEE_FP)
16058 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16059 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16065 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16071 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16075 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16080 gcc_unreachable ();
16088 /* Return the test that should be put into the flags user, i.e.
16089 the bcc, scc, or cmov instruction. */
16090 return gen_rtx_fmt_ee (code, VOIDmode,
16091 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16096 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16100 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16101 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16103 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16105 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16106 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16109 ret = ix86_expand_int_compare (code, op0, op1);
16115 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16119 switch (GET_MODE (op0))
16128 tmp = ix86_expand_compare (code, op0, op1);
16129 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16130 gen_rtx_LABEL_REF (VOIDmode, label),
16132 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16139 /* Expand DImode branch into multiple compare+branch. */
16141 rtx lo[2], hi[2], label2;
16142 enum rtx_code code1, code2, code3;
16143 enum machine_mode submode;
16145 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16147 tmp = op0, op0 = op1, op1 = tmp;
16148 code = swap_condition (code);
16150 if (GET_MODE (op0) == DImode)
16152 split_di (&op0, 1, lo+0, hi+0);
16153 split_di (&op1, 1, lo+1, hi+1);
16158 split_ti (&op0, 1, lo+0, hi+0);
16159 split_ti (&op1, 1, lo+1, hi+1);
16163 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16164 avoid two branches. This costs one extra insn, so disable when
16165 optimizing for size. */
16167 if ((code == EQ || code == NE)
16168 && (!optimize_insn_for_size_p ()
16169 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16174 if (hi[1] != const0_rtx)
16175 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16176 NULL_RTX, 0, OPTAB_WIDEN);
16179 if (lo[1] != const0_rtx)
16180 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16181 NULL_RTX, 0, OPTAB_WIDEN);
16183 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16184 NULL_RTX, 0, OPTAB_WIDEN);
16186 ix86_expand_branch (code, tmp, const0_rtx, label);
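/* In C terms the equality short-cut above tests

     ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0

   using one OR and up to two XORs in place of two compare-and-branch
   pairs; XORs against a zero half are elided.  */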
16190 /* Otherwise, if we are doing a less-than or greater-or-equal-than
16191 comparison, op1 is a constant, and the low word is zero, then we can
16192 just examine the high word. Similarly for low word -1 and
16193 less-or-equal-than or greater-than. */
16195 if (CONST_INT_P (hi[1]))
16198 case LT: case LTU: case GE: case GEU:
16199 if (lo[1] == const0_rtx)
16201 ix86_expand_branch (code, hi[0], hi[1], label);
16205 case LE: case LEU: case GT: case GTU:
16206 if (lo[1] == constm1_rtx)
16208 ix86_expand_branch (code, hi[0], hi[1], label);
16216 /* Otherwise, we need two or three jumps. */
16218 label2 = gen_label_rtx ();
16221 code2 = swap_condition (code);
16222 code3 = unsigned_condition (code);
16226 case LT: case GT: case LTU: case GTU:
16229 case LE: code1 = LT; code2 = GT; break;
16230 case GE: code1 = GT; code2 = LT; break;
16231 case LEU: code1 = LTU; code2 = GTU; break;
16232 case GEU: code1 = GTU; code2 = LTU; break;
16234 case EQ: code1 = UNKNOWN; code2 = NE; break;
16235 case NE: code2 = UNKNOWN; break;
16238 gcc_unreachable ();
16243 * if (hi(a) < hi(b)) goto true;
16244 * if (hi(a) > hi(b)) goto false;
16245 * if (lo(a) < lo(b)) goto true;
16249 if (code1 != UNKNOWN)
16250 ix86_expand_branch (code1, hi[0], hi[1], label);
16251 if (code2 != UNKNOWN)
16252 ix86_expand_branch (code2, hi[0], hi[1], label2);
16254 ix86_expand_branch (code3, lo[0], lo[1], label);
16256 if (code2 != UNKNOWN)
16257 emit_label (label2);
16262 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16267 /* Split branch based on floating point condition. */
16269 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16270 rtx target1, rtx target2, rtx tmp, rtx pushed)
16275 if (target2 != pc_rtx)
16278 code = reverse_condition_maybe_unordered (code);
16283 condition = ix86_expand_fp_compare (code, op1, op2,
16286 /* Remove pushed operand from stack. */
16288 ix86_free_from_memory (GET_MODE (pushed));
16290 i = emit_jump_insn (gen_rtx_SET
16292 gen_rtx_IF_THEN_ELSE (VOIDmode,
16293 condition, target1, target2)));
16294 if (split_branch_probability >= 0)
16295 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16299 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16303 gcc_assert (GET_MODE (dest) == QImode);
16305 ret = ix86_expand_compare (code, op0, op1);
16306 PUT_MODE (ret, QImode);
16307 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16310 /* Expand comparison setting or clearing carry flag. Return true when
16311 successful and set pop for the operation. */
16313 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16315 enum machine_mode mode =
16316 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16318 /* Do not handle DImode compares that go through a special path. */
16319 if (mode == (TARGET_64BIT ? TImode : DImode))
16322 if (SCALAR_FLOAT_MODE_P (mode))
16324 rtx compare_op, compare_seq;
16326 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16328 /* Shortcut: the following common codes never translate
16329 into carry-flag compares. */
16330 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16331 || code == ORDERED || code == UNORDERED)
16334 /* These comparisons require the zero flag; swap operands so they won't. */
16335 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16336 && !TARGET_IEEE_FP)
16341 code = swap_condition (code);
16344 /* Try to expand the comparison and verify that we end up with
16345 a carry-flag based comparison. This fails to be true only when
16346 we decide to expand the comparison using arithmetic, which is not
16347 a common scenario. */
16349 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16350 compare_seq = get_insns ();
16353 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16354 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16355 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16357 code = GET_CODE (compare_op);
16359 if (code != LTU && code != GEU)
16362 emit_insn (compare_seq);
16367 if (!INTEGRAL_MODE_P (mode))
16376 /* Convert a==0 into (unsigned)a<1. */
16379 if (op1 != const0_rtx)
16382 code = (code == EQ ? LTU : GEU);
16385 /* Convert a>b into b<a or a>=b+1. */
16388 if (CONST_INT_P (op1))
16390 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16391 /* Bail out on overflow. We could still swap the operands, but that
16392 would force loading of the constant into a register. */
16393 if (op1 == const0_rtx
16394 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16396 code = (code == GTU ? GEU : LTU);
16403 code = (code == GTU ? LTU : GEU);
16407 /* Convert a>=0 into (unsigned)a<0x80000000. */
16410 if (mode == DImode || op1 != const0_rtx)
16412 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16413 code = (code == LT ? GEU : LTU);
16417 if (mode == DImode || op1 != constm1_rtx)
16419 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16420 code = (code == LE ? GEU : LTU);
16426 /* Swapping operands may cause a constant to appear as the first operand. */
16427 if (!nonimmediate_operand (op0, VOIDmode))
16429 if (!can_create_pseudo_p ())
16431 op0 = force_reg (mode, op0);
16433 *pop = ix86_expand_compare (code, op0, op1);
16434 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
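/* Summary of the rewrites performed above, each chosen so that the
   final test is a bare LTU/GEU, i.e. a plain carry-flag compare:

     a == 0    becomes   (unsigned) a <  1
     a != 0    becomes   (unsigned) a >= 1
     a >u b    becomes   b <u a, or a >=u b+1 for suitable constants
     a >= 0    becomes   (unsigned) a <  0x80000000
     a <  0    becomes   (unsigned) a >= 0x80000000
 */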
16439 ix86_expand_int_movcc (rtx operands[])
16441 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16442 rtx compare_seq, compare_op;
16443 enum machine_mode mode = GET_MODE (operands[0]);
16444 bool sign_bit_compare_p = false;
16445 rtx op0 = XEXP (operands[1], 0);
16446 rtx op1 = XEXP (operands[1], 1);
16449 compare_op = ix86_expand_compare (code, op0, op1);
16450 compare_seq = get_insns ();
16453 compare_code = GET_CODE (compare_op);
16455 if ((op1 == const0_rtx && (code == GE || code == LT))
16456 || (op1 == constm1_rtx && (code == GT || code == LE)))
16457 sign_bit_compare_p = true;
16459 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16460 HImode insns, we'd be swallowed in word prefix ops. */
16462 if ((mode != HImode || TARGET_FAST_PREFIX)
16463 && (mode != (TARGET_64BIT ? TImode : DImode))
16464 && CONST_INT_P (operands[2])
16465 && CONST_INT_P (operands[3]))
16467 rtx out = operands[0];
16468 HOST_WIDE_INT ct = INTVAL (operands[2]);
16469 HOST_WIDE_INT cf = INTVAL (operands[3]);
16470 HOST_WIDE_INT diff;
16473 /* Sign bit compares are better done using shifts than by using the sbb insn. */
16475 if (sign_bit_compare_p
16476 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
16478 /* Detect overlap between destination and compare sources. */
16481 if (!sign_bit_compare_p)
16484 bool fpcmp = false;
16486 compare_code = GET_CODE (compare_op);
16488 flags = XEXP (compare_op, 0);
16490 if (GET_MODE (flags) == CCFPmode
16491 || GET_MODE (flags) == CCFPUmode)
16495 = ix86_fp_compare_code_to_integer (compare_code);
16498 /* To simplify the rest of the code, restrict to the GEU case. */
16499 if (compare_code == LTU)
16501 HOST_WIDE_INT tmp = ct;
16504 compare_code = reverse_condition (compare_code);
16505 code = reverse_condition (code);
16510 PUT_CODE (compare_op,
16511 reverse_condition_maybe_unordered
16512 (GET_CODE (compare_op)));
16514 PUT_CODE (compare_op,
16515 reverse_condition (GET_CODE (compare_op)));
16519 if (reg_overlap_mentioned_p (out, op0)
16520 || reg_overlap_mentioned_p (out, op1))
16521 tmp = gen_reg_rtx (mode);
16523 if (mode == DImode)
16524 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16526 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16527 flags, compare_op));
16531 if (code == GT || code == GE)
16532 code = reverse_condition (code);
16535 HOST_WIDE_INT tmp = ct;
16540 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
16553 tmp = expand_simple_binop (mode, PLUS,
16555 copy_rtx (tmp), 1, OPTAB_DIRECT);
16566 tmp = expand_simple_binop (mode, IOR,
16568 copy_rtx (tmp), 1, OPTAB_DIRECT);
16570 else if (diff == -1 && ct)
16580 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16582 tmp = expand_simple_binop (mode, PLUS,
16583 copy_rtx (tmp), GEN_INT (cf),
16584 copy_rtx (tmp), 1, OPTAB_DIRECT);
16592 * andl cf - ct, dest
16602 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16605 tmp = expand_simple_binop (mode, AND,
16607 gen_int_mode (cf - ct, mode),
16608 copy_rtx (tmp), 1, OPTAB_DIRECT);
16610 tmp = expand_simple_binop (mode, PLUS,
16611 copy_rtx (tmp), GEN_INT (ct),
16612 copy_rtx (tmp), 1, OPTAB_DIRECT);
16615 if (!rtx_equal_p (tmp, out))
16616 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16618 return 1; /* DONE */
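/* The sequence built above is the classic sbb idiom: the carry flag
   is materialized as a full-width 0 / -1 mask which then blends the
   two constants arithmetically, roughly

     sbb  dest, dest          dest = CF ? -1 : 0
     and  cf - ct, dest
     add  ct, dest            dest = CF ? cf : ct

   with simpler ADD/IOR/NOT combinations used when the constants
   allow.  */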
16623 enum machine_mode cmp_mode = GET_MODE (op0);
16626 tmp = ct, ct = cf, cf = tmp;
16629 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16631 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16633 /* We may be reversing an unordered compare to a normal compare, which
16634 is not valid in general (we may convert a non-trapping condition
16635 into a trapping one); however, on i386 we currently emit all
16636 comparisons unordered. */
16637 compare_code = reverse_condition_maybe_unordered (compare_code);
16638 code = reverse_condition_maybe_unordered (code);
16642 compare_code = reverse_condition (compare_code);
16643 code = reverse_condition (code);
16647 compare_code = UNKNOWN;
16648 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
16649 && CONST_INT_P (op1))
16651 if (op1 == const0_rtx
16652 && (code == LT || code == GE))
16653 compare_code = code;
16654 else if (op1 == constm1_rtx)
16658 else if (code == GT)
16663 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16664 if (compare_code != UNKNOWN
16665 && GET_MODE (op0) == GET_MODE (out)
16666 && (cf == -1 || ct == -1))
16668 /* If the lea code below could be used, only optimize
16669 if it results in a 2-insn sequence. */
16671 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16672 || diff == 3 || diff == 5 || diff == 9)
16673 || (compare_code == LT && ct == -1)
16674 || (compare_code == GE && cf == -1))
16677 * notl op1 (if necessary)
16685 code = reverse_condition (code);
16688 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16690 out = expand_simple_binop (mode, IOR,
16692 out, 1, OPTAB_DIRECT);
16693 if (out != operands[0])
16694 emit_move_insn (operands[0], out);
16696 return 1; /* DONE */
16701 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16702 || diff == 3 || diff == 5 || diff == 9)
16703 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16705 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16711 * lea cf(dest*(ct-cf)),dest
16715 * This also catches the degenerate setcc-only case.
16721 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16724 /* On x86_64 the lea instruction operates on Pmode, so we need
16725 to get the arithmetic done in the proper mode to match. */
16727 tmp = copy_rtx (out);
16731 out1 = copy_rtx (out);
16732 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16736 tmp = gen_rtx_PLUS (mode, tmp, out1);
16742 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16745 if (!rtx_equal_p (tmp, out))
16748 out = force_operand (tmp, copy_rtx (out));
16750 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16752 if (!rtx_equal_p (out, operands[0]))
16753 emit_move_insn (operands[0], copy_rtx (out));
16755 return 1; /* DONE */
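/* I.e. the branch above produced, in effect,

     setcc dest               dest = cond ? 1 : 0
     lea   cf(dest, dest*k), dest

   computing dest = cf + dest * (ct - cf); this works whenever ct - cf
   is a difference lea can scale (1, 2, 3, 4, 5, 8 or 9) and cf fits
   in a displacement.  */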
16759 * General case: Jumpful:
16760 * xorl dest,dest cmpl op1, op2
16761 * cmpl op1, op2 movl ct, dest
16762 * setcc dest jcc 1f
16763 * decl dest movl cf, dest
16764 * andl (cf-ct),dest 1:
16767 * Size 20. Size 14.
16769 * This is reasonably steep, but branch mispredict costs are
16770 * high on modern cpus, so consider failing only if optimizing for size. */
16774 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16775 && BRANCH_COST (optimize_insn_for_speed_p (),
16780 enum machine_mode cmp_mode = GET_MODE (op0);
16785 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16787 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16789 /* We may be reversing an unordered compare to a normal compare,
16790 which is not valid in general (we may convert a non-trapping
16791 condition into a trapping one); however, on i386 we currently
16792 emit all comparisons unordered. */
16793 code = reverse_condition_maybe_unordered (code);
16797 code = reverse_condition (code);
16798 if (compare_code != UNKNOWN)
16799 compare_code = reverse_condition (compare_code);
16803 if (compare_code != UNKNOWN)
16805 /* notl op1 (if needed)
16810 For x < 0 (resp. x <= -1) there will be no notl,
16811 so if possible swap the constants to get rid of the complement.
16813 True/false will be -1/0 while code below (store flag
16814 followed by decrement) is 0/-1, so the constants need
16815 to be exchanged once more. */
16817 if (compare_code == GE || !cf)
16819 code = reverse_condition (code);
16824 HOST_WIDE_INT tmp = cf;
16829 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16833 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16835 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
16837 copy_rtx (out), 1, OPTAB_DIRECT);
16840 out = expand_simple_binop (mode, AND, copy_rtx (out),
16841 gen_int_mode (cf - ct, mode),
16842 copy_rtx (out), 1, OPTAB_DIRECT);
16844 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16845 copy_rtx (out), 1, OPTAB_DIRECT);
16846 if (!rtx_equal_p (out, operands[0]))
16847 emit_move_insn (operands[0], copy_rtx (out));
16849 return 1; /* DONE */
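/* In C terms this general constant/constant case computes

     mask = cond ? 0 : -1;             setcc then decrement
     dest = (mask & (cf - ct)) + ct;

   (after the condition and constants have been normalized above), a
   branch-free select that is preferred only when branches are
   expensive; see the size comparison in the comment above.  */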
16853 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16855 /* Try a few more things with specific constants and a variable. */
16858 rtx var, orig_out, out, tmp;
16860 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16861 return 0; /* FAIL */
16863 /* If one of the two operands is an interesting constant, load a
16864 constant with the above and mask it in with a logical operation. */
16866 if (CONST_INT_P (operands[2]))
16869 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16870 operands[3] = constm1_rtx, op = and_optab;
16871 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16872 operands[3] = const0_rtx, op = ior_optab;
16874 return 0; /* FAIL */
16876 else if (CONST_INT_P (operands[3]))
16879 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16880 operands[2] = constm1_rtx, op = and_optab;
16881 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16882 operands[2] = const0_rtx, op = ior_optab;
16884 return 0; /* FAIL */
16887 return 0; /* FAIL */
16889 orig_out = operands[0];
16890 tmp = gen_reg_rtx (mode);
16893 /* Recurse to get the constant loaded. */
16894 if (ix86_expand_int_movcc (operands) == 0)
16895 return 0; /* FAIL */
16897 /* Mask in the interesting variable. */
16898 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16900 if (!rtx_equal_p (out, orig_out))
16901 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16903 return 1; /* DONE */
16907 * For comparison with above,
16917 if (! nonimmediate_operand (operands[2], mode))
16918 operands[2] = force_reg (mode, operands[2]);
16919 if (! nonimmediate_operand (operands[3], mode))
16920 operands[3] = force_reg (mode, operands[3]);
16922 if (! register_operand (operands[2], VOIDmode)
16924 || ! register_operand (operands[3], VOIDmode)))
16925 operands[2] = force_reg (mode, operands[2]);
16928 && ! register_operand (operands[3], VOIDmode))
16929 operands[3] = force_reg (mode, operands[3]);
16931 emit_insn (compare_seq);
16932 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16933 gen_rtx_IF_THEN_ELSE (mode,
16934 compare_op, operands[2],
16937 return 1; /* DONE */
16940 /* Swap, force into registers, or otherwise massage the two operands
16941 to an sse comparison with a mask result. Thus we differ a bit from
16942 ix86_prepare_fp_compare_args which expects to produce a flags result.
16944 The DEST operand exists to help determine whether to commute commutative
16945 operators. The POP0/POP1 operands are updated in place. The new
16946 comparison code is returned, or UNKNOWN if not implementable. */
16948 static enum rtx_code
16949 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16950 rtx *pop0, rtx *pop1)
16958 /* We have no LTGT as an operator. We could implement it with
16959 NE & ORDERED, but this requires an extra temporary. It's
16960 not clear that it's worth it. */
16967 /* These are supported directly. */
16974 /* For commutative operators, try to canonicalize the destination
16975 operand to be first in the comparison - this helps reload to
16976 avoid extra moves. */
16977 if (!dest || !rtx_equal_p (dest, *pop1))
16985 /* These are not supported directly. Swap the comparison operands
16986 to transform into something that is supported. */
16990 code = swap_condition (code);
16994 gcc_unreachable ();
17000 /* Detect conditional moves that exactly match min/max operational
17001 semantics. Note that this is IEEE safe, as long as we don't
17002 interchange the operands.
17004 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17005 and TRUE if the operation is successful and instructions are emitted. */
17008 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17009 rtx cmp_op1, rtx if_true, rtx if_false)
17011 enum machine_mode mode;
17017 else if (code == UNGE)
17020 if_true = if_false;
17026 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17028 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17033 mode = GET_MODE (dest);
17035 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17036 but MODE may be a vector mode and thus not appropriate. */
17037 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17039 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17042 if_true = force_reg (mode, if_true);
17043 v = gen_rtvec (2, if_true, if_false);
17044 tmp = gen_rtx_UNSPEC (mode, v, u);
17048 code = is_min ? SMIN : SMAX;
17049 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17052 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
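/* The operand order matters because the SSE min/max instructions are
   asymmetric: minss (a, b) computes a < b ? a : b, returning the
   second operand when the compare is unordered or the operands are
   equal.  Keeping the user's operand order therefore preserves NaN
   and -0.0/+0.0 semantics; the UNSPEC form is used when they must be
   honored.  */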
17056 /* Expand an sse vector comparison. Return the register with the result. */
17059 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17060 rtx op_true, rtx op_false)
17062 enum machine_mode mode = GET_MODE (dest);
17065 cmp_op0 = force_reg (mode, cmp_op0);
17066 if (!nonimmediate_operand (cmp_op1, mode))
17067 cmp_op1 = force_reg (mode, cmp_op1);
17070 || reg_overlap_mentioned_p (dest, op_true)
17071 || reg_overlap_mentioned_p (dest, op_false))
17072 dest = gen_reg_rtx (mode);
17074 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17075 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17080 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17081 operations. This is used for both scalar and vector conditional moves. */
17084 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17086 enum machine_mode mode = GET_MODE (dest);
17089 if (op_false == CONST0_RTX (mode))
17091 op_true = force_reg (mode, op_true);
17092 x = gen_rtx_AND (mode, cmp, op_true);
17093 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17095 else if (op_true == CONST0_RTX (mode))
17097 op_false = force_reg (mode, op_false);
17098 x = gen_rtx_NOT (mode, cmp);
17099 x = gen_rtx_AND (mode, x, op_false);
17100 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17102 else if (TARGET_XOP)
17104 rtx pcmov = gen_rtx_SET (mode, dest,
17105 gen_rtx_IF_THEN_ELSE (mode, cmp,
17112 op_true = force_reg (mode, op_true);
17113 op_false = force_reg (mode, op_false);
17115 t2 = gen_reg_rtx (mode);
17117 t3 = gen_reg_rtx (mode);
17121 x = gen_rtx_AND (mode, op_true, cmp);
17122 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17124 x = gen_rtx_NOT (mode, cmp);
17125 x = gen_rtx_AND (mode, x, op_false);
17126 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17128 x = gen_rtx_IOR (mode, t3, t2);
17129 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
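/* With CMP being a full-width 0 / all-ones mask per element, this is
   the standard branch-free vector select:

     dest = (cmp & op_true) | (~cmp & op_false)

   with one AND leg dropped when the corresponding arm is the zero
   vector, and a single vpcmov on XOP targets.  */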
17133 /* Expand a floating-point conditional move. Return true if successful. */
17136 ix86_expand_fp_movcc (rtx operands[])
17138 enum machine_mode mode = GET_MODE (operands[0]);
17139 enum rtx_code code = GET_CODE (operands[1]);
17140 rtx tmp, compare_op;
17141 rtx op0 = XEXP (operands[1], 0);
17142 rtx op1 = XEXP (operands[1], 1);
17144 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17146 enum machine_mode cmode;
17148 /* Since we have no cmove for SSE registers, don't force bad register
17149 allocation just to gain access to it. Deny movcc when the
17150 comparison mode doesn't match the move mode. */
17151 cmode = GET_MODE (op0);
17152 if (cmode == VOIDmode)
17153 cmode = GET_MODE (op1);
17157 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17158 if (code == UNKNOWN)
17161 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17162 operands[2], operands[3]))
17165 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17166 operands[2], operands[3]);
17167 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17171 /* The floating point conditional move instructions don't directly
17172 support conditions resulting from a signed integer comparison. */
17174 compare_op = ix86_expand_compare (code, op0, op1);
17175 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17177 tmp = gen_reg_rtx (QImode);
17178 ix86_expand_setcc (tmp, code, op0, op1);
17180 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17183 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17184 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17185 operands[2], operands[3])));
17190 /* Expand a floating-point vector conditional move; a vcond operation
17191 rather than a movcc operation. */
17194 ix86_expand_fp_vcond (rtx operands[])
17196 enum rtx_code code = GET_CODE (operands[3]);
17199 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17200 &operands[4], &operands[5]);
17201 if (code == UNKNOWN)
17204 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17205 operands[5], operands[1], operands[2]))
17208 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17209 operands[1], operands[2]);
17210 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17214 /* Expand a signed/unsigned integral vector conditional move. */
17217 ix86_expand_int_vcond (rtx operands[])
17219 enum machine_mode mode = GET_MODE (operands[0]);
17220 enum rtx_code code = GET_CODE (operands[3]);
17221 bool negate = false;
17224 cop0 = operands[4];
17225 cop1 = operands[5];
17227 /* XOP supports all of the comparisons on all vector int types. */
17230 /* Canonicalize the comparison to EQ, GT, GTU. */
17241 code = reverse_condition (code);
17247 code = reverse_condition (code);
17253 code = swap_condition (code);
17254 x = cop0, cop0 = cop1, cop1 = x;
17258 gcc_unreachable ();
17261 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17262 if (mode == V2DImode)
17267 /* SSE4.1 supports EQ. */
17268 if (!TARGET_SSE4_1)
17274 /* SSE4.2 supports GT/GTU. */
17275 if (!TARGET_SSE4_2)
17280 gcc_unreachable ();
17284 /* Unsigned parallel compare is not supported by the hardware.
17285 Play some tricks to turn this into a signed comparison against 0. */
17289 cop0 = force_reg (mode, cop0);
17297 rtx (*gen_sub3) (rtx, rtx, rtx);
17299 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17301 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17303 gen_sub3 = (mode == V4SImode
17304 ? gen_subv4si3 : gen_subv2di3);
17305 t1 = gen_reg_rtx (mode);
17306 emit_insn (gen_sub3 (t1, cop0, mask));
17308 t2 = gen_reg_rtx (mode);
17309 emit_insn (gen_sub3 (t2, cop1, mask));
17319 /* Perform a parallel unsigned saturating subtraction. */
17320 x = gen_reg_rtx (mode);
17321 emit_insn (gen_rtx_SET (VOIDmode, x,
17322 gen_rtx_US_MINUS (mode, cop0, cop1)));
17325 cop1 = CONST0_RTX (mode);
17331 gcc_unreachable ();
17336 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17337 operands[1+negate], operands[2-negate]);
17339 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17340 operands[2-negate]);
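/* Two tricks are used above to synthesize GTU, which the non-XOP
   hardware lacks: biasing both operands by the sign bit so that a
   signed compare gives the unsigned answer,

     x >u y  ==  (x ^ signbit) >s (y ^ signbit)

   (subtracting the sign-bit mask, as done above, is equivalent mod
   2**n), and, for narrow elements, unsigned saturating subtraction
   followed by a test against zero,

     x >u y  ==  (x -us y) != 0
 */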
17344 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17345 true if we should do zero extension, else sign extension. HIGH_P is
17346 true if we want the N/2 high elements, else the low elements. */
17349 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17351 enum machine_mode imode = GET_MODE (operands[1]);
17352 rtx (*unpack)(rtx, rtx, rtx);
17359 unpack = gen_vec_interleave_highv16qi;
17361 unpack = gen_vec_interleave_lowv16qi;
17365 unpack = gen_vec_interleave_highv8hi;
17367 unpack = gen_vec_interleave_lowv8hi;
17371 unpack = gen_vec_interleave_highv4si;
17373 unpack = gen_vec_interleave_lowv4si;
17376 gcc_unreachable ();
17379 dest = gen_lowpart (imode, operands[0]);
17382 se = force_reg (imode, CONST0_RTX (imode));
17384 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17385 operands[1], pc_rtx, pc_rtx);
17387 emit_insn (unpack (dest, operands[1], se));
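/* For the signed case SE is a per-element mask of the sign bit,
   obtained from the 0 > x comparison above; interleaving each element
   with its sign mask is exactly sign extension to the doubled width.
   E.g. for v8hi, element x becomes the v4si element

     (se << 16) | x    where se = (x < 0) ? 0xffff : 0
 */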
17390 /* This function performs the same task as ix86_expand_sse_unpack,
17391 but with SSE4.1 instructions. */
17394 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17396 enum machine_mode imode = GET_MODE (operands[1]);
17397 rtx (*unpack)(rtx, rtx);
17404 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17406 unpack = gen_sse4_1_extendv8qiv8hi2;
17410 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17412 unpack = gen_sse4_1_extendv4hiv4si2;
17416 unpack = gen_sse4_1_zero_extendv2siv2di2;
17418 unpack = gen_sse4_1_extendv2siv2di2;
17421 gcc_unreachable ();
17424 dest = operands[0];
17427 /* Shift higher 8 bytes to lower 8 bytes. */
17428 src = gen_reg_rtx (imode);
17429 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17430 gen_lowpart (V1TImode, operands[1]),
17436 emit_insn (unpack (dest, src));
17439 /* Expand conditional increment or decrement using adc/sbb instructions.
17440 The default case using setcc followed by the conditional move can be
17441 done by generic code. */
17443 ix86_expand_int_addcc (rtx operands[])
17445 enum rtx_code code = GET_CODE (operands[1]);
17447 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17449 rtx val = const0_rtx;
17450 bool fpcmp = false;
17451 enum machine_mode mode;
17452 rtx op0 = XEXP (operands[1], 0);
17453 rtx op1 = XEXP (operands[1], 1);
17455 if (operands[3] != const1_rtx
17456 && operands[3] != constm1_rtx)
17458 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17460 code = GET_CODE (compare_op);
17462 flags = XEXP (compare_op, 0);
17464 if (GET_MODE (flags) == CCFPmode
17465 || GET_MODE (flags) == CCFPUmode)
17468 code = ix86_fp_compare_code_to_integer (code);
17475 PUT_CODE (compare_op,
17476 reverse_condition_maybe_unordered
17477 (GET_CODE (compare_op)));
17479 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17482 mode = GET_MODE (operands[0]);
17484 /* Construct either adc or sbb insn. */
17485 if ((code == LTU) == (operands[3] == constm1_rtx))
17490 insn = gen_subqi3_carry;
17493 insn = gen_subhi3_carry;
17496 insn = gen_subsi3_carry;
17499 insn = gen_subdi3_carry;
17502 gcc_unreachable ();
17510 insn = gen_addqi3_carry;
17513 insn = gen_addhi3_carry;
17516 insn = gen_addsi3_carry;
17519 insn = gen_adddi3_carry;
17522 gcc_unreachable ();
17525 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17527 return 1; /* DONE */
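/* I.e. the expansion turns

     dest = (a <u b) ? src + 1 : src     and the -1 / reversed forms

   into a compare followed by a single add/sub-with-carry of a zero
   immediate:

     cmp  a, b
     adc  $0, dest            dest = src + CF
 */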
17531 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17532 works for floating point parameters and non-offsettable memories.
17533 For pushes, it returns just stack offsets; the values will be saved
17534 in the right order. Maximally four parts are generated. */
17537 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17542 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17544 size = (GET_MODE_SIZE (mode) + 4) / 8;
17546 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17547 gcc_assert (size >= 2 && size <= 4);
17549 /* Optimize constant pool references to immediates. This is used by fp
17550 moves, which force all constants to memory to allow combining. */
17551 if (MEM_P (operand) && MEM_READONLY_P (operand))
17553 rtx tmp = maybe_get_pool_constant (operand);
17558 if (MEM_P (operand) && !offsettable_memref_p (operand))
17560 /* The only non-offsettable memories we handle are pushes. */
17561 int ok = push_operand (operand, VOIDmode);
17565 operand = copy_rtx (operand);
17566 PUT_MODE (operand, Pmode);
17567 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17571 if (GET_CODE (operand) == CONST_VECTOR)
17573 enum machine_mode imode = int_mode_for_mode (mode);
17574 /* Caution: if we looked through a constant pool memory above,
17575 the operand may actually have a different mode now. That's
17576 ok, since we want to pun this all the way back to an integer. */
17577 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17578 gcc_assert (operand != NULL);
17584 if (mode == DImode)
17585 split_di (&operand, 1, &parts[0], &parts[1]);
17590 if (REG_P (operand))
17592 gcc_assert (reload_completed);
17593 for (i = 0; i < size; i++)
17594 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17596 else if (offsettable_memref_p (operand))
17598 operand = adjust_address (operand, SImode, 0);
17599 parts[0] = operand;
17600 for (i = 1; i < size; i++)
17601 parts[i] = adjust_address (operand, SImode, 4 * i);
17603 else if (GET_CODE (operand) == CONST_DOUBLE)
17608 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17612 real_to_target (l, &r, mode);
17613 parts[3] = gen_int_mode (l[3], SImode);
17614 parts[2] = gen_int_mode (l[2], SImode);
17617 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17618 parts[2] = gen_int_mode (l[2], SImode);
17621 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17624 gcc_unreachable ();
17626 parts[1] = gen_int_mode (l[1], SImode);
17627 parts[0] = gen_int_mode (l[0], SImode);
17630 gcc_unreachable ();
17635 if (mode == TImode)
17636 split_ti (&operand, 1, &parts[0], &parts[1]);
17637 if (mode == XFmode || mode == TFmode)
17639 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
17640 if (REG_P (operand))
17642 gcc_assert (reload_completed);
17643 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17644 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17646 else if (offsettable_memref_p (operand))
17648 operand = adjust_address (operand, DImode, 0);
17649 parts[0] = operand;
17650 parts[1] = adjust_address (operand, upper_mode, 8);
17652 else if (GET_CODE (operand) == CONST_DOUBLE)
17657 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17658 real_to_target (l, &r, mode);
17660 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17661 if (HOST_BITS_PER_WIDE_INT >= 64)
17664 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17665 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17668 parts[0] = immed_double_const (l[0], l[1], DImode);
17670 if (upper_mode == SImode)
17671 parts[1] = gen_int_mode (l[2], SImode);
17672 else if (HOST_BITS_PER_WIDE_INT >= 64)
17675 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17676 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17679 parts[1] = immed_double_const (l[2], l[3], DImode);
17682 gcc_unreachable ();
17689 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17690 Return false when normal moves are needed; true when all required
17691 insns have been emitted. Operands 2-5 contain the output values
17692 in the correct order; operands 6-9 contain the input values. */
17695 ix86_split_long_move (rtx operands[])
17700 int collisions = 0;
17701 enum machine_mode mode = GET_MODE (operands[0]);
17702 bool collisionparts[4];
17704 /* The DFmode expanders may ask us to move a double.
17705 For a 64-bit target this is a single move. By hiding the fact
17706 here we simplify the i386.md splitters. */
17707 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17709 /* Optimize constant pool references to immediates. This is used by
17710 fp moves, which force all constants to memory to allow combining. */
17712 if (MEM_P (operands[1])
17713 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17714 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17715 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17716 if (push_operand (operands[0], VOIDmode))
17718 operands[0] = copy_rtx (operands[0]);
17719 PUT_MODE (operands[0], Pmode);
17722 operands[0] = gen_lowpart (DImode, operands[0]);
17723 operands[1] = gen_lowpart (DImode, operands[1]);
17724 emit_move_insn (operands[0], operands[1]);
17728 /* The only non-offsettable memory we handle is push. */
17729 if (push_operand (operands[0], VOIDmode))
17732 gcc_assert (!MEM_P (operands[0])
17733 || offsettable_memref_p (operands[0]));
17735 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17736 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17738 /* When emitting a push, take care with source operands on the stack. */
17739 if (push && MEM_P (operands[1])
17740 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17742 rtx src_base = XEXP (part[1][nparts - 1], 0);
17744 /* Compensate for the stack decrement by 4. */
17745 if (!TARGET_64BIT && nparts == 3
17746 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17747 src_base = plus_constant (src_base, 4);
17749 /* src_base refers to the stack pointer and is
17750 automatically decreased by the emitted pushes. */
17751 for (i = 0; i < nparts; i++)
17752 part[1][i] = change_address (part[1][i],
17753 GET_MODE (part[1][i]), src_base);
17756 /* We need to do the copy in the right order in case an address register
17757 of the source overlaps the destination. */
17758 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17762 for (i = 0; i < nparts; i++)
17765 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17766 if (collisionparts[i])
17770 /* Collision in the middle part can be handled by reordering. */
17771 if (collisions == 1 && nparts == 3 && collisionparts [1])
17773 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17774 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17776 else if (collisions == 1
17778 && (collisionparts [1] || collisionparts [2]))
17780 if (collisionparts [1])
17782 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17783 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17787 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17788 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17792 /* If there are more collisions, we can't handle it by reordering.
17793 Do an lea to the last part and use only one colliding move. */
17794 else if (collisions > 1)
17800 base = part[0][nparts - 1];
17802 /* Handle the case when the last part isn't valid for lea.
17803 Happens in 64-bit mode storing the 12-byte XFmode. */
17804 if (GET_MODE (base) != Pmode)
17805 base = gen_rtx_REG (Pmode, REGNO (base));
17807 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17808 part[1][0] = replace_equiv_address (part[1][0], base);
17809 for (i = 1; i < nparts; i++)
17811 tmp = plus_constant (base, UNITS_PER_WORD * i);
17812 part[1][i] = replace_equiv_address (part[1][i], tmp);
17823 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17824 emit_insn (gen_addsi3 (stack_pointer_rtx,
17825 stack_pointer_rtx, GEN_INT (-4)));
17826 emit_move_insn (part[0][2], part[1][2]);
17828 else if (nparts == 4)
17830 emit_move_insn (part[0][3], part[1][3]);
17831 emit_move_insn (part[0][2], part[1][2]);
17836 /* In 64-bit mode we don't have a 32-bit push available. In case this is
17837 a register, that is OK - we will just use the larger counterpart. We also
17838 retype memory - this comes from an attempt to avoid a REX prefix on
17839 moving the second half of a TFmode value. */
17840 if (GET_MODE (part[1][1]) == SImode)
17842 switch (GET_CODE (part[1][1]))
17845 part[1][1] = adjust_address (part[1][1], DImode, 0);
17849 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17853 gcc_unreachable ();
17856 if (GET_MODE (part[1][0]) == SImode)
17857 part[1][0] = part[1][1];
17860 emit_move_insn (part[0][1], part[1][1]);
17861 emit_move_insn (part[0][0], part[1][0]);
17865 /* Choose the correct order so we do not overwrite the source before it is copied. */
17866 if ((REG_P (part[0][0])
17867 && REG_P (part[1][1])
17868 && (REGNO (part[0][0]) == REGNO (part[1][1])
17870 && REGNO (part[0][0]) == REGNO (part[1][2]))
17872 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17874 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17876 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17878 operands[2 + i] = part[0][j];
17879 operands[6 + i] = part[1][j];
17884 for (i = 0; i < nparts; i++)
17886 operands[2 + i] = part[0][i];
17887 operands[6 + i] = part[1][i];
17891 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17892 if (optimize_insn_for_size_p ())
17894 for (j = 0; j < nparts - 1; j++)
17895 if (CONST_INT_P (operands[6 + j])
17896 && operands[6 + j] != const0_rtx
17897 && REG_P (operands[2 + j]))
17898 for (i = j; i < nparts - 1; i++)
17899 if (CONST_INT_P (operands[7 + i])
17900 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17901 operands[7 + i] = operands[2 + j];
17904 for (i = 0; i < nparts; i++)
17905 emit_move_insn (operands[2 + i], operands[6 + i]);
17910 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17911 left shift by a constant, either using a single shift or
17912 a sequence of add instructions. */
17915 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17919 emit_insn ((mode == DImode
17921 : gen_adddi3) (operand, operand, operand));
17923 else if (!optimize_insn_for_size_p ()
17924 && count * ix86_cost->add <= ix86_cost->shift_const)
17927 for (i = 0; i < count; i++)
17929 emit_insn ((mode == DImode
17931 : gen_adddi3) (operand, operand, operand));
17935 emit_insn ((mode == DImode
17937 : gen_ashldi3) (operand, operand, GEN_INT (count)));
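/* For reference, the add-chain alternative above exploits the fact that
   "add reg, reg" doubles a value.  A minimal C sketch of the idea
   (illustrative only, not GCC code):

       unsigned int shl3 (unsigned int x)
       {
         x += x;        // x << 1
         x += x;        // x << 2
         x += x;        // x << 3
         return x;
       }

   which is exactly what the count * add <= shift_const cost test
   above decides between.  */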
17941 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17943 rtx low[2], high[2];
17945 const int single_width = mode == DImode ? 32 : 64;
17947 if (CONST_INT_P (operands[2]))
17949 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17950 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17952 if (count >= single_width)
17954 emit_move_insn (high[0], low[1]);
17955 emit_move_insn (low[0], const0_rtx);
17957 if (count > single_width)
17958 ix86_expand_ashl_const (high[0], count - single_width, mode);
17962 if (!rtx_equal_p (operands[0], operands[1]))
17963 emit_move_insn (operands[0], operands[1]);
17964 emit_insn ((mode == DImode
17966 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17967 ix86_expand_ashl_const (low[0], count, mode);
17972 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17974 if (operands[1] == const1_rtx)
17976 /* Assuming we've chosen QImode-capable registers, 1 << N
17977 can be done with two 32/64-bit shifts, no branches, no cmoves. */
17978 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17980 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17982 ix86_expand_clear (low[0]);
17983 ix86_expand_clear (high[0]);
17984 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17986 d = gen_lowpart (QImode, low[0]);
17987 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17988 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17989 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17991 d = gen_lowpart (QImode, high[0]);
17992 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17993 s = gen_rtx_NE (QImode, flags, const0_rtx);
17994 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17997 /* Otherwise, we can get the same results by manually performing
17998 a bit-extract operation on bit 5/6, and then performing the two
17999 shifts. The two methods of getting 0/1 into low/high are exactly
18000 the same size. Avoiding the shift in the bit-extract case helps
18001 Pentium 4 a bit; no one else seems to care much either way. */
18006 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
18007 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
18009 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
18010 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
18012 emit_insn ((mode == DImode
18014 : gen_lshrdi3) (high[0], high[0],
18015 GEN_INT (mode == DImode ? 5 : 6)));
18016 emit_insn ((mode == DImode
18018 : gen_anddi3) (high[0], high[0], const1_rtx));
18019 emit_move_insn (low[0], high[0]);
18020 emit_insn ((mode == DImode
18022 : gen_xordi3) (low[0], low[0], const1_rtx));
18025 emit_insn ((mode == DImode
18027 : gen_ashldi3) (low[0], low[0], operands[2]));
18028 emit_insn ((mode == DImode
18030 : gen_ashldi3) (high[0], high[0], operands[2]));
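/* A C rendering of the branch-free 1 << N sequence above, for the DImode
   case (64-bit result built from two 32-bit halves, count n already
   truncated to 0..63; illustrative sketch only):

       unsigned int bit = (n >> 5) & 1;           // 1 iff the bit lands in the high half
       unsigned int lo  = (bit ^ 1) << (n & 31);  // 1 << n        for n < 32, else 0
       unsigned int hi  = bit << (n & 31);        // 1 << (n - 32) for n >= 32, else 0

   The two final shifts rely on 32-bit x86 shifts masking their count to
   5 bits, which is exactly what the RTL emitted above does.  */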
18034 if (operands[1] == constm1_rtx)
18036 /* For -1 << N, we can avoid the shld instruction, because we
18037 know that we're shifting 0...31/63 ones into a -1. */
18038 emit_move_insn (low[0], constm1_rtx);
18039 if (optimize_insn_for_size_p ())
18040 emit_move_insn (high[0], low[0]);
18042 emit_move_insn (high[0], constm1_rtx);
18046 if (!rtx_equal_p (operands[0], operands[1]))
18047 emit_move_insn (operands[0], operands[1]);
18049 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18050 emit_insn ((mode == DImode
18052 : gen_x86_64_shld) (high[0], low[0], operands[2]));
18055 emit_insn ((mode == DImode
18057 : gen_ashldi3) (low[0], low[0], operands[2]));
18059 if (TARGET_CMOVE && scratch)
18061 ix86_expand_clear (scratch);
18062 emit_insn ((mode == DImode
18063 ? gen_x86_shiftsi_adj_1
18064 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
18068 emit_insn ((mode == DImode
18069 ? gen_x86_shiftsi_adj_2
18070 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
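/* The variable-count double-word shift above follows the usual SHLD
   identity; in C, for a count c with 1 <= c <= 31 (illustrative sketch):

       high = (high << c) | (low >> (32 - c));   // what "shld high, low, c" computes
       low  = low << c;

   with the x86_shift*_adj patterns fixing up the halves afterwards when
   bit 5 (bit 6 for the TImode split) of the count is set.  */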
18074 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18076 rtx low[2], high[2];
18078 const int single_width = mode == DImode ? 32 : 64;
18080 if (CONST_INT_P (operands[2]))
18082 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18083 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18085 if (count == single_width * 2 - 1)
18087 emit_move_insn (high[0], high[1]);
18088 emit_insn ((mode == DImode
18090 : gen_ashrdi3) (high[0], high[0],
18091 GEN_INT (single_width - 1)));
18092 emit_move_insn (low[0], high[0]);
18095 else if (count >= single_width)
18097 emit_move_insn (low[0], high[1]);
18098 emit_move_insn (high[0], low[0]);
18099 emit_insn ((mode == DImode
18101 : gen_ashrdi3) (high[0], high[0],
18102 GEN_INT (single_width - 1)));
18103 if (count > single_width)
18104 emit_insn ((mode == DImode
18106 : gen_ashrdi3) (low[0], low[0],
18107 GEN_INT (count - single_width)));
18111 if (!rtx_equal_p (operands[0], operands[1]))
18112 emit_move_insn (operands[0], operands[1]);
18113 emit_insn ((mode == DImode
18115 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18116 emit_insn ((mode == DImode
18118 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
18123 if (!rtx_equal_p (operands[0], operands[1]))
18124 emit_move_insn (operands[0], operands[1]);
18126 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18128 emit_insn ((mode == DImode
18130 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18131 emit_insn ((mode == DImode
18133 : gen_ashrdi3) (high[0], high[0], operands[2]));
18135 if (TARGET_CMOVE && scratch)
18137 emit_move_insn (scratch, high[0]);
18138 emit_insn ((mode == DImode
18140 : gen_ashrdi3) (scratch, scratch,
18141 GEN_INT (single_width - 1)));
18142 emit_insn ((mode == DImode
18143 ? gen_x86_shiftsi_adj_1
18144 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18148 emit_insn ((mode == DImode
18149 ? gen_x86_shiftsi_adj_3
18150 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
18155 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18157 rtx low[2], high[2];
18159 const int single_width = mode == DImode ? 32 : 64;
18161 if (CONST_INT_P (operands[2]))
18163 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18164 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18166 if (count >= single_width)
18168 emit_move_insn (low[0], high[1]);
18169 ix86_expand_clear (high[0]);
18171 if (count > single_width)
18172 emit_insn ((mode == DImode
18174 : gen_lshrdi3) (low[0], low[0],
18175 GEN_INT (count - single_width)));
18179 if (!rtx_equal_p (operands[0], operands[1]))
18180 emit_move_insn (operands[0], operands[1]);
18181 emit_insn ((mode == DImode
18183 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18184 emit_insn ((mode == DImode
18186 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18191 if (!rtx_equal_p (operands[0], operands[1]))
18192 emit_move_insn (operands[0], operands[1]);
18194 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18196 emit_insn ((mode == DImode
18198 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18199 emit_insn ((mode == DImode
18201 : gen_lshrdi3) (high[0], high[0], operands[2]));
18203 /* Heh. By reversing the arguments, we can reuse this pattern. */
18204 if (TARGET_CMOVE && scratch)
18206 ix86_expand_clear (scratch);
18207 emit_insn ((mode == DImode
18208 ? gen_x86_shiftsi_adj_1
18209 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18213 emit_insn ((mode == DImode
18214 ? gen_x86_shiftsi_adj_2
18215 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
18219 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18221 predict_jump (int prob)
18223 rtx insn = get_last_insn ();
18224 gcc_assert (JUMP_P (insn));
18225 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
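/* PROB is measured on the REG_BR_PROB_BASE scale (10000), so for example
   predict_jump (REG_BR_PROB_BASE * 90 / 100) annotates the branch as
   taken with probability 9000/10000 = 90%.  */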
18228 /* Helper function for the string operations below. Test whether VARIABLE
18229 is aligned to VALUE bytes. If so, jump to the label. */
18231 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18233 rtx label = gen_label_rtx ();
18234 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18235 if (GET_MODE (variable) == DImode)
18236 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18238 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18239 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18242 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18244 predict_jump (REG_BR_PROB_BASE * 90 / 100);
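/* The emitted test is a plain single-bit mask check; in C terms
   (illustrative sketch):

       if ((variable & value) == 0)   // VALUE is one bit, e.g. 4 tests bit 2
         goto label;                  // skip the move that handles this bit

   Chaining these tests for bits 1, 2, 4, ... produces the jump trees
   used by the prologues and epilogues below.  */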
18248 /* Decrease COUNTREG by VALUE. */
18250 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18252 if (GET_MODE (countreg) == DImode)
18253 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
18255 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
18258 /* Zero-extend the possibly-SImode EXP to a Pmode register. */
18260 ix86_zero_extend_to_Pmode (rtx exp)
18263 if (GET_MODE (exp) == VOIDmode)
18264 return force_reg (Pmode, exp);
18265 if (GET_MODE (exp) == Pmode)
18266 return copy_to_mode_reg (Pmode, exp);
18267 r = gen_reg_rtx (Pmode);
18268 emit_insn (gen_zero_extendsidi2 (r, exp));
18272 /* Divide COUNTREG by SCALE. */
18274 scale_counter (rtx countreg, int scale)
18280 if (CONST_INT_P (countreg))
18281 return GEN_INT (INTVAL (countreg) / scale);
18282 gcc_assert (REG_P (countreg));
18284 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18285 GEN_INT (exact_log2 (scale)),
18286 NULL, 1, OPTAB_DIRECT);
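/* In C terms this is just count / scale for a power-of-two scale, done
   with a logical shift in the register case (illustrative):

       chunks = count >> 2;   // scale == 4: byte count -> dword count

   matching the LSHIFTRT by exact_log2 (scale) emitted above.  */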
18290 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18291 DImode for constant loop counts. */
18293 static enum machine_mode
18294 counter_mode (rtx count_exp)
18296 if (GET_MODE (count_exp) != VOIDmode)
18297 return GET_MODE (count_exp);
18298 if (!CONST_INT_P (count_exp))
18300 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18305 /* When SRCPTR is non-NULL, output a simple loop to move memory from
18306 SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times; the
18307 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
18308 equivalent loop to set memory by VALUE (supposed to be in MODE).
18310 The size is rounded down to a whole number of the chunk size moved at once.
18311 SRCMEM and DESTMEM provide MEM rtx's to feed proper aliasing info. */
18315 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18316 rtx destptr, rtx srcptr, rtx value,
18317 rtx count, enum machine_mode mode, int unroll,
18320 rtx out_label, top_label, iter, tmp;
18321 enum machine_mode iter_mode = counter_mode (count);
18322 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18323 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18329 top_label = gen_label_rtx ();
18330 out_label = gen_label_rtx ();
18331 iter = gen_reg_rtx (iter_mode);
18333 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18334 NULL, 1, OPTAB_DIRECT);
18335 /* Those two should combine. */
18336 if (piece_size == const1_rtx)
18338 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18340 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18342 emit_move_insn (iter, const0_rtx);
18344 emit_label (top_label);
18346 tmp = convert_modes (Pmode, iter_mode, iter, true);
18347 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18348 destmem = change_address (destmem, mode, x_addr);
18352 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18353 srcmem = change_address (srcmem, mode, y_addr);
18355 /* When unrolling for chips that reorder memory reads and writes,
18356 we can save registers by using a single temporary.
18357 Also, using 4 temporaries is overkill in 32-bit mode. */
18358 if (!TARGET_64BIT && 0)
18360 for (i = 0; i < unroll; i++)
18365 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18367 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18369 emit_move_insn (destmem, srcmem);
18375 gcc_assert (unroll <= 4);
18376 for (i = 0; i < unroll; i++)
18378 tmpreg[i] = gen_reg_rtx (mode);
18382 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18384 emit_move_insn (tmpreg[i], srcmem);
18386 for (i = 0; i < unroll; i++)
18391 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18393 emit_move_insn (destmem, tmpreg[i]);
18398 for (i = 0; i < unroll; i++)
18402 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18403 emit_move_insn (destmem, value);
18406 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18407 true, OPTAB_LIB_WIDEN);
18409 emit_move_insn (iter, tmp);
18411 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18413 if (expected_size != -1)
18415 expected_size /= GET_MODE_SIZE (mode) * unroll;
18416 if (expected_size == 0)
18418 else if (expected_size > REG_BR_PROB_BASE)
18419 predict_jump (REG_BR_PROB_BASE - 1);
18421 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18424 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18425 iter = ix86_zero_extend_to_Pmode (iter);
18426 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18427 true, OPTAB_LIB_WIDEN);
18428 if (tmp != destptr)
18429 emit_move_insn (destptr, tmp);
18432 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18433 true, OPTAB_LIB_WIDEN);
18435 emit_move_insn (srcptr, tmp);
18437 emit_label (out_label);
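/* Shape of the code emitted by this function, as a C sketch (memset
   flavor, unroll factor 1, "piece" standing for the chunk size in bytes
   and "chunk_t" for the chunk type; names are made up, illustrative only):

       size = count & ~(piece - 1);          // whole pieces only
       for (iter = 0; iter < size; iter += piece)
         *(chunk_t *) (dest + iter) = value;
       dest += iter;                         // and src += iter for the copy flavor

   The tail of count % piece bytes is left for the epilogue code.  */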
18440 /* Output a "rep; mov" instruction.
18441 Arguments have the same meaning as for the previous function. */
18443 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18444 rtx destptr, rtx srcptr,
18446 enum machine_mode mode)
18452 /* If the size is known, it is shorter to use rep movs. */
18453 if (mode == QImode && CONST_INT_P (count)
18454 && !(INTVAL (count) & 3))
18457 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18458 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18459 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18460 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18461 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18462 if (mode != QImode)
18464 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18465 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18466 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18467 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18468 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18469 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18473 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18474 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18476 if (CONST_INT_P (count))
18478 count = GEN_INT (INTVAL (count)
18479 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18480 destmem = shallow_copy_rtx (destmem);
18481 srcmem = shallow_copy_rtx (srcmem);
18482 set_mem_size (destmem, count);
18483 set_mem_size (srcmem, count);
18487 if (MEM_SIZE (destmem))
18488 set_mem_size (destmem, NULL_RTX);
18489 if (MEM_SIZE (srcmem))
18490 set_mem_size (srcmem, NULL_RTX);
18492 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
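/* In C terms, the emitted "rep; movs" behaves as (illustrative):

       while (countreg--)
         *destptr++ = *srcptr++;   // chunk-sized (byte/dword/qword) moves

   leaving both pointers advanced by countreg * chunk size; the destexp
   and srcexp PLUS expressions built above describe exactly that final
   pointer value to the RTL.  */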
18496 /* Output a "rep; stos" instruction.
18497 Arguments have the same meaning as for the previous function. */
18499 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18500 rtx count, enum machine_mode mode,
18506 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18507 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18508 value = force_reg (mode, gen_lowpart (mode, value));
18509 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18510 if (mode != QImode)
18512 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18513 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18514 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18517 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18518 if (orig_value == const0_rtx && CONST_INT_P (count))
18520 count = GEN_INT (INTVAL (count)
18521 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18522 destmem = shallow_copy_rtx (destmem);
18523 set_mem_size (destmem, count);
18525 else if (MEM_SIZE (destmem))
18526 set_mem_size (destmem, NULL_RTX);
18527 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
18531 emit_strmov (rtx destmem, rtx srcmem,
18532 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18534 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18535 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18536 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18539 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
18541 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18542 rtx destptr, rtx srcptr, rtx count, int max_size)
18545 if (CONST_INT_P (count))
18547 HOST_WIDE_INT countval = INTVAL (count);
18550 if ((countval & 0x10) && max_size > 16)
18554 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18555 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18558 gcc_unreachable ();
18561 if ((countval & 0x08) && max_size > 8)
18564 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18567 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18568 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18572 if ((countval & 0x04) && max_size > 4)
18574 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18577 if ((countval & 0x02) && max_size > 2)
18579 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18582 if ((countval & 0x01) && max_size > 1)
18584 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18591 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18592 count, 1, OPTAB_DIRECT);
18593 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18594 count, QImode, 1, 4);
18598 /* When single-instruction string ops are available, we can cheaply increase
18599 the dest and src pointers. Otherwise we save code size by maintaining an
18600 offset (zero is readily available from the preceding rep operation) and using x86 addressing modes.
18602 if (TARGET_SINGLE_STRINGOP)
18606 rtx label = ix86_expand_aligntest (count, 4, true);
18607 src = change_address (srcmem, SImode, srcptr);
18608 dest = change_address (destmem, SImode, destptr);
18609 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18610 emit_label (label);
18611 LABEL_NUSES (label) = 1;
18615 rtx label = ix86_expand_aligntest (count, 2, true);
18616 src = change_address (srcmem, HImode, srcptr);
18617 dest = change_address (destmem, HImode, destptr);
18618 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18619 emit_label (label);
18620 LABEL_NUSES (label) = 1;
18624 rtx label = ix86_expand_aligntest (count, 1, true);
18625 src = change_address (srcmem, QImode, srcptr);
18626 dest = change_address (destmem, QImode, destptr);
18627 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18628 emit_label (label);
18629 LABEL_NUSES (label) = 1;
18634 rtx offset = force_reg (Pmode, const0_rtx);
18639 rtx label = ix86_expand_aligntest (count, 4, true);
18640 src = change_address (srcmem, SImode, srcptr);
18641 dest = change_address (destmem, SImode, destptr);
18642 emit_move_insn (dest, src);
18643 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18644 true, OPTAB_LIB_WIDEN);
18646 emit_move_insn (offset, tmp);
18647 emit_label (label);
18648 LABEL_NUSES (label) = 1;
18652 rtx label = ix86_expand_aligntest (count, 2, true);
18653 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18654 src = change_address (srcmem, HImode, tmp);
18655 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18656 dest = change_address (destmem, HImode, tmp);
18657 emit_move_insn (dest, src);
18658 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18659 true, OPTAB_LIB_WIDEN);
18661 emit_move_insn (offset, tmp);
18662 emit_label (label);
18663 LABEL_NUSES (label) = 1;
18667 rtx label = ix86_expand_aligntest (count, 1, true);
18668 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18669 src = change_address (srcmem, QImode, tmp);
18670 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18671 dest = change_address (destmem, QImode, tmp);
18672 emit_move_insn (dest, src);
18673 emit_label (label);
18674 LABEL_NUSES (label) = 1;
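/* For a variable residual count the code above forms a jump tree; in C
   terms (illustrative, max_size == 8 case):

       if (n & 4) copy 4 bytes;
       if (n & 2) copy 2 bytes;
       if (n & 1) copy 1 byte;

   while a constant count instead emits unconditional moves for each set
   bit, so at most one move per power of two below max_size is needed.  */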
18679 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18681 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18682 rtx count, int max_size)
18685 expand_simple_binop (counter_mode (count), AND, count,
18686 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18687 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18688 gen_lowpart (QImode, value), count, QImode,
18692 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18694 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18698 if (CONST_INT_P (count))
18700 HOST_WIDE_INT countval = INTVAL (count);
18703 if ((countval & 0x10) && max_size > 16)
18707 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18708 emit_insn (gen_strset (destptr, dest, value));
18709 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18710 emit_insn (gen_strset (destptr, dest, value));
18713 gcc_unreachable ();
18716 if ((countval & 0x08) && max_size > 8)
18720 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18721 emit_insn (gen_strset (destptr, dest, value));
18725 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18726 emit_insn (gen_strset (destptr, dest, value));
18727 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18728 emit_insn (gen_strset (destptr, dest, value));
18732 if ((countval & 0x04) && max_size > 4)
18734 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18735 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18738 if ((countval & 0x02) && max_size > 2)
18740 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18741 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18744 if ((countval & 0x01) && max_size > 1)
18746 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18747 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18754 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18759 rtx label = ix86_expand_aligntest (count, 16, true);
18762 dest = change_address (destmem, DImode, destptr);
18763 emit_insn (gen_strset (destptr, dest, value));
18764 emit_insn (gen_strset (destptr, dest, value));
18768 dest = change_address (destmem, SImode, destptr);
18769 emit_insn (gen_strset (destptr, dest, value));
18770 emit_insn (gen_strset (destptr, dest, value));
18771 emit_insn (gen_strset (destptr, dest, value));
18772 emit_insn (gen_strset (destptr, dest, value));
18774 emit_label (label);
18775 LABEL_NUSES (label) = 1;
18779 rtx label = ix86_expand_aligntest (count, 8, true);
18782 dest = change_address (destmem, DImode, destptr);
18783 emit_insn (gen_strset (destptr, dest, value));
18787 dest = change_address (destmem, SImode, destptr);
18788 emit_insn (gen_strset (destptr, dest, value));
18789 emit_insn (gen_strset (destptr, dest, value));
18791 emit_label (label);
18792 LABEL_NUSES (label) = 1;
18796 rtx label = ix86_expand_aligntest (count, 4, true);
18797 dest = change_address (destmem, SImode, destptr);
18798 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18799 emit_label (label);
18800 LABEL_NUSES (label) = 1;
18804 rtx label = ix86_expand_aligntest (count, 2, true);
18805 dest = change_address (destmem, HImode, destptr);
18806 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18807 emit_label (label);
18808 LABEL_NUSES (label) = 1;
18812 rtx label = ix86_expand_aligntest (count, 1, true);
18813 dest = change_address (destmem, QImode, destptr);
18814 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18815 emit_label (label);
18816 LABEL_NUSES (label) = 1;
18820 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
18821 to DESIRED_ALIGNMENT. */
18823 expand_movmem_prologue (rtx destmem, rtx srcmem,
18824 rtx destptr, rtx srcptr, rtx count,
18825 int align, int desired_alignment)
18827 if (align <= 1 && desired_alignment > 1)
18829 rtx label = ix86_expand_aligntest (destptr, 1, false);
18830 srcmem = change_address (srcmem, QImode, srcptr);
18831 destmem = change_address (destmem, QImode, destptr);
18832 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18833 ix86_adjust_counter (count, 1);
18834 emit_label (label);
18835 LABEL_NUSES (label) = 1;
18837 if (align <= 2 && desired_alignment > 2)
18839 rtx label = ix86_expand_aligntest (destptr, 2, false);
18840 srcmem = change_address (srcmem, HImode, srcptr);
18841 destmem = change_address (destmem, HImode, destptr);
18842 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18843 ix86_adjust_counter (count, 2);
18844 emit_label (label);
18845 LABEL_NUSES (label) = 1;
18847 if (align <= 4 && desired_alignment > 4)
18849 rtx label = ix86_expand_aligntest (destptr, 4, false);
18850 srcmem = change_address (srcmem, SImode, srcptr);
18851 destmem = change_address (destmem, SImode, destptr);
18852 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18853 ix86_adjust_counter (count, 4);
18854 emit_label (label);
18855 LABEL_NUSES (label) = 1;
18857 gcc_assert (desired_alignment <= 8);
18860 /* Copy enough from SRC to DST to raise the known alignment of DST to
18861 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be copied. */
18863 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18864 int desired_align, int align_bytes)
18867 rtx src_size, dst_size;
18869 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18870 if (src_align_bytes >= 0)
18871 src_align_bytes = desired_align - src_align_bytes;
18872 src_size = MEM_SIZE (src);
18873 dst_size = MEM_SIZE (dst);
18874 if (align_bytes & 1)
18876 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18877 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18879 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18881 if (align_bytes & 2)
18883 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18884 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18885 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18886 set_mem_align (dst, 2 * BITS_PER_UNIT);
18887 if (src_align_bytes >= 0
18888 && (src_align_bytes & 1) == (align_bytes & 1)
18889 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18890 set_mem_align (src, 2 * BITS_PER_UNIT);
18892 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18894 if (align_bytes & 4)
18896 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18897 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18898 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18899 set_mem_align (dst, 4 * BITS_PER_UNIT);
18900 if (src_align_bytes >= 0)
18902 unsigned int src_align = 0;
18903 if ((src_align_bytes & 3) == (align_bytes & 3))
18905 else if ((src_align_bytes & 1) == (align_bytes & 1))
18907 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18908 set_mem_align (src, src_align * BITS_PER_UNIT);
18911 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18913 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18914 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18915 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18916 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18917 if (src_align_bytes >= 0)
18919 unsigned int src_align = 0;
18920 if ((src_align_bytes & 7) == (align_bytes & 7))
18922 else if ((src_align_bytes & 3) == (align_bytes & 3))
18924 else if ((src_align_bytes & 1) == (align_bytes & 1))
18926 if (src_align > (unsigned int) desired_align)
18927 src_align = desired_align;
18928 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18929 set_mem_align (src, src_align * BITS_PER_UNIT);
18932 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18934 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
18939 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
18940 to DESIRED_ALIGNMENT. */
18942 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18943 int align, int desired_alignment)
18945 if (align <= 1 && desired_alignment > 1)
18947 rtx label = ix86_expand_aligntest (destptr, 1, false);
18948 destmem = change_address (destmem, QImode, destptr);
18949 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18950 ix86_adjust_counter (count, 1);
18951 emit_label (label);
18952 LABEL_NUSES (label) = 1;
18954 if (align <= 2 && desired_alignment > 2)
18956 rtx label = ix86_expand_aligntest (destptr, 2, false);
18957 destmem = change_address (destmem, HImode, destptr);
18958 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18959 ix86_adjust_counter (count, 2);
18960 emit_label (label);
18961 LABEL_NUSES (label) = 1;
18963 if (align <= 4 && desired_alignment > 4)
18965 rtx label = ix86_expand_aligntest (destptr, 4, false);
18966 destmem = change_address (destmem, SImode, destptr);
18967 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18968 ix86_adjust_counter (count, 4);
18969 emit_label (label);
18970 LABEL_NUSES (label) = 1;
18972 gcc_assert (desired_alignment <= 8);
18975 /* Store enough into DST to raise the known alignment of DST from ALIGN
18976 to DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
18978 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18979 int desired_align, int align_bytes)
18982 rtx dst_size = MEM_SIZE (dst);
18983 if (align_bytes & 1)
18985 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18987 emit_insn (gen_strset (destreg, dst,
18988 gen_lowpart (QImode, value)));
18990 if (align_bytes & 2)
18992 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18993 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18994 set_mem_align (dst, 2 * BITS_PER_UNIT);
18996 emit_insn (gen_strset (destreg, dst,
18997 gen_lowpart (HImode, value)));
18999 if (align_bytes & 4)
19001 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19002 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19003 set_mem_align (dst, 4 * BITS_PER_UNIT);
19005 emit_insn (gen_strset (destreg, dst,
19006 gen_lowpart (SImode, value)));
19008 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19009 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19010 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19012 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
19016 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19017 static enum stringop_alg
19018 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19019 int *dynamic_check)
19021 const struct stringop_algs * algs;
19022 bool optimize_for_speed;
19023 /* Algorithms using the rep prefix want at least edi and ecx;
19024 additionally, memset wants eax and memcpy wants esi. Don't
19025 consider such algorithms if the user has appropriated those
19026 registers for their own purposes. */
19027 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19029 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19031 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19032 || (alg != rep_prefix_1_byte \
19033 && alg != rep_prefix_4_byte \
19034 && alg != rep_prefix_8_byte))
19035 const struct processor_costs *cost;
19037 /* Even if the string operation call is cold, we still might spend a lot
19038 of time processing large blocks. */
19039 if (optimize_function_for_size_p (cfun)
19040 || (optimize_insn_for_size_p ()
19041 && expected_size != -1 && expected_size < 256))
19042 optimize_for_speed = false;
19044 optimize_for_speed = true;
19046 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19048 *dynamic_check = -1;
19050 algs = &cost->memset[TARGET_64BIT != 0];
19052 algs = &cost->memcpy[TARGET_64BIT != 0];
19053 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19054 return stringop_alg;
19055 /* rep; movq or rep; movl is the smallest variant. */
19056 else if (!optimize_for_speed)
19058 if (!count || (count & 3))
19059 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19061 return rep_prefix_usable ? rep_prefix_4_byte : loop;
19063 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
19065 else if (expected_size != -1 && expected_size < 4)
19066 return loop_1_byte;
19067 else if (expected_size != -1)
19070 enum stringop_alg alg = libcall;
19071 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19073 /* We get here if the algorithms that were not libcall-based
19074 were rep-prefix based and we are unable to use rep prefixes
19075 based on global register usage. Break out of the loop and
19076 use the heuristic below. */
19077 if (algs->size[i].max == 0)
19079 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19081 enum stringop_alg candidate = algs->size[i].alg;
19083 if (candidate != libcall && ALG_USABLE_P (candidate))
19085 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19086 last non-libcall inline algorithm. */
19087 if (TARGET_INLINE_ALL_STRINGOPS)
19089 /* When the current size is best copied by a libcall
19090 but we are still forced to inline, run the heuristic below
19091 that picks code for medium-sized blocks. */
19092 if (alg != libcall)
19096 else if (ALG_USABLE_P (candidate))
19100 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
19102 /* When asked to inline the call anyway, try to pick a meaningful choice.
19103 We look for the maximal size of a block that is faster to copy by hand and
19104 take blocks of at most that size, guessing that the average size will
19105 be roughly half of the block.
19107 If this turns out to be bad, we might simply specify the preferred
19108 choice in ix86_costs. */
19109 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19110 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19113 enum stringop_alg alg;
19115 bool any_alg_usable_p = true;
19117 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19119 enum stringop_alg candidate = algs->size[i].alg;
19120 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19122 if (candidate != libcall && candidate
19123 && ALG_USABLE_P (candidate))
19124 max = algs->size[i].max;
19126 /* If there aren't any usable algorithms, then recursing on
19127 smaller sizes isn't going to find anything. Just return the
19128 simple byte-at-a-time copy loop. */
19129 if (!any_alg_usable_p)
19131 /* Pick something reasonable. */
19132 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19133 *dynamic_check = 128;
19134 return loop_1_byte;
19138 alg = decide_alg (count, max / 2, memset, dynamic_check);
19139 gcc_assert (*dynamic_check == -1);
19140 gcc_assert (alg != libcall);
19141 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19142 *dynamic_check = max;
19145 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19146 #undef ALG_USABLE_P
19149 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19150 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19152 decide_alignment (int align,
19153 enum stringop_alg alg,
19156 int desired_align = 0;
19160 gcc_unreachable ();
19162 case unrolled_loop:
19163 desired_align = GET_MODE_SIZE (Pmode);
19165 case rep_prefix_8_byte:
19168 case rep_prefix_4_byte:
19169 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
19170 copying a whole cache line at once. */
19171 if (TARGET_PENTIUMPRO)
19176 case rep_prefix_1_byte:
19177 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
19178 copying a whole cache line at once. */
19179 if (TARGET_PENTIUMPRO)
19193 if (desired_align < align)
19194 desired_align = align;
19195 if (expected_size != -1 && expected_size < 4)
19196 desired_align = align;
19197 return desired_align;
19200 /* Return the smallest power of 2 greater than VAL. */
19202 smallest_pow2_greater_than (int val)
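/* A helper like this is conventionally implemented as (a sketch, the body
   being the obvious doubling loop):

       int ret = 1;
       while (ret <= val)
         ret <<= 1;
       return ret;

   i.e. keep doubling until the result is strictly greater than VAL.  */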
19210 /* Expand string move (memcpy) operation. Use i386 string operations when
19211 profitable. expand_setmem contains similar code. The code depends upon
19212 architecture, block size and alignment, but always has the same
19215 1) Prologue guard: a conditional that jumps ahead to the epilogues for small
19216 blocks that can be handled by the epilogue alone. This is faster but
19217 also needed for correctness, since the prologue assumes the block is larger
19218 than the desired alignment.
19220 Optional dynamic check for size and libcall for large
19221 blocks is emitted here too, with -minline-stringops-dynamically.
19223 2) Prologue: copy the first few bytes in order to get the destination
19224 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19225 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19226 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
19228 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19229 with specified algorithm.
19231 4) Epilogue: code copying tail of the block that is too small to be
19232 handled by main body (or up to size guarded by prologue guard). */
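/* In outline, the generated code for memcpy (dst, src, n) behaves like
   the following C (an illustrative sketch of the four steps; the helper
   names are made up):

       if (n < epilogue_size_needed)
         goto epilogue;                            // 1) prologue guard
       while ((uintptr_t) dst & (desired_align - 1))
         { *dst++ = *src++; n--; }                 // 2) alignment prologue
       copy_in_size_needed_chunks (dst, src, n);   // 3) main body
     epilogue:
       copy_tail (dst, src, n & (epilogue_size_needed - 1));  // 4) epilogue
*/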
19235 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19236 rtx expected_align_exp, rtx expected_size_exp)
19242 rtx jump_around_label = NULL;
19243 HOST_WIDE_INT align = 1;
19244 unsigned HOST_WIDE_INT count = 0;
19245 HOST_WIDE_INT expected_size = -1;
19246 int size_needed = 0, epilogue_size_needed;
19247 int desired_align = 0, align_bytes = 0;
19248 enum stringop_alg alg;
19250 bool need_zero_guard = false;
19252 if (CONST_INT_P (align_exp))
19253 align = INTVAL (align_exp);
19254 /* i386 can do misaligned access at reasonably increased cost. */
19255 if (CONST_INT_P (expected_align_exp)
19256 && INTVAL (expected_align_exp) > align)
19257 align = INTVAL (expected_align_exp);
19258 /* ALIGN is the minimum of destination and source alignment, but we care here
19259 just about destination alignment. */
19260 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19261 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19263 if (CONST_INT_P (count_exp))
19264 count = expected_size = INTVAL (count_exp);
19265 if (CONST_INT_P (expected_size_exp) && count == 0)
19266 expected_size = INTVAL (expected_size_exp);
19268 /* Make sure we don't need to care about overflow later on. */
19269 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19272 /* Step 0: Decide on preferred algorithm, desired alignment and
19273 size of chunks to be copied by main loop. */
19275 alg = decide_alg (count, expected_size, false, &dynamic_check);
19276 desired_align = decide_alignment (align, alg, expected_size);
19278 if (!TARGET_ALIGN_STRINGOPS)
19279 align = desired_align;
19281 if (alg == libcall)
19283 gcc_assert (alg != no_stringop);
19285 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19286 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19287 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19292 gcc_unreachable ();
19294 need_zero_guard = true;
19295 size_needed = GET_MODE_SIZE (Pmode);
19297 case unrolled_loop:
19298 need_zero_guard = true;
19299 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19301 case rep_prefix_8_byte:
19304 case rep_prefix_4_byte:
19307 case rep_prefix_1_byte:
19311 need_zero_guard = true;
19316 epilogue_size_needed = size_needed;
19318 /* Step 1: Prologue guard. */
19320 /* Alignment code needs count to be in a register. */
19321 if (CONST_INT_P (count_exp) && desired_align > align)
19323 if (INTVAL (count_exp) > desired_align
19324 && INTVAL (count_exp) > size_needed)
19327 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19328 if (align_bytes <= 0)
19331 align_bytes = desired_align - align_bytes;
19333 if (align_bytes == 0)
19334 count_exp = force_reg (counter_mode (count_exp), count_exp);
19336 gcc_assert (desired_align >= 1 && align >= 1);
19338 /* Ensure that alignment prologue won't copy past end of block. */
19339 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19341 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19342 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19343 Make sure it is a power of 2. */
19344 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19348 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19350 /* If main algorithm works on QImode, no epilogue is needed.
19351 For small sizes just don't align anything. */
19352 if (size_needed == 1)
19353 desired_align = align;
19360 label = gen_label_rtx ();
19361 emit_cmp_and_jump_insns (count_exp,
19362 GEN_INT (epilogue_size_needed),
19363 LTU, 0, counter_mode (count_exp), 1, label);
19364 if (expected_size == -1 || expected_size < epilogue_size_needed)
19365 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19367 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19371 /* Emit code to decide at runtime whether a library call or inline code should be used. */
19373 if (dynamic_check != -1)
19375 if (CONST_INT_P (count_exp))
19377 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19379 emit_block_move_via_libcall (dst, src, count_exp, false);
19380 count_exp = const0_rtx;
19386 rtx hot_label = gen_label_rtx ();
19387 jump_around_label = gen_label_rtx ();
19388 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19389 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19390 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19391 emit_block_move_via_libcall (dst, src, count_exp, false);
19392 emit_jump (jump_around_label);
19393 emit_label (hot_label);
19397 /* Step 2: Alignment prologue. */
19399 if (desired_align > align)
19401 if (align_bytes == 0)
19403 /* Except for the first move in the epilogue, we no longer know
19404 the constant offset in the aliasing info. It doesn't seem worth
19405 the pain to maintain it for the first move, so throw away the info early. */
19407 src = change_address (src, BLKmode, srcreg);
19408 dst = change_address (dst, BLKmode, destreg);
19409 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19414 /* If we know how many bytes need to be stored before dst is
19415 sufficiently aligned, maintain aliasing info accurately. */
19416 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19417 desired_align, align_bytes);
19418 count_exp = plus_constant (count_exp, -align_bytes);
19419 count -= align_bytes;
19421 if (need_zero_guard
19422 && (count < (unsigned HOST_WIDE_INT) size_needed
19423 || (align_bytes == 0
19424 && count < ((unsigned HOST_WIDE_INT) size_needed
19425 + desired_align - align))))
19427 /* It is possible that we copied enough so that the main loop will not execute. */
19429 gcc_assert (size_needed > 1);
19430 if (label == NULL_RTX)
19431 label = gen_label_rtx ();
19432 emit_cmp_and_jump_insns (count_exp,
19433 GEN_INT (size_needed),
19434 LTU, 0, counter_mode (count_exp), 1, label);
19435 if (expected_size == -1
19436 || expected_size < (desired_align - align) / 2 + size_needed)
19437 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19439 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19442 if (label && size_needed == 1)
19444 emit_label (label);
19445 LABEL_NUSES (label) = 1;
19447 epilogue_size_needed = 1;
19449 else if (label == NULL_RTX)
19450 epilogue_size_needed = size_needed;
19452 /* Step 3: Main loop. */
19458 gcc_unreachable ();
19460 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19461 count_exp, QImode, 1, expected_size);
19464 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19465 count_exp, Pmode, 1, expected_size);
19467 case unrolled_loop:
19468 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
19469 registers for 4 temporaries anyway. */
19470 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19471 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19474 case rep_prefix_8_byte:
19475 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19478 case rep_prefix_4_byte:
19479 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19482 case rep_prefix_1_byte:
19483 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19487 /* Properly adjust the offset of src and dest memory for aliasing. */
19488 if (CONST_INT_P (count_exp))
19490 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19491 (count / size_needed) * size_needed);
19492 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19493 (count / size_needed) * size_needed);
19497 src = change_address (src, BLKmode, srcreg);
19498 dst = change_address (dst, BLKmode, destreg);
19501 /* Step 4: Epilogue to copy the remaining bytes. */
19505 /* When the main loop is done, COUNT_EXP might hold the original count,
19506 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19507 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19508 bytes. Compensate if needed. */
19510 if (size_needed < epilogue_size_needed)
19513 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19514 GEN_INT (size_needed - 1), count_exp, 1,
19516 if (tmp != count_exp)
19517 emit_move_insn (count_exp, tmp);
19519 emit_label (label);
19520 LABEL_NUSES (label) = 1;
19523 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19524 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19525 epilogue_size_needed);
19526 if (jump_around_label)
19527 emit_label (jump_around_label);
19531 /* Helper function for memset. For a QImode value 0xXY produce
19532 0xXYXYXYXY of the width specified by MODE. This is essentially
19533 a * 0x10101010, but we can do slightly better than
19534 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
19537 promote_duplicated_reg (enum machine_mode mode, rtx val)
19539 enum machine_mode valmode = GET_MODE (val);
19541 int nops = mode == DImode ? 3 : 2;
19543 gcc_assert (mode == SImode || mode == DImode);
19544 if (val == const0_rtx)
19545 return copy_to_mode_reg (mode, const0_rtx);
19546 if (CONST_INT_P (val))
19548 HOST_WIDE_INT v = INTVAL (val) & 255;
19552 if (mode == DImode)
19553 v |= (v << 16) << 16;
19554 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19557 if (valmode == VOIDmode)
19559 if (valmode != QImode)
19560 val = gen_lowpart (QImode, val);
19561 if (mode == QImode)
19563 if (!TARGET_PARTIAL_REG_STALL)
19565 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19566 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19567 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19568 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19570 rtx reg = convert_modes (mode, QImode, val, true);
19571 tmp = promote_duplicated_reg (mode, const1_rtx);
19572 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19577 rtx reg = convert_modes (mode, QImode, val, true);
19579 if (!TARGET_PARTIAL_REG_STALL)
19580 if (mode == SImode)
19581 emit_insn (gen_movsi_insv_1 (reg, reg));
19583 emit_insn (gen_movdi_insv_1 (reg, reg));
19586 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19587 NULL, 1, OPTAB_DIRECT);
19589 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19591 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19592 NULL, 1, OPTAB_DIRECT);
19593 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19594 if (mode == DImode)
19596 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19597 NULL, 1, OPTAB_DIRECT);
19598 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
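/* The unwound shift/or sequence above computes, in C (illustrative,
   32-bit case):

       v |= v << 8;    // 0x000000XY -> 0x0000XYXY
       v |= v << 16;   // 0x0000XYXY -> 0xXYXYXYXY
       // the DImode variant additionally does v |= v << 32;

   giving the same result as multiplying the byte by 0x01010101.  */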
19603 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
19604 needed by the main loop copying SIZE_NEEDED chunks and by the prologue
19605 raising the alignment from ALIGN to DESIRED_ALIGN. */
19607 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19612 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19613 promoted_val = promote_duplicated_reg (DImode, val);
19614 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19615 promoted_val = promote_duplicated_reg (SImode, val);
19616 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19617 promoted_val = promote_duplicated_reg (HImode, val);
19619 promoted_val = val;
19621 return promoted_val;
19624 /* Expand string set (memset) operation. Use i386 string operations when
19625 profitable. See the expand_movmem comment for an explanation of the
19626 individual steps performed. */
19628 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19629 rtx expected_align_exp, rtx expected_size_exp)
19634 rtx jump_around_label = NULL;
19635 HOST_WIDE_INT align = 1;
19636 unsigned HOST_WIDE_INT count = 0;
19637 HOST_WIDE_INT expected_size = -1;
19638 int size_needed = 0, epilogue_size_needed;
19639 int desired_align = 0, align_bytes = 0;
19640 enum stringop_alg alg;
19641 rtx promoted_val = NULL;
19642 bool force_loopy_epilogue = false;
19644 bool need_zero_guard = false;
19646 if (CONST_INT_P (align_exp))
19647 align = INTVAL (align_exp);
19648 /* i386 can do misaligned access at reasonably increased cost. */
19649 if (CONST_INT_P (expected_align_exp)
19650 && INTVAL (expected_align_exp) > align)
19651 align = INTVAL (expected_align_exp);
19652 if (CONST_INT_P (count_exp))
19653 count = expected_size = INTVAL (count_exp);
19654 if (CONST_INT_P (expected_size_exp) && count == 0)
19655 expected_size = INTVAL (expected_size_exp);
19657 /* Make sure we don't need to care about overflow later on. */
19658 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19661 /* Step 0: Decide on preferred algorithm, desired alignment and
19662 size of chunks to be copied by main loop. */
19664 alg = decide_alg (count, expected_size, true, &dynamic_check);
19665 desired_align = decide_alignment (align, alg, expected_size);
19667 if (!TARGET_ALIGN_STRINGOPS)
19668 align = desired_align;
19670 if (alg == libcall)
19672 gcc_assert (alg != no_stringop);
19674 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19675 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19680 gcc_unreachable ();
19682 need_zero_guard = true;
19683 size_needed = GET_MODE_SIZE (Pmode);
19685 case unrolled_loop:
19686 need_zero_guard = true;
19687 size_needed = GET_MODE_SIZE (Pmode) * 4;
19689 case rep_prefix_8_byte:
19692 case rep_prefix_4_byte:
19695 case rep_prefix_1_byte:
19699 need_zero_guard = true;
19703 epilogue_size_needed = size_needed;
19705 /* Step 1: Prologue guard. */
19707 /* Alignment code needs count to be in a register. */
19708 if (CONST_INT_P (count_exp) && desired_align > align)
19710 if (INTVAL (count_exp) > desired_align
19711 && INTVAL (count_exp) > size_needed)
19714 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19715 if (align_bytes <= 0)
19718 align_bytes = desired_align - align_bytes;
19720 if (align_bytes == 0)
19722 enum machine_mode mode = SImode;
19723 if (TARGET_64BIT && (count & ~0xffffffff))
19725 count_exp = force_reg (mode, count_exp);
19728 /* Do the cheap promotion to allow better CSE across the
19729 main loop and epilogue (i.e. one load of the big constant in
19730 front of all code). */
19731 if (CONST_INT_P (val_exp))
19732 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19733 desired_align, align);
19734 /* Ensure that alignment prologue won't copy past end of block. */
19735 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19737 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19738 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19739 Make sure it is power of 2. */
19740 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19742 /* To improve performance of small blocks, we jump around the VAL
19743 promoting code. This means that if the promoted VAL is not constant,
19744 we might not use it in the epilogue and have to fall back to the byte loop variant. */
19746 if (epilogue_size_needed > 2 && !promoted_val)
19747 force_loopy_epilogue = true;
19750 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19752 /* If main algorithm works on QImode, no epilogue is needed.
19753 For small sizes just don't align anything. */
19754 if (size_needed == 1)
19755 desired_align = align;
19762 label = gen_label_rtx ();
19763 emit_cmp_and_jump_insns (count_exp,
19764 GEN_INT (epilogue_size_needed),
19765 LTU, 0, counter_mode (count_exp), 1, label);
19766 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19767 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19769 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19772 if (dynamic_check != -1)
19774 rtx hot_label = gen_label_rtx ();
19775 jump_around_label = gen_label_rtx ();
19776 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19777 LEU, 0, counter_mode (count_exp), 1, hot_label);
19778 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19779 set_storage_via_libcall (dst, count_exp, val_exp, false);
19780 emit_jump (jump_around_label);
19781 emit_label (hot_label);
19784 /* Step 2: Alignment prologue. */
19786 /* Do the expensive promotion once we branched off the small blocks. */
19788 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19789 desired_align, align);
19790 gcc_assert (desired_align >= 1 && align >= 1);
19792 if (desired_align > align)
19794 if (align_bytes == 0)
19796 /* Except for the first move in the epilogue, we no longer know
19797 the constant offset in the aliasing info. It doesn't seem worth
19798 the pain to maintain it for the first move, so throw away the info early. */
19800 dst = change_address (dst, BLKmode, destreg);
19801 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19806 /* If we know how many bytes need to be stored before dst is
19807 sufficiently aligned, maintain aliasing info accurately. */
19808 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19809 desired_align, align_bytes);
19810 count_exp = plus_constant (count_exp, -align_bytes);
19811 count -= align_bytes;
19813 if (need_zero_guard
19814 && (count < (unsigned HOST_WIDE_INT) size_needed
19815 || (align_bytes == 0
19816 && count < ((unsigned HOST_WIDE_INT) size_needed
19817 + desired_align - align))))
19819 /* It is possible that we stored enough so that the main loop will not execute. */
19821 gcc_assert (size_needed > 1);
19822 if (label == NULL_RTX)
19823 label = gen_label_rtx ();
19824 emit_cmp_and_jump_insns (count_exp,
19825 GEN_INT (size_needed),
19826 LTU, 0, counter_mode (count_exp), 1, label);
19827 if (expected_size == -1
19828 || expected_size < (desired_align - align) / 2 + size_needed)
19829 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19831 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19834 if (label && size_needed == 1)
19836 emit_label (label);
19837 LABEL_NUSES (label) = 1;
19839 promoted_val = val_exp;
19840 epilogue_size_needed = 1;
19842 else if (label == NULL_RTX)
19843 epilogue_size_needed = size_needed;
19845 /* Step 3: Main loop. */
19851 gcc_unreachable ();
19853 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19854 count_exp, QImode, 1, expected_size);
19857 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19858 count_exp, Pmode, 1, expected_size);
19860 case unrolled_loop:
19861 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19862 count_exp, Pmode, 4, expected_size);
19864 case rep_prefix_8_byte:
19865 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19868 case rep_prefix_4_byte:
19869 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19872 case rep_prefix_1_byte:
19873 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19877 /* Properly adjust the offset of the destination memory for aliasing.  */
19878 if (CONST_INT_P (count_exp))
19879 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19880 (count / size_needed) * size_needed);
19882 dst = change_address (dst, BLKmode, destreg);
19884 /* Step 4: Epilogue to copy the remaining bytes. */
19888 /* When the main loop is done, COUNT_EXP might hold the original count,
19889 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19890 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19891 bytes. Compensate if needed. */
19893 if (size_needed < epilogue_size_needed)
19896 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19897 GEN_INT (size_needed - 1), count_exp, 1,
19899 if (tmp != count_exp)
19900 emit_move_insn (count_exp, tmp);
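/* For instance, if the main loop stores 8 bytes per iteration but the
   epilogue was sized for 16, only COUNT & 7 bytes actually remain, so
   the masking above uses SIZE_NEEDED - 1 rather than
   EPILOGUE_SIZE_NEEDED - 1.  */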
19902 emit_label (label);
19903 LABEL_NUSES (label) = 1;
19906 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19908 if (force_loopy_epilogue)
19909 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19910 epilogue_size_needed);
19912 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19913 epilogue_size_needed);
19915 if (jump_around_label)
19916 emit_label (jump_around_label);
19920 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
19923 out = result, initialized with the start address
19924 align_rtx = alignment of the address.
19925 scratch = scratch register, initialized with the start address when
19926 not aligned, otherwise undefined
19928 This is just the body. It needs the initializations mentioned above and
19929 some address computing at the end. These things are done in i386.md. */
19932 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19936 rtx align_2_label = NULL_RTX;
19937 rtx align_3_label = NULL_RTX;
19938 rtx align_4_label = gen_label_rtx ();
19939 rtx end_0_label = gen_label_rtx ();
19941 rtx tmpreg = gen_reg_rtx (SImode);
19942 rtx scratch = gen_reg_rtx (SImode);
19946 if (CONST_INT_P (align_rtx))
19947 align = INTVAL (align_rtx);
19949 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19951 /* Is there a known alignment and is it less than 4? */
19954 rtx scratch1 = gen_reg_rtx (Pmode);
19955 emit_move_insn (scratch1, out);
19956 /* Is there a known alignment and is it not 2? */
19959 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19960 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19962 /* Leave just the 3 lower bits. */
19963 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19964 NULL_RTX, 0, OPTAB_WIDEN);
19966 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19967 Pmode, 1, align_4_label);
19968 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19969 Pmode, 1, align_2_label);
19970 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19971 Pmode, 1, align_3_label);
19975 /* Since the alignment is 2, we have to check 2 or 0 bytes;
19976 check whether it is aligned to a 4-byte boundary.  */
19978 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19979 NULL_RTX, 0, OPTAB_WIDEN);
19981 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19982 Pmode, 1, align_4_label);
19985 mem = change_address (src, QImode, out);
19987 /* Now compare the bytes. */
19989 /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
19990 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19991 QImode, 1, end_0_label);
19993 /* Increment the address. */
19994 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19996 /* Not needed with an alignment of 2 */
19999 emit_label (align_2_label);
20001 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20004 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20006 emit_label (align_3_label);
20009 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20012 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20015 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
20016 align this loop: it only makes programs bigger and does not help them speed up.  */
20018 emit_label (align_4_label);
20020 mem = change_address (src, SImode, out);
20021 emit_move_insn (scratch, mem);
20022 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20024 /* This formula yields a nonzero result iff one of the bytes is zero.
20025 This saves three branches inside the loop and many cycles.  */
20027 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20028 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20029 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20030 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20031 gen_int_mode (0x80808080, SImode)));
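/* As a scalar sketch, the four insns above compute

     tmpreg = (x - 0x01010101) & ~x & 0x80808080;

   A zero byte borrows on the subtraction and keeps its high bit set in
   ~x, so its 0x80 mask bit survives; for a nonzero byte at least one of
   the two factors has the high bit clear.  A borrow rippling out of a
   zero byte can also flag a neighboring 0x01 byte, but only when the
   result is already nonzero, so the zero test itself stays exact.  */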
20032 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20037 rtx reg = gen_reg_rtx (SImode);
20038 rtx reg2 = gen_reg_rtx (Pmode);
20039 emit_move_insn (reg, tmpreg);
20040 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20042 /* If zero is not in the first two bytes, move two bytes forward. */
20043 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20044 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20045 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20046 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20047 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20050 /* Emit lea manually to avoid clobbering of flags. */
20051 emit_insn (gen_rtx_SET (SImode, reg2,
20052 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20054 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20055 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20056 emit_insn (gen_rtx_SET (VOIDmode, out,
20057 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20063 rtx end_2_label = gen_label_rtx ();
20064 /* Is zero in the first two bytes? */
20066 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20067 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20068 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20069 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20070 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20072 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20073 JUMP_LABEL (tmp) = end_2_label;
20075 /* Not in the first two. Move two bytes forward. */
20076 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20077 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20079 emit_label (end_2_label);
20083 /* Avoid branch in fixing the byte. */
20084 tmpreg = gen_lowpart (QImode, tmpreg);
20085 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20086 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20087 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20088 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
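/* A brief sketch of the branchless fixup above: OUT is 4 bytes past the
   scanned word (adjusted by +2 earlier when the zero was in the upper
   half), and the doubled flag byte moves its 0x80 bit into the carry.
   The subtract-with-borrow then yields OUT - 4 when the lower candidate
   byte is the zero and OUT - 3 otherwise, i.e. exactly the address of
   the terminating byte.  */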
20090 emit_label (end_0_label);
20093 /* Expand strlen. */
20096 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20098 rtx addr, scratch1, scratch2, scratch3, scratch4;
20100 /* The generic case of the strlen expander is long.  Avoid expanding
20101 it unless TARGET_INLINE_ALL_STRINGOPS.  */
20103 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20104 && !TARGET_INLINE_ALL_STRINGOPS
20105 && !optimize_insn_for_size_p ()
20106 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20109 addr = force_reg (Pmode, XEXP (src, 0));
20110 scratch1 = gen_reg_rtx (Pmode);
20112 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20113 && !optimize_insn_for_size_p ())
20115 /* Well it seems that some optimizer does not combine a call like
20116 foo(strlen(bar), strlen(bar));
20117 when the move and the subtraction are done here.  It does calculate
20118 the length just once when these instructions are done inside of
20119 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20120 often used and I use one fewer register for the lifetime of
20121 output_strlen_unroll() this is better. */
20123 emit_move_insn (out, addr);
20125 ix86_expand_strlensi_unroll_1 (out, src, align);
20127 /* strlensi_unroll_1 returns the address of the zero at the end of
20128 the string, like memchr(), so compute the length by subtracting
20129 the start address. */
20130 emit_insn (ix86_gen_sub3 (out, out, addr));
20136 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20137 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20140 scratch2 = gen_reg_rtx (Pmode);
20141 scratch3 = gen_reg_rtx (Pmode);
20142 scratch4 = force_reg (Pmode, constm1_rtx);
20144 emit_move_insn (scratch3, addr);
20145 eoschar = force_reg (QImode, eoschar);
20147 src = replace_equiv_address_nv (src, scratch3);
20149 /* If .md starts supporting :P, this can be done in .md. */
20150 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20151 scratch4), UNSPEC_SCAS);
20152 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20153 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20154 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
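/* Sketch of the arithmetic: the count register (returned in scratch1)
   is primed to -1, and repnz scasb decrements it once per byte scanned,
   terminator included, leaving -(len + 2).  The one's complement and
   the add of -1 above then recover ~(-(len + 2)) - 1 == len.  */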
20159 /* For a given symbol (function), construct code to compute the address of
20160 its PLT entry in the large x86-64 PIC model.  */
20162 construct_plt_address (rtx symbol)
20164 rtx tmp = gen_reg_rtx (Pmode);
20165 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20167 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20168 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20170 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20171 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
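/* The result is pic_offset_table_rtx + symbol@PLTOFF, i.e. the absolute
   address of the symbol's PLT entry relative to the GOT base held in
   the PIC register; in the large PIC model this cannot be expressed as
   a direct call operand, hence the explicit addition.  */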
20176 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20178 rtx pop, int sibcall)
20180 rtx use = NULL, call;
20182 if (pop == const0_rtx)
20184 gcc_assert (!TARGET_64BIT || !pop);
20186 if (TARGET_MACHO && !TARGET_64BIT)
20189 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20190 fnaddr = machopic_indirect_call_target (fnaddr);
20195 /* Static functions and indirect calls don't need the pic register. */
20196 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20197 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20198 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20199 use_reg (&use, pic_offset_table_rtx);
20202 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20204 rtx al = gen_rtx_REG (QImode, AX_REG);
20205 emit_move_insn (al, callarg2);
20206 use_reg (&use, al);
20209 if (ix86_cmodel == CM_LARGE_PIC
20211 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20212 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20213 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20215 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20216 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20218 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20219 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20222 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20224 call = gen_rtx_SET (VOIDmode, retval, call);
20227 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20228 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20229 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20232 && ix86_cfun_abi () == MS_ABI
20233 && (!callarg2 || INTVAL (callarg2) != -2))
20235 /* We need to represent that SI and DI registers are clobbered by SYSV calls.  */
20237 static int clobbered_registers[] = {
20238 XMM6_REG, XMM7_REG, XMM8_REG,
20239 XMM9_REG, XMM10_REG, XMM11_REG,
20240 XMM12_REG, XMM13_REG, XMM14_REG,
20241 XMM15_REG, SI_REG, DI_REG
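/* The motivation: the Microsoft ABI treats xmm6-xmm15, rsi and rdi as
   callee-saved, while the SysV ABI does not, so a call out of an
   MS_ABI function into SysV code must be modelled as clobbering all of
   them explicitly.  */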
20244 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20245 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20246 UNSPEC_MS_TO_SYSV_CALL);
20250 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20251 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20254 (SSE_REGNO_P (clobbered_registers[i])
20256 clobbered_registers[i]));
20258 call = gen_rtx_PARALLEL (VOIDmode,
20259 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20263 call = emit_call_insn (call);
20265 CALL_INSN_FUNCTION_USAGE (call) = use;
20269 /* Clear stack slot assignments remembered from previous functions.
20270 This is called from INIT_EXPANDERS once before RTL is emitted for each function.  */
20273 static struct machine_function *
20274 ix86_init_machine_status (void)
20276 struct machine_function *f;
20278 f = ggc_alloc_cleared_machine_function ();
20279 f->use_fast_prologue_epilogue_nregs = -1;
20280 f->tls_descriptor_call_expanded_p = 0;
20281 f->call_abi = ix86_abi;
20286 /* Return a MEM corresponding to a stack slot with mode MODE.
20287 Allocate a new slot if necessary.
20289 The RTL for a function can have several slots available: N is
20290 which slot to use. */
20293 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20295 struct stack_local_entry *s;
20297 gcc_assert (n < MAX_386_STACK_LOCALS);
20299 /* Virtual slot is valid only before vregs are instantiated. */
20300 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20302 for (s = ix86_stack_locals; s; s = s->next)
20303 if (s->mode == mode && s->n == n)
20304 return copy_rtx (s->rtl);
20306 s = ggc_alloc_stack_local_entry ();
20309 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20311 s->next = ix86_stack_locals;
20312 ix86_stack_locals = s;
20316 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20318 static GTY(()) rtx ix86_tls_symbol;
20320 ix86_tls_get_addr (void)
20323 if (!ix86_tls_symbol)
20325 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20326 (TARGET_ANY_GNU_TLS
20328 ? "___tls_get_addr"
20329 : "__tls_get_addr");
20332 return ix86_tls_symbol;
20335 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20337 static GTY(()) rtx ix86_tls_module_base_symbol;
20339 ix86_tls_module_base (void)
20342 if (!ix86_tls_module_base_symbol)
20344 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20345 "_TLS_MODULE_BASE_");
20346 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20347 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20350 return ix86_tls_module_base_symbol;
20353 /* Calculate the length of the memory address in the instruction
20354 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20357 memory_address_length (rtx addr)
20359 struct ix86_address parts;
20360 rtx base, index, disp;
20364 if (GET_CODE (addr) == PRE_DEC
20365 || GET_CODE (addr) == POST_INC
20366 || GET_CODE (addr) == PRE_MODIFY
20367 || GET_CODE (addr) == POST_MODIFY)
20370 ok = ix86_decompose_address (addr, &parts);
20373 if (parts.base && GET_CODE (parts.base) == SUBREG)
20374 parts.base = SUBREG_REG (parts.base);
20375 if (parts.index && GET_CODE (parts.index) == SUBREG)
20376 parts.index = SUBREG_REG (parts.index);
20379 index = parts.index;
/* Rule of thumb:
20384 - esp as the base always wants an index,
20385 - ebp as the base always wants a displacement,
20386 - r12 as the base always wants an index,
20387 - r13 as the base always wants a displacement. */
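/* For illustration, in 32-bit encodings: movl (%ebx), %eax is 8b 03
   (one-byte modrm), movl (%esp), %eax is 8b 04 24 (modrm + SIB), and
   movl (%ebp), %eax is 8b 45 00 (modrm + disp8).  */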
20389 /* Register Indirect. */
20390 if (base && !index && !disp)
20392 /* esp (for its index) and ebp (for its displacement) need
20393 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit mode.  */
20396 && (addr == arg_pointer_rtx
20397 || addr == frame_pointer_rtx
20398 || REGNO (addr) == SP_REG
20399 || REGNO (addr) == BP_REG
20400 || REGNO (addr) == R12_REG
20401 || REGNO (addr) == R13_REG))
20405 /* Direct Addressing.  In 64-bit mode mod 00 r/m 5
20406 is not disp32, but disp32(%rip), so for disp32
20407 a SIB byte is needed, unless print_operand_address
20408 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC.  */
20410 else if (disp && !base && !index)
20417 if (GET_CODE (disp) == CONST)
20418 symbol = XEXP (disp, 0);
20419 if (GET_CODE (symbol) == PLUS
20420 && CONST_INT_P (XEXP (symbol, 1)))
20421 symbol = XEXP (symbol, 0);
20423 if (GET_CODE (symbol) != LABEL_REF
20424 && (GET_CODE (symbol) != SYMBOL_REF
20425 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20426 && (GET_CODE (symbol) != UNSPEC
20427 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20428 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20435 /* Find the length of the displacement constant. */
20438 if (base && satisfies_constraint_K (disp))
20443 /* ebp always wants a displacement. Similarly r13. */
20444 else if (base && REG_P (base)
20445 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20448 /* An index requires the two-byte modrm form.... */
20450 /* ...like esp (or r12), which always wants an index. */
20451 || base == arg_pointer_rtx
20452 || base == frame_pointer_rtx
20453 || (base && REG_P (base)
20454 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20471 /* Compute the default value for the "length_immediate" attribute.  When
20472 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative.  */
20474 ix86_attr_length_immediate_default (rtx insn, int shortform)
20478 extract_insn_cached (insn);
20479 for (i = recog_data.n_operands - 1; i >= 0; --i)
20480 if (CONSTANT_P (recog_data.operand[i]))
20482 enum attr_mode mode = get_attr_mode (insn);
20485 if (shortform && CONST_INT_P (recog_data.operand[i]))
20487 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20494 ival = trunc_int_for_mode (ival, HImode);
20497 ival = trunc_int_for_mode (ival, SImode);
20502 if (IN_RANGE (ival, -128, 127))
20519 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
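/* For example, addl $100, %eax can use the sign-extended 8-bit form
   83 c0 64 (length_immediate 1), while addl $1000, %eax needs the
   32-bit form 05 e8 03 00 00 (length_immediate 4).  */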
20524 fatal_insn ("unknown insn mode", insn);
20529 /* Compute default value for "length_address" attribute. */
20531 ix86_attr_length_address_default (rtx insn)
20535 if (get_attr_type (insn) == TYPE_LEA)
20537 rtx set = PATTERN (insn), addr;
20539 if (GET_CODE (set) == PARALLEL)
20540 set = XVECEXP (set, 0, 0);
20542 gcc_assert (GET_CODE (set) == SET);
20544 addr = SET_SRC (set);
20545 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20547 if (GET_CODE (addr) == ZERO_EXTEND)
20548 addr = XEXP (addr, 0);
20549 if (GET_CODE (addr) == SUBREG)
20550 addr = SUBREG_REG (addr);
20553 return memory_address_length (addr);
20556 extract_insn_cached (insn);
20557 for (i = recog_data.n_operands - 1; i >= 0; --i)
20558 if (MEM_P (recog_data.operand[i]))
20560 constrain_operands_cached (reload_completed);
20561 if (which_alternative != -1)
20563 const char *constraints = recog_data.constraints[i];
20564 int alt = which_alternative;
20566 while (*constraints == '=' || *constraints == '+')
20569 while (*constraints++ != ',')
20571 /* Skip ignored operands. */
20572 if (*constraints == 'X')
20575 return memory_address_length (XEXP (recog_data.operand[i], 0));
20580 /* Compute the default value for the "length_vex" attribute.  It includes
20581 the 2- or 3-byte VEX prefix and 1 opcode byte.  */
20584 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20589 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX.W bit
20590 requires the 3-byte VEX prefix.  */
20591 if (!has_0f_opcode || has_vex_w)
20594 /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
20598 extract_insn_cached (insn);
20600 for (i = recog_data.n_operands - 1; i >= 0; --i)
20601 if (REG_P (recog_data.operand[i]))
20603 /* The REX.W bit requires the 3-byte VEX prefix.  */
20604 if (GET_MODE (recog_data.operand[i]) == DImode
20605 && GENERAL_REG_P (recog_data.operand[i]))
20610 /* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
20611 if (MEM_P (recog_data.operand[i])
20612 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
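/* For example, vaddps %xmm2, %xmm1, %xmm0 fits the 2-byte c5 prefix,
   while vaddps (%r8), %xmm1, %xmm0 needs the 3-byte c4 form, since the
   inverted REX.B bit for the extended base register exists only
   there.  */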
20619 /* Return the maximum number of instructions a cpu can issue. */
20622 ix86_issue_rate (void)
20626 case PROCESSOR_PENTIUM:
20627 case PROCESSOR_ATOM:
20631 case PROCESSOR_PENTIUMPRO:
20632 case PROCESSOR_PENTIUM4:
20633 case PROCESSOR_ATHLON:
20635 case PROCESSOR_AMDFAM10:
20636 case PROCESSOR_NOCONA:
20637 case PROCESSOR_GENERIC32:
20638 case PROCESSOR_GENERIC64:
20639 case PROCESSOR_BDVER1:
20642 case PROCESSOR_CORE2:
20650 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
20651 by DEP_INSN and nothing set by DEP_INSN. */
20654 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20658 /* Simplify the test for uninteresting insns. */
20659 if (insn_type != TYPE_SETCC
20660 && insn_type != TYPE_ICMOV
20661 && insn_type != TYPE_FCMOV
20662 && insn_type != TYPE_IBR)
20665 if ((set = single_set (dep_insn)) != 0)
20667 set = SET_DEST (set);
20670 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20671 && XVECLEN (PATTERN (dep_insn), 0) == 2
20672 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20673 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20675 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20676 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20681 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20684 /* This test is true if the dependent insn reads the flags but
20685 not any other potentially set register. */
20686 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20689 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20695 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN.  */
20699 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20702 extract_insn_cached (use_insn);
20703 for (i = recog_data.n_operands - 1; i >= 0; --i)
20704 if (MEM_P (recog_data.operand[i]))
20706 rtx addr = XEXP (recog_data.operand[i], 0);
20707 return modified_in_p (addr, set_insn) != 0;
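/* Classic example on the original Pentium: in the pair
     addl $4, %eax
     movl (%eax), %ebx
   the load's address depends on the register written by the
   immediately preceding insn, triggering an address generation
   interlock and an extra cycle of latency.  */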
20713 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20715 enum attr_type insn_type, dep_insn_type;
20716 enum attr_memory memory;
20718 int dep_insn_code_number;
20720 /* Anti and output dependencies have zero cost on all CPUs. */
20721 if (REG_NOTE_KIND (link) != 0)
20724 dep_insn_code_number = recog_memoized (dep_insn);
20726 /* If we can't recognize the insns, we can't really do anything. */
20727 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20730 insn_type = get_attr_type (insn);
20731 dep_insn_type = get_attr_type (dep_insn);
20735 case PROCESSOR_PENTIUM:
20736 /* Address Generation Interlock adds a cycle of latency. */
20737 if (insn_type == TYPE_LEA)
20739 rtx addr = PATTERN (insn);
20741 if (GET_CODE (addr) == PARALLEL)
20742 addr = XVECEXP (addr, 0, 0);
20744 gcc_assert (GET_CODE (addr) == SET);
20746 addr = SET_SRC (addr);
20747 if (modified_in_p (addr, dep_insn))
20750 else if (ix86_agi_dependent (dep_insn, insn))
20753 /* ??? Compares pair with jump/setcc. */
20754 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20757 /* Floating point stores require the value to be ready one cycle earlier.  */
20758 if (insn_type == TYPE_FMOV
20759 && get_attr_memory (insn) == MEMORY_STORE
20760 && !ix86_agi_dependent (dep_insn, insn))
20764 case PROCESSOR_PENTIUMPRO:
20765 memory = get_attr_memory (insn);
20767 /* INT->FP conversion is expensive. */
20768 if (get_attr_fp_int_src (dep_insn))
20771 /* There is one extra cycle of latency between an FP op and a store.  */
20772 if (insn_type == TYPE_FMOV
20773 && (set = single_set (dep_insn)) != NULL_RTX
20774 && (set2 = single_set (insn)) != NULL_RTX
20775 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20776 && MEM_P (SET_DEST (set2)))
20779 /* Show the ability of the reorder buffer to hide the latency of a load
20780 by executing it in parallel with the previous instruction when the
20781 previous instruction is not needed to compute the address. */
20782 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20783 && !ix86_agi_dependent (dep_insn, insn))
20785 /* Claim moves to take one cycle, as the core can issue one load
20786 at a time and the next load can start a cycle later. */
20787 if (dep_insn_type == TYPE_IMOV
20788 || dep_insn_type == TYPE_FMOV)
20796 memory = get_attr_memory (insn);
20798 /* The esp dependency is resolved before the instruction is really finished.  */
20800 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20801 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20804 /* INT->FP conversion is expensive. */
20805 if (get_attr_fp_int_src (dep_insn))
20808 /* Show the ability of the reorder buffer to hide the latency of a load
20809 by executing it in parallel with the previous instruction when the
20810 previous instruction is not needed to compute the address. */
20811 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20812 && !ix86_agi_dependent (dep_insn, insn))
20814 /* Claim moves to take one cycle, as the core can issue one load
20815 at a time and the next load can start a cycle later. */
20816 if (dep_insn_type == TYPE_IMOV
20817 || dep_insn_type == TYPE_FMOV)
20826 case PROCESSOR_ATHLON:
20828 case PROCESSOR_AMDFAM10:
20829 case PROCESSOR_BDVER1:
20830 case PROCESSOR_ATOM:
20831 case PROCESSOR_GENERIC32:
20832 case PROCESSOR_GENERIC64:
20833 memory = get_attr_memory (insn);
20835 /* Show the ability of the reorder buffer to hide the latency of a load
20836 by executing it in parallel with the previous instruction when the
20837 previous instruction is not needed to compute the address. */
20838 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20839 && !ix86_agi_dependent (dep_insn, insn))
20841 enum attr_unit unit = get_attr_unit (insn);
20844 /* Because of the difference between the length of integer and
20845 floating unit pipeline preparation stages, the memory operands
20846 for floating point are cheaper.
20848 ??? For Athlon the difference is most probably 2.  */
20849 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20852 loadcost = TARGET_ATHLON ? 2 : 0;
20854 if (cost >= loadcost)
20867 /* How many alternative schedules to try. This should be as wide as the
20868 scheduling freedom in the DFA, but no wider. Making this value too
20869 large results in extra work for the scheduler. */
20872 ia32_multipass_dfa_lookahead (void)
20876 case PROCESSOR_PENTIUM:
20879 case PROCESSOR_PENTIUMPRO:
20889 /* Compute the alignment given to a constant that is being placed in memory.
20890 EXP is the constant and ALIGN is the alignment that the object would
ordinarily have.
20892 The value of this function is used instead of that alignment to align the object.  */
20896 ix86_constant_alignment (tree exp, int align)
20898 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20899 || TREE_CODE (exp) == INTEGER_CST)
20901 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20903 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20906 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20907 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20908 return BITS_PER_WORD;
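/* Presumably this word-aligns long string literals (31 chars or more
   here) so that word-at-a-time string expanders can move them without
   an alignment prologue.  */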
20913 /* Compute the alignment for a static variable.
20914 TYPE is the data type, and ALIGN is the alignment that
20915 the object would ordinarily have. The value of this function is used
20916 instead of that alignment to align the object. */
20919 ix86_data_alignment (tree type, int align)
20921 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20923 if (AGGREGATE_TYPE_P (type)
20924 && TYPE_SIZE (type)
20925 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20926 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20927 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20928 && align < max_align)
20931 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
20932 to a 16-byte boundary.  */
20935 if (AGGREGATE_TYPE_P (type)
20936 && TYPE_SIZE (type)
20937 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20938 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20939 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20943 if (TREE_CODE (type) == ARRAY_TYPE)
20945 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20947 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20950 else if (TREE_CODE (type) == COMPLEX_TYPE)
20953 if (TYPE_MODE (type) == DCmode && align < 64)
20955 if ((TYPE_MODE (type) == XCmode
20956 || TYPE_MODE (type) == TCmode) && align < 128)
20959 else if ((TREE_CODE (type) == RECORD_TYPE
20960 || TREE_CODE (type) == UNION_TYPE
20961 || TREE_CODE (type) == QUAL_UNION_TYPE)
20962 && TYPE_FIELDS (type))
20964 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20966 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20969 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20970 || TREE_CODE (type) == INTEGER_TYPE)
20972 if (TYPE_MODE (type) == DFmode && align < 64)
20974 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20981 /* Compute the alignment for a local variable or a stack slot. EXP is
20982 the data type or decl itself, MODE is the widest mode available and
20983 ALIGN is the alignment that the object would ordinarily have. The
20984 value of this macro is used instead of that alignment to align the object.  */
20988 ix86_local_alignment (tree exp, enum machine_mode mode,
20989 unsigned int align)
20993 if (exp && DECL_P (exp))
20995 type = TREE_TYPE (exp);
21004 /* Don't do dynamic stack realignment for long long objects with
21005 -mpreferred-stack-boundary=2. */
21008 && ix86_preferred_stack_boundary < 64
21009 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21010 && (!type || !TYPE_USER_ALIGN (type))
21011 && (!decl || !DECL_USER_ALIGN (decl)))
21014 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21015 register in MODE.  We will return the largest alignment of XF and DF.  */
21019 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21020 align = GET_MODE_ALIGNMENT (DFmode);
21024 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
21025 to a 16-byte boundary.  The exact wording is:
21027 An array uses the same alignment as its elements, except that a local or
21028 global array variable of length at least 16 bytes or
21029 a C99 variable-length array variable always has alignment of at least 16 bytes.
21031 This was added to allow use of aligned SSE instructions on arrays.  This
21032 rule is meant for static storage (where the compiler cannot do the analysis
21033 by itself).  We follow it for automatic variables only when convenient:
21034 we fully control everything in the function being compiled, and functions
21035 from other units cannot rely on the alignment.
21037 Exclude the va_list type.  It is the common case of a local array where
21038 we cannot benefit from the alignment.  */
21039 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21042 if (AGGREGATE_TYPE_P (type)
21043 && (TYPE_MAIN_VARIANT (type)
21044 != TYPE_MAIN_VARIANT (va_list_type_node))
21045 && TYPE_SIZE (type)
21046 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21047 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21048 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21051 if (TREE_CODE (type) == ARRAY_TYPE)
21053 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21055 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21058 else if (TREE_CODE (type) == COMPLEX_TYPE)
21060 if (TYPE_MODE (type) == DCmode && align < 64)
21062 if ((TYPE_MODE (type) == XCmode
21063 || TYPE_MODE (type) == TCmode) && align < 128)
21066 else if ((TREE_CODE (type) == RECORD_TYPE
21067 || TREE_CODE (type) == UNION_TYPE
21068 || TREE_CODE (type) == QUAL_UNION_TYPE)
21069 && TYPE_FIELDS (type))
21071 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21073 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21076 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21077 || TREE_CODE (type) == INTEGER_TYPE)
21080 if (TYPE_MODE (type) == DFmode && align < 64)
21082 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21088 /* Compute the minimum required alignment for dynamic stack realignment
21089 purposes for a local variable, parameter or a stack slot. EXP is
21090 the data type or decl itself, MODE is its mode and ALIGN is the
21091 alignment that the object would ordinarily have. */
21094 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21095 unsigned int align)
21099 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21102 if (exp && DECL_P (exp))
21104 type = TREE_TYPE (exp);
21113 /* Don't do dynamic stack realignment for long long objects with
21114 -mpreferred-stack-boundary=2. */
21115 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21116 && (!type || !TYPE_USER_ALIGN (type))
21117 && (!decl || !DECL_USER_ALIGN (decl)))
21123 /* Find a location for the static chain incoming to a nested function.
21124 This is a register, unless all free registers are used by arguments. */
21127 ix86_static_chain (const_tree fndecl, bool incoming_p)
21131 if (!DECL_STATIC_CHAIN (fndecl))
21136 /* We always use R10 in 64-bit mode. */
21142 /* By default in 32-bit mode we use ECX to pass the static chain. */
21145 fntype = TREE_TYPE (fndecl);
21146 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21148 /* Fastcall functions use ecx/edx for arguments, which leaves
21149 us with EAX for the static chain. */
21152 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21154 /* Thiscall functions use ecx for arguments, which leaves
21155 us with EAX for the static chain. */
21158 else if (ix86_function_regparm (fntype, fndecl) == 3)
21160 /* For regparm 3, we have no free call-clobbered registers in
21161 which to store the static chain. In order to implement this,
21162 we have the trampoline push the static chain to the stack.
21163 However, we can't push a value below the return address when
21164 we call the nested function directly, so we have to use an
21165 alternate entry point. For this we use ESI, and have the
21166 alternate entry point push ESI, so that things appear the
21167 same once we're executing the nested function. */
21170 if (fndecl == current_function_decl)
21171 ix86_static_chain_on_stack = true;
21172 return gen_frame_mem (SImode,
21173 plus_constant (arg_pointer_rtx, -8));
21179 return gen_rtx_REG (Pmode, regno);
21182 /* Emit RTL insns to initialize the variable parts of a trampoline.
21183 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21184 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21185 to be passed to the target function. */
21188 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21192 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21199 /* Depending on the static chain location, either load a register
21200 with a constant, or push the constant to the stack. All of the
21201 instructions are the same size. */
21202 chain = ix86_static_chain (fndecl, true);
21205 if (REGNO (chain) == CX_REG)
21207 else if (REGNO (chain) == AX_REG)
21210 gcc_unreachable ();
21215 mem = adjust_address (m_tramp, QImode, 0);
21216 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21218 mem = adjust_address (m_tramp, SImode, 1);
21219 emit_move_insn (mem, chain_value);
21221 /* Compute offset from the end of the jmp to the target function.
21222 In the case in which the trampoline stores the static chain on
21223 the stack, we need to skip the first insn which pushes the
21224 (call-saved) register static chain; this push is 1 byte. */
21225 disp = expand_binop (SImode, sub_optab, fnaddr,
21226 plus_constant (XEXP (m_tramp, 0),
21227 MEM_P (chain) ? 9 : 10),
21228 NULL_RTX, 1, OPTAB_DIRECT);
21230 mem = adjust_address (m_tramp, QImode, 5);
21231 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21233 mem = adjust_address (m_tramp, SImode, 6);
21234 emit_move_insn (mem, disp);
21240 /* Load the function address to r11.  Try to load the address using
21241 the shorter movl instead of movabs.  We may want to support
21242 movq for kernel mode, but the kernel does not use trampolines at the moment.  */
21244 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21246 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21248 mem = adjust_address (m_tramp, HImode, offset);
21249 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21251 mem = adjust_address (m_tramp, SImode, offset + 2);
21252 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21257 mem = adjust_address (m_tramp, HImode, offset);
21258 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21260 mem = adjust_address (m_tramp, DImode, offset + 2);
21261 emit_move_insn (mem, fnaddr);
21265 /* Load static chain using movabs to r10. */
21266 mem = adjust_address (m_tramp, HImode, offset);
21267 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21269 mem = adjust_address (m_tramp, DImode, offset + 2);
21270 emit_move_insn (mem, chain_value);
21273 /* Jump to r11; the last (unused) byte is a nop, only there to
21274 pad the write out to a single 32-bit store. */
21275 mem = adjust_address (m_tramp, SImode, offset);
21276 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
21279 gcc_assert (offset <= TRAMPOLINE_SIZE);
21282 #ifdef ENABLE_EXECUTE_STACK
21283 #ifdef CHECK_EXECUTE_STACK_ENABLED
21284 if (CHECK_EXECUTE_STACK_ENABLED)
21286 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21287 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21291 /* The following file contains several enumerations and data structures
21292 built from the definitions in i386-builtin-types.def. */
21294 #include "i386-builtin-types.inc"
21296 /* Table for the ix86 builtin non-function types. */
21297 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21299 /* Retrieve an element from the above table, building some of
21300 the types lazily. */
21303 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21305 unsigned int index;
21308 gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_type_tab));
21310 type = ix86_builtin_type_tab[(int) tcode];
21314 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21315 if (tcode <= IX86_BT_LAST_VECT)
21317 enum machine_mode mode;
21319 index = tcode - IX86_BT_LAST_PRIM - 1;
21320 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21321 mode = ix86_builtin_type_vect_mode[index];
21323 type = build_vector_type_for_mode (itype, mode);
21329 index = tcode - IX86_BT_LAST_VECT - 1;
21330 if (tcode <= IX86_BT_LAST_PTR)
21331 quals = TYPE_UNQUALIFIED;
21333 quals = TYPE_QUAL_CONST;
21335 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21336 if (quals != TYPE_UNQUALIFIED)
21337 itype = build_qualified_type (itype, quals);
21339 type = build_pointer_type (itype);
21342 ix86_builtin_type_tab[(int) tcode] = type;
21346 /* Table for the ix86 builtin function types. */
21347 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21349 /* Retrieve an element from the above table, building some of
21350 the types lazily. */
21353 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21357 gcc_assert ((unsigned) tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21359 type = ix86_builtin_func_type_tab[(int) tcode];
21363 if (tcode <= IX86_BT_LAST_FUNC)
21365 unsigned start = ix86_builtin_func_start[(int) tcode];
21366 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21367 tree rtype, atype, args = void_list_node;
21370 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21371 for (i = after - 1; i > start; --i)
21373 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21374 args = tree_cons (NULL, atype, args);
21377 type = build_function_type (rtype, args);
21381 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21382 enum ix86_builtin_func_type icode;
21384 icode = ix86_builtin_func_alias_base[index];
21385 type = ix86_get_builtin_func_type (icode);
21388 ix86_builtin_func_type_tab[(int) tcode] = type;
21393 /* Codes for all the SSE/MMX builtins. */
21396 IX86_BUILTIN_ADDPS,
21397 IX86_BUILTIN_ADDSS,
21398 IX86_BUILTIN_DIVPS,
21399 IX86_BUILTIN_DIVSS,
21400 IX86_BUILTIN_MULPS,
21401 IX86_BUILTIN_MULSS,
21402 IX86_BUILTIN_SUBPS,
21403 IX86_BUILTIN_SUBSS,
21405 IX86_BUILTIN_CMPEQPS,
21406 IX86_BUILTIN_CMPLTPS,
21407 IX86_BUILTIN_CMPLEPS,
21408 IX86_BUILTIN_CMPGTPS,
21409 IX86_BUILTIN_CMPGEPS,
21410 IX86_BUILTIN_CMPNEQPS,
21411 IX86_BUILTIN_CMPNLTPS,
21412 IX86_BUILTIN_CMPNLEPS,
21413 IX86_BUILTIN_CMPNGTPS,
21414 IX86_BUILTIN_CMPNGEPS,
21415 IX86_BUILTIN_CMPORDPS,
21416 IX86_BUILTIN_CMPUNORDPS,
21417 IX86_BUILTIN_CMPEQSS,
21418 IX86_BUILTIN_CMPLTSS,
21419 IX86_BUILTIN_CMPLESS,
21420 IX86_BUILTIN_CMPNEQSS,
21421 IX86_BUILTIN_CMPNLTSS,
21422 IX86_BUILTIN_CMPNLESS,
21423 IX86_BUILTIN_CMPNGTSS,
21424 IX86_BUILTIN_CMPNGESS,
21425 IX86_BUILTIN_CMPORDSS,
21426 IX86_BUILTIN_CMPUNORDSS,
21428 IX86_BUILTIN_COMIEQSS,
21429 IX86_BUILTIN_COMILTSS,
21430 IX86_BUILTIN_COMILESS,
21431 IX86_BUILTIN_COMIGTSS,
21432 IX86_BUILTIN_COMIGESS,
21433 IX86_BUILTIN_COMINEQSS,
21434 IX86_BUILTIN_UCOMIEQSS,
21435 IX86_BUILTIN_UCOMILTSS,
21436 IX86_BUILTIN_UCOMILESS,
21437 IX86_BUILTIN_UCOMIGTSS,
21438 IX86_BUILTIN_UCOMIGESS,
21439 IX86_BUILTIN_UCOMINEQSS,
21441 IX86_BUILTIN_CVTPI2PS,
21442 IX86_BUILTIN_CVTPS2PI,
21443 IX86_BUILTIN_CVTSI2SS,
21444 IX86_BUILTIN_CVTSI642SS,
21445 IX86_BUILTIN_CVTSS2SI,
21446 IX86_BUILTIN_CVTSS2SI64,
21447 IX86_BUILTIN_CVTTPS2PI,
21448 IX86_BUILTIN_CVTTSS2SI,
21449 IX86_BUILTIN_CVTTSS2SI64,
21451 IX86_BUILTIN_MAXPS,
21452 IX86_BUILTIN_MAXSS,
21453 IX86_BUILTIN_MINPS,
21454 IX86_BUILTIN_MINSS,
21456 IX86_BUILTIN_LOADUPS,
21457 IX86_BUILTIN_STOREUPS,
21458 IX86_BUILTIN_MOVSS,
21460 IX86_BUILTIN_MOVHLPS,
21461 IX86_BUILTIN_MOVLHPS,
21462 IX86_BUILTIN_LOADHPS,
21463 IX86_BUILTIN_LOADLPS,
21464 IX86_BUILTIN_STOREHPS,
21465 IX86_BUILTIN_STORELPS,
21467 IX86_BUILTIN_MASKMOVQ,
21468 IX86_BUILTIN_MOVMSKPS,
21469 IX86_BUILTIN_PMOVMSKB,
21471 IX86_BUILTIN_MOVNTPS,
21472 IX86_BUILTIN_MOVNTQ,
21474 IX86_BUILTIN_LOADDQU,
21475 IX86_BUILTIN_STOREDQU,
21477 IX86_BUILTIN_PACKSSWB,
21478 IX86_BUILTIN_PACKSSDW,
21479 IX86_BUILTIN_PACKUSWB,
21481 IX86_BUILTIN_PADDB,
21482 IX86_BUILTIN_PADDW,
21483 IX86_BUILTIN_PADDD,
21484 IX86_BUILTIN_PADDQ,
21485 IX86_BUILTIN_PADDSB,
21486 IX86_BUILTIN_PADDSW,
21487 IX86_BUILTIN_PADDUSB,
21488 IX86_BUILTIN_PADDUSW,
21489 IX86_BUILTIN_PSUBB,
21490 IX86_BUILTIN_PSUBW,
21491 IX86_BUILTIN_PSUBD,
21492 IX86_BUILTIN_PSUBQ,
21493 IX86_BUILTIN_PSUBSB,
21494 IX86_BUILTIN_PSUBSW,
21495 IX86_BUILTIN_PSUBUSB,
21496 IX86_BUILTIN_PSUBUSW,
21499 IX86_BUILTIN_PANDN,
21503 IX86_BUILTIN_PAVGB,
21504 IX86_BUILTIN_PAVGW,
21506 IX86_BUILTIN_PCMPEQB,
21507 IX86_BUILTIN_PCMPEQW,
21508 IX86_BUILTIN_PCMPEQD,
21509 IX86_BUILTIN_PCMPGTB,
21510 IX86_BUILTIN_PCMPGTW,
21511 IX86_BUILTIN_PCMPGTD,
21513 IX86_BUILTIN_PMADDWD,
21515 IX86_BUILTIN_PMAXSW,
21516 IX86_BUILTIN_PMAXUB,
21517 IX86_BUILTIN_PMINSW,
21518 IX86_BUILTIN_PMINUB,
21520 IX86_BUILTIN_PMULHUW,
21521 IX86_BUILTIN_PMULHW,
21522 IX86_BUILTIN_PMULLW,
21524 IX86_BUILTIN_PSADBW,
21525 IX86_BUILTIN_PSHUFW,
21527 IX86_BUILTIN_PSLLW,
21528 IX86_BUILTIN_PSLLD,
21529 IX86_BUILTIN_PSLLQ,
21530 IX86_BUILTIN_PSRAW,
21531 IX86_BUILTIN_PSRAD,
21532 IX86_BUILTIN_PSRLW,
21533 IX86_BUILTIN_PSRLD,
21534 IX86_BUILTIN_PSRLQ,
21535 IX86_BUILTIN_PSLLWI,
21536 IX86_BUILTIN_PSLLDI,
21537 IX86_BUILTIN_PSLLQI,
21538 IX86_BUILTIN_PSRAWI,
21539 IX86_BUILTIN_PSRADI,
21540 IX86_BUILTIN_PSRLWI,
21541 IX86_BUILTIN_PSRLDI,
21542 IX86_BUILTIN_PSRLQI,
21544 IX86_BUILTIN_PUNPCKHBW,
21545 IX86_BUILTIN_PUNPCKHWD,
21546 IX86_BUILTIN_PUNPCKHDQ,
21547 IX86_BUILTIN_PUNPCKLBW,
21548 IX86_BUILTIN_PUNPCKLWD,
21549 IX86_BUILTIN_PUNPCKLDQ,
21551 IX86_BUILTIN_SHUFPS,
21553 IX86_BUILTIN_RCPPS,
21554 IX86_BUILTIN_RCPSS,
21555 IX86_BUILTIN_RSQRTPS,
21556 IX86_BUILTIN_RSQRTPS_NR,
21557 IX86_BUILTIN_RSQRTSS,
21558 IX86_BUILTIN_RSQRTF,
21559 IX86_BUILTIN_SQRTPS,
21560 IX86_BUILTIN_SQRTPS_NR,
21561 IX86_BUILTIN_SQRTSS,
21563 IX86_BUILTIN_UNPCKHPS,
21564 IX86_BUILTIN_UNPCKLPS,
21566 IX86_BUILTIN_ANDPS,
21567 IX86_BUILTIN_ANDNPS,
21569 IX86_BUILTIN_XORPS,
21572 IX86_BUILTIN_LDMXCSR,
21573 IX86_BUILTIN_STMXCSR,
21574 IX86_BUILTIN_SFENCE,
21576 /* 3DNow! Original */
21577 IX86_BUILTIN_FEMMS,
21578 IX86_BUILTIN_PAVGUSB,
21579 IX86_BUILTIN_PF2ID,
21580 IX86_BUILTIN_PFACC,
21581 IX86_BUILTIN_PFADD,
21582 IX86_BUILTIN_PFCMPEQ,
21583 IX86_BUILTIN_PFCMPGE,
21584 IX86_BUILTIN_PFCMPGT,
21585 IX86_BUILTIN_PFMAX,
21586 IX86_BUILTIN_PFMIN,
21587 IX86_BUILTIN_PFMUL,
21588 IX86_BUILTIN_PFRCP,
21589 IX86_BUILTIN_PFRCPIT1,
21590 IX86_BUILTIN_PFRCPIT2,
21591 IX86_BUILTIN_PFRSQIT1,
21592 IX86_BUILTIN_PFRSQRT,
21593 IX86_BUILTIN_PFSUB,
21594 IX86_BUILTIN_PFSUBR,
21595 IX86_BUILTIN_PI2FD,
21596 IX86_BUILTIN_PMULHRW,
21598 /* 3DNow! Athlon Extensions */
21599 IX86_BUILTIN_PF2IW,
21600 IX86_BUILTIN_PFNACC,
21601 IX86_BUILTIN_PFPNACC,
21602 IX86_BUILTIN_PI2FW,
21603 IX86_BUILTIN_PSWAPDSI,
21604 IX86_BUILTIN_PSWAPDSF,
21607 IX86_BUILTIN_ADDPD,
21608 IX86_BUILTIN_ADDSD,
21609 IX86_BUILTIN_DIVPD,
21610 IX86_BUILTIN_DIVSD,
21611 IX86_BUILTIN_MULPD,
21612 IX86_BUILTIN_MULSD,
21613 IX86_BUILTIN_SUBPD,
21614 IX86_BUILTIN_SUBSD,
21616 IX86_BUILTIN_CMPEQPD,
21617 IX86_BUILTIN_CMPLTPD,
21618 IX86_BUILTIN_CMPLEPD,
21619 IX86_BUILTIN_CMPGTPD,
21620 IX86_BUILTIN_CMPGEPD,
21621 IX86_BUILTIN_CMPNEQPD,
21622 IX86_BUILTIN_CMPNLTPD,
21623 IX86_BUILTIN_CMPNLEPD,
21624 IX86_BUILTIN_CMPNGTPD,
21625 IX86_BUILTIN_CMPNGEPD,
21626 IX86_BUILTIN_CMPORDPD,
21627 IX86_BUILTIN_CMPUNORDPD,
21628 IX86_BUILTIN_CMPEQSD,
21629 IX86_BUILTIN_CMPLTSD,
21630 IX86_BUILTIN_CMPLESD,
21631 IX86_BUILTIN_CMPNEQSD,
21632 IX86_BUILTIN_CMPNLTSD,
21633 IX86_BUILTIN_CMPNLESD,
21634 IX86_BUILTIN_CMPORDSD,
21635 IX86_BUILTIN_CMPUNORDSD,
21637 IX86_BUILTIN_COMIEQSD,
21638 IX86_BUILTIN_COMILTSD,
21639 IX86_BUILTIN_COMILESD,
21640 IX86_BUILTIN_COMIGTSD,
21641 IX86_BUILTIN_COMIGESD,
21642 IX86_BUILTIN_COMINEQSD,
21643 IX86_BUILTIN_UCOMIEQSD,
21644 IX86_BUILTIN_UCOMILTSD,
21645 IX86_BUILTIN_UCOMILESD,
21646 IX86_BUILTIN_UCOMIGTSD,
21647 IX86_BUILTIN_UCOMIGESD,
21648 IX86_BUILTIN_UCOMINEQSD,
21650 IX86_BUILTIN_MAXPD,
21651 IX86_BUILTIN_MAXSD,
21652 IX86_BUILTIN_MINPD,
21653 IX86_BUILTIN_MINSD,
21655 IX86_BUILTIN_ANDPD,
21656 IX86_BUILTIN_ANDNPD,
21658 IX86_BUILTIN_XORPD,
21660 IX86_BUILTIN_SQRTPD,
21661 IX86_BUILTIN_SQRTSD,
21663 IX86_BUILTIN_UNPCKHPD,
21664 IX86_BUILTIN_UNPCKLPD,
21666 IX86_BUILTIN_SHUFPD,
21668 IX86_BUILTIN_LOADUPD,
21669 IX86_BUILTIN_STOREUPD,
21670 IX86_BUILTIN_MOVSD,
21672 IX86_BUILTIN_LOADHPD,
21673 IX86_BUILTIN_LOADLPD,
21675 IX86_BUILTIN_CVTDQ2PD,
21676 IX86_BUILTIN_CVTDQ2PS,
21678 IX86_BUILTIN_CVTPD2DQ,
21679 IX86_BUILTIN_CVTPD2PI,
21680 IX86_BUILTIN_CVTPD2PS,
21681 IX86_BUILTIN_CVTTPD2DQ,
21682 IX86_BUILTIN_CVTTPD2PI,
21684 IX86_BUILTIN_CVTPI2PD,
21685 IX86_BUILTIN_CVTSI2SD,
21686 IX86_BUILTIN_CVTSI642SD,
21688 IX86_BUILTIN_CVTSD2SI,
21689 IX86_BUILTIN_CVTSD2SI64,
21690 IX86_BUILTIN_CVTSD2SS,
21691 IX86_BUILTIN_CVTSS2SD,
21692 IX86_BUILTIN_CVTTSD2SI,
21693 IX86_BUILTIN_CVTTSD2SI64,
21695 IX86_BUILTIN_CVTPS2DQ,
21696 IX86_BUILTIN_CVTPS2PD,
21697 IX86_BUILTIN_CVTTPS2DQ,
21699 IX86_BUILTIN_MOVNTI,
21700 IX86_BUILTIN_MOVNTPD,
21701 IX86_BUILTIN_MOVNTDQ,
21703 IX86_BUILTIN_MOVQ128,
21706 IX86_BUILTIN_MASKMOVDQU,
21707 IX86_BUILTIN_MOVMSKPD,
21708 IX86_BUILTIN_PMOVMSKB128,
21710 IX86_BUILTIN_PACKSSWB128,
21711 IX86_BUILTIN_PACKSSDW128,
21712 IX86_BUILTIN_PACKUSWB128,
21714 IX86_BUILTIN_PADDB128,
21715 IX86_BUILTIN_PADDW128,
21716 IX86_BUILTIN_PADDD128,
21717 IX86_BUILTIN_PADDQ128,
21718 IX86_BUILTIN_PADDSB128,
21719 IX86_BUILTIN_PADDSW128,
21720 IX86_BUILTIN_PADDUSB128,
21721 IX86_BUILTIN_PADDUSW128,
21722 IX86_BUILTIN_PSUBB128,
21723 IX86_BUILTIN_PSUBW128,
21724 IX86_BUILTIN_PSUBD128,
21725 IX86_BUILTIN_PSUBQ128,
21726 IX86_BUILTIN_PSUBSB128,
21727 IX86_BUILTIN_PSUBSW128,
21728 IX86_BUILTIN_PSUBUSB128,
21729 IX86_BUILTIN_PSUBUSW128,
21731 IX86_BUILTIN_PAND128,
21732 IX86_BUILTIN_PANDN128,
21733 IX86_BUILTIN_POR128,
21734 IX86_BUILTIN_PXOR128,
21736 IX86_BUILTIN_PAVGB128,
21737 IX86_BUILTIN_PAVGW128,
21739 IX86_BUILTIN_PCMPEQB128,
21740 IX86_BUILTIN_PCMPEQW128,
21741 IX86_BUILTIN_PCMPEQD128,
21742 IX86_BUILTIN_PCMPGTB128,
21743 IX86_BUILTIN_PCMPGTW128,
21744 IX86_BUILTIN_PCMPGTD128,
21746 IX86_BUILTIN_PMADDWD128,
21748 IX86_BUILTIN_PMAXSW128,
21749 IX86_BUILTIN_PMAXUB128,
21750 IX86_BUILTIN_PMINSW128,
21751 IX86_BUILTIN_PMINUB128,
21753 IX86_BUILTIN_PMULUDQ,
21754 IX86_BUILTIN_PMULUDQ128,
21755 IX86_BUILTIN_PMULHUW128,
21756 IX86_BUILTIN_PMULHW128,
21757 IX86_BUILTIN_PMULLW128,
21759 IX86_BUILTIN_PSADBW128,
21760 IX86_BUILTIN_PSHUFHW,
21761 IX86_BUILTIN_PSHUFLW,
21762 IX86_BUILTIN_PSHUFD,
21764 IX86_BUILTIN_PSLLDQI128,
21765 IX86_BUILTIN_PSLLWI128,
21766 IX86_BUILTIN_PSLLDI128,
21767 IX86_BUILTIN_PSLLQI128,
21768 IX86_BUILTIN_PSRAWI128,
21769 IX86_BUILTIN_PSRADI128,
21770 IX86_BUILTIN_PSRLDQI128,
21771 IX86_BUILTIN_PSRLWI128,
21772 IX86_BUILTIN_PSRLDI128,
21773 IX86_BUILTIN_PSRLQI128,
21775 IX86_BUILTIN_PSLLDQ128,
21776 IX86_BUILTIN_PSLLW128,
21777 IX86_BUILTIN_PSLLD128,
21778 IX86_BUILTIN_PSLLQ128,
21779 IX86_BUILTIN_PSRAW128,
21780 IX86_BUILTIN_PSRAD128,
21781 IX86_BUILTIN_PSRLW128,
21782 IX86_BUILTIN_PSRLD128,
21783 IX86_BUILTIN_PSRLQ128,
21785 IX86_BUILTIN_PUNPCKHBW128,
21786 IX86_BUILTIN_PUNPCKHWD128,
21787 IX86_BUILTIN_PUNPCKHDQ128,
21788 IX86_BUILTIN_PUNPCKHQDQ128,
21789 IX86_BUILTIN_PUNPCKLBW128,
21790 IX86_BUILTIN_PUNPCKLWD128,
21791 IX86_BUILTIN_PUNPCKLDQ128,
21792 IX86_BUILTIN_PUNPCKLQDQ128,
21794 IX86_BUILTIN_CLFLUSH,
21795 IX86_BUILTIN_MFENCE,
21796 IX86_BUILTIN_LFENCE,
21798 IX86_BUILTIN_BSRSI,
21799 IX86_BUILTIN_BSRDI,
21800 IX86_BUILTIN_RDPMC,
21801 IX86_BUILTIN_RDTSC,
21802 IX86_BUILTIN_RDTSCP,
21803 IX86_BUILTIN_ROLQI,
21804 IX86_BUILTIN_ROLHI,
21805 IX86_BUILTIN_RORQI,
21806 IX86_BUILTIN_RORHI,
21809 IX86_BUILTIN_ADDSUBPS,
21810 IX86_BUILTIN_HADDPS,
21811 IX86_BUILTIN_HSUBPS,
21812 IX86_BUILTIN_MOVSHDUP,
21813 IX86_BUILTIN_MOVSLDUP,
21814 IX86_BUILTIN_ADDSUBPD,
21815 IX86_BUILTIN_HADDPD,
21816 IX86_BUILTIN_HSUBPD,
21817 IX86_BUILTIN_LDDQU,
21819 IX86_BUILTIN_MONITOR,
21820 IX86_BUILTIN_MWAIT,
21823 IX86_BUILTIN_PHADDW,
21824 IX86_BUILTIN_PHADDD,
21825 IX86_BUILTIN_PHADDSW,
21826 IX86_BUILTIN_PHSUBW,
21827 IX86_BUILTIN_PHSUBD,
21828 IX86_BUILTIN_PHSUBSW,
21829 IX86_BUILTIN_PMADDUBSW,
21830 IX86_BUILTIN_PMULHRSW,
21831 IX86_BUILTIN_PSHUFB,
21832 IX86_BUILTIN_PSIGNB,
21833 IX86_BUILTIN_PSIGNW,
21834 IX86_BUILTIN_PSIGND,
21835 IX86_BUILTIN_PALIGNR,
21836 IX86_BUILTIN_PABSB,
21837 IX86_BUILTIN_PABSW,
21838 IX86_BUILTIN_PABSD,
21840 IX86_BUILTIN_PHADDW128,
21841 IX86_BUILTIN_PHADDD128,
21842 IX86_BUILTIN_PHADDSW128,
21843 IX86_BUILTIN_PHSUBW128,
21844 IX86_BUILTIN_PHSUBD128,
21845 IX86_BUILTIN_PHSUBSW128,
21846 IX86_BUILTIN_PMADDUBSW128,
21847 IX86_BUILTIN_PMULHRSW128,
21848 IX86_BUILTIN_PSHUFB128,
21849 IX86_BUILTIN_PSIGNB128,
21850 IX86_BUILTIN_PSIGNW128,
21851 IX86_BUILTIN_PSIGND128,
21852 IX86_BUILTIN_PALIGNR128,
21853 IX86_BUILTIN_PABSB128,
21854 IX86_BUILTIN_PABSW128,
21855 IX86_BUILTIN_PABSD128,
21857 /* AMDFAM10 - SSE4A New Instructions. */
21858 IX86_BUILTIN_MOVNTSD,
21859 IX86_BUILTIN_MOVNTSS,
21860 IX86_BUILTIN_EXTRQI,
21861 IX86_BUILTIN_EXTRQ,
21862 IX86_BUILTIN_INSERTQI,
21863 IX86_BUILTIN_INSERTQ,
21866 IX86_BUILTIN_BLENDPD,
21867 IX86_BUILTIN_BLENDPS,
21868 IX86_BUILTIN_BLENDVPD,
21869 IX86_BUILTIN_BLENDVPS,
21870 IX86_BUILTIN_PBLENDVB128,
21871 IX86_BUILTIN_PBLENDW128,
21876 IX86_BUILTIN_INSERTPS128,
21878 IX86_BUILTIN_MOVNTDQA,
21879 IX86_BUILTIN_MPSADBW128,
21880 IX86_BUILTIN_PACKUSDW128,
21881 IX86_BUILTIN_PCMPEQQ,
21882 IX86_BUILTIN_PHMINPOSUW128,
21884 IX86_BUILTIN_PMAXSB128,
21885 IX86_BUILTIN_PMAXSD128,
21886 IX86_BUILTIN_PMAXUD128,
21887 IX86_BUILTIN_PMAXUW128,
21889 IX86_BUILTIN_PMINSB128,
21890 IX86_BUILTIN_PMINSD128,
21891 IX86_BUILTIN_PMINUD128,
21892 IX86_BUILTIN_PMINUW128,
21894 IX86_BUILTIN_PMOVSXBW128,
21895 IX86_BUILTIN_PMOVSXBD128,
21896 IX86_BUILTIN_PMOVSXBQ128,
21897 IX86_BUILTIN_PMOVSXWD128,
21898 IX86_BUILTIN_PMOVSXWQ128,
21899 IX86_BUILTIN_PMOVSXDQ128,
21901 IX86_BUILTIN_PMOVZXBW128,
21902 IX86_BUILTIN_PMOVZXBD128,
21903 IX86_BUILTIN_PMOVZXBQ128,
21904 IX86_BUILTIN_PMOVZXWD128,
21905 IX86_BUILTIN_PMOVZXWQ128,
21906 IX86_BUILTIN_PMOVZXDQ128,
21908 IX86_BUILTIN_PMULDQ128,
21909 IX86_BUILTIN_PMULLD128,
21911 IX86_BUILTIN_ROUNDPD,
21912 IX86_BUILTIN_ROUNDPS,
21913 IX86_BUILTIN_ROUNDSD,
21914 IX86_BUILTIN_ROUNDSS,
21916 IX86_BUILTIN_PTESTZ,
21917 IX86_BUILTIN_PTESTC,
21918 IX86_BUILTIN_PTESTNZC,
21920 IX86_BUILTIN_VEC_INIT_V2SI,
21921 IX86_BUILTIN_VEC_INIT_V4HI,
21922 IX86_BUILTIN_VEC_INIT_V8QI,
21923 IX86_BUILTIN_VEC_EXT_V2DF,
21924 IX86_BUILTIN_VEC_EXT_V2DI,
21925 IX86_BUILTIN_VEC_EXT_V4SF,
21926 IX86_BUILTIN_VEC_EXT_V4SI,
21927 IX86_BUILTIN_VEC_EXT_V8HI,
21928 IX86_BUILTIN_VEC_EXT_V2SI,
21929 IX86_BUILTIN_VEC_EXT_V4HI,
21930 IX86_BUILTIN_VEC_EXT_V16QI,
21931 IX86_BUILTIN_VEC_SET_V2DI,
21932 IX86_BUILTIN_VEC_SET_V4SF,
21933 IX86_BUILTIN_VEC_SET_V4SI,
21934 IX86_BUILTIN_VEC_SET_V8HI,
21935 IX86_BUILTIN_VEC_SET_V4HI,
21936 IX86_BUILTIN_VEC_SET_V16QI,
21938 IX86_BUILTIN_VEC_PACK_SFIX,
21941 IX86_BUILTIN_CRC32QI,
21942 IX86_BUILTIN_CRC32HI,
21943 IX86_BUILTIN_CRC32SI,
21944 IX86_BUILTIN_CRC32DI,
21946 IX86_BUILTIN_PCMPESTRI128,
21947 IX86_BUILTIN_PCMPESTRM128,
21948 IX86_BUILTIN_PCMPESTRA128,
21949 IX86_BUILTIN_PCMPESTRC128,
21950 IX86_BUILTIN_PCMPESTRO128,
21951 IX86_BUILTIN_PCMPESTRS128,
21952 IX86_BUILTIN_PCMPESTRZ128,
21953 IX86_BUILTIN_PCMPISTRI128,
21954 IX86_BUILTIN_PCMPISTRM128,
21955 IX86_BUILTIN_PCMPISTRA128,
21956 IX86_BUILTIN_PCMPISTRC128,
21957 IX86_BUILTIN_PCMPISTRO128,
21958 IX86_BUILTIN_PCMPISTRS128,
21959 IX86_BUILTIN_PCMPISTRZ128,
21961 IX86_BUILTIN_PCMPGTQ,
21963 /* AES instructions */
21964 IX86_BUILTIN_AESENC128,
21965 IX86_BUILTIN_AESENCLAST128,
21966 IX86_BUILTIN_AESDEC128,
21967 IX86_BUILTIN_AESDECLAST128,
21968 IX86_BUILTIN_AESIMC128,
21969 IX86_BUILTIN_AESKEYGENASSIST128,
21971 /* PCLMUL instruction */
21972 IX86_BUILTIN_PCLMULQDQ128,
21975 IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,
  /* TFmode support builtins.  */
  IX86_BUILTIN_INFQ,
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,
  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  IX86_BUILTIN_CLZS,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   function.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA, in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
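
/* As a minimal illustrative sketch (not one of the registration calls in
   this file; the type code shown assumes this port's
   V2DI_FTYPE_V2DI_V2DI_INT function type), a PCLMUL builtin would be
   registered roughly like

     def_builtin_const (OPTION_MASK_ISA_PCLMUL,
			"__builtin_ia32_pclmulqdq128",
			V2DI_FTYPE_V2DI_V2DI_INT,
			IX86_BUILTIN_PCLMULQDQ128);

   If -mpclmul is not in ix86_isa_flags at that point (and the front end
   has the ext-scope hook), no decl is built yet: the name/type pair is
   parked in ix86_builtins_isa until ix86_add_new_builtins is called for
   an ISA that includes PCLMUL.  */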
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int) IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
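
/* From the user's side the deferred registration is invisible.  For
   example (illustrative, front end dependent): a translation unit compiled
   without -mavx can still use an AVX builtin inside a function that
   enables the ISA locally, because the decl is materialized on demand:

     __attribute__ ((target ("avx")))
     __m256d add256 (__m256d x, __m256d y)
     {
       return __builtin_ia32_addpd256 (x, y);
     }
*/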
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
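
/* Reading one row of the tables below (annotation added for clarity; the
   row shown is the first bdesc_comi entry, reformatted one field per
   line):

     { OPTION_MASK_ISA_SSE,       mask: ISA flags that must be enabled
       CODE_FOR_sse_comi,         icode: insn pattern to expand to
       "__builtin_ia32_comieq",   name: user-visible builtin name
       IX86_BUILTIN_COMIEQSS,     code: index into ix86_builtins[]
       UNEQ,                      comparison: rtx comparison code, if any
       0 },                       flag: table dependent; a function type
				  code, a flags-register mode, or
				  BUILTIN_DESC_* bits
*/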
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
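
/* Note on the two string-compare tables above: the final field carries a
   flags-register mode (CCAmode, CCCmode, ...) rather than a function type
   code.  The pcmpestr/pcmpistr insns set EFLAGS, and each *a/*c/*o/*s/*z
   builtin variant extracts the corresponding condition bit from that
   comparison result.  */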
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
22637 /* Builtins with variable number of arguments. */
22638 static const struct builtin_description bdesc_args[] =
22640 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
22641 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
22642 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
22643 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22644 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22645 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22646 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22649 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22650 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22651 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22652 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22653 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22654 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22656 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22657 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22658 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22659 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22660 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22661 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22662 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22663 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22665 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22666 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22668 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22669 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22670 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22671 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22673 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22674 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22675 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22676 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22677 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22678 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22680 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22681 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22682 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22683 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22684 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
22685 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
22687 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22688 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
22689 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22691 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
22693 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22694 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22695 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22696 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22697 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22698 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22700 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22701 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22702 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22703 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22704 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22705 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22707 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22708 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22709 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22710 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22713 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22714 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22715 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22716 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22718 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22719 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22720 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22721 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22722 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22723 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22724 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22725 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22726 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22727 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22728 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22729 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22730 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22731 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22732 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22735 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22736 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22737 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22738 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22739 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22740 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22743 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
22744 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22745 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22746 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22747 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22748 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22749 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22750 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22751 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22752 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22753 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22754 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22756 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22758 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22759 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22760 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22761 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22775 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22776 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22777 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
22778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22779 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22781 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22782 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22783 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22784 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22785 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22786 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22787 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22788 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22790 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22791 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22792 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22793 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22795 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22797 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22798 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22800 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22802 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22803 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22805 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22806 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
22809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
22810 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
22812 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
22814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22816 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22818 /* SSE MMX or 3Dnow!A */
22819 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22820 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22821 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22823 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22824 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22825 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22826 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22828 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
22829 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
22831 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22834 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22836 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22837 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22838 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22839 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22840 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22841 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22842 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
22843 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
22844 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
22845 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
22846 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
22847 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
22849 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
22850 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
22851 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
22852 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
22853 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22854 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22856 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22857 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22858 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
22859 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22860 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
22864 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22865 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22866 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22867 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
22870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  /* SSE2 MMX */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  /* SSSE3 */
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },

  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* AES */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  /* PCLMUL */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },

  /* F16C */
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};
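
/* A note on how the table above is consumed (a sketch of the flow, not a
   specification): each entry pairs an ISA option mask and an insn code with
   the builtin's name, its IX86_BUILTIN_* enum value, an optional comparison
   code and an encoded signature.  At startup the table is walked and each
   builtin is registered under its mask (see ix86_init_mmx_sse_builtins);
   at expansion time the FTYPE flag tells the expander how to marshal the
   operands onto the named insn pattern (see ix86_expand_args_builtin).  */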
/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI

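/* A rough key to the MULTI_ARG_* shorthands above, inferred from the
   definitions themselves: the digit gives the operand count; the mode
   letters (QI/HI/SI/DI/SF/DF) give the element mode, with a "2" suffix
   naming the 256-bit variant; mixed forms such as SI_DI map from the
   first mode to the second (e.g. MULTI_ARG_1_SI_DI is V2DI_FTYPE_V4SI);
   _IMM takes an integer immediate, and the _CMP/_TF forms carry a
   comparison condition, as used by the vpcom* entries below.  */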
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23435 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23436 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23437 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23438 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23445 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
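/* Illustrative note (ours, not from the original sources): each row above
   binds one named insn pattern to one user-visible builtin.  For instance
   the IX86_BUILTIN_VPMACSSWW row means that a call such as

       __v8hi r = __builtin_ia32_vpmacssww (a, b, c);    (a, b, c: __v8hi)

   is expanded through CODE_FOR_xop_pmacssww with the MULTI_ARG_3_HI
   (V8HI_FTYPE_V8HI_V8HI_V8HI) prototype by ix86_expand_multi_arg_builtin
   below; xopintrin.h wraps these builtins in the _mm_macc*-style
   intrinsics.  */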
/* Set up all the MMX/SSE builtins, even builtins for instructions that are
   not in the current target ISA, so that the user can compile particular
   modules with target-specific options that differ from the command-line
   options.  */
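/* Sketch of why this matters (illustrative, not compiler code): with
   function-specific option support, a translation unit compiled with only
   -msse2 may still contain

       __attribute__((target ("xop")))
       __v8hi f (__v8hi a, __v8hi b, __v8hi c)
       {
         return __builtin_ia32_vpmacssww (a, b, c);
       }

   so every builtin must be registered up front; whether the ISA is really
   enabled is checked later, at expansion time, in ix86_expand_builtin.  */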
static void
ix86_init_mmx_sse_builtins (void)
{
  const struct builtin_description * d;
  enum ix86_builtin_func_type ftype;
  size_t i;

  /* Add all special builtins with variable number of operands.  */
  for (i = 0, d = bdesc_special_args;
       i < ARRAY_SIZE (bdesc_special_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin (d->mask, d->name, ftype, d->code);
    }

  /* Add all builtins with variable number of operands.  */
  for (i = 0, d = bdesc_args;
       i < ARRAY_SIZE (bdesc_args);
       i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* pcmpestr[im] insns.  */
  for (i = 0, d = bdesc_pcmpestr;
       i < ARRAY_SIZE (bdesc_pcmpestr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPESTRM128)
        ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
      else
        ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* pcmpistr[im] insns.  */
  for (i = 0, d = bdesc_pcmpistr;
       i < ARRAY_SIZE (bdesc_pcmpistr);
       i++, d++)
    {
      if (d->code == IX86_BUILTIN_PCMPISTRM128)
        ftype = V16QI_FTYPE_V16QI_V16QI_INT;
      else
        ftype = INT_FTYPE_V16QI_V16QI_INT;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
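  /* The [im] suffix selects the result kind: pcmp{e,i}stri yields an index
     in a general register (hence INT_FTYPE_*), while pcmp{e,i}strm yields a
     byte mask in an XMM register (hence V16QI_FTYPE_*).  User code reaches
     these through the SSE4.2 intrinsics, e.g. _mm_cmpistri and
     _mm_cmpistrm.  */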
  /* comi/ucomi insns.  */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    {
      if (d->mask == OPTION_MASK_ISA_SSE2)
        ftype = INT_FTYPE_V2DF_V2DF;
      else
        ftype = INT_FTYPE_V4SF_V4SF;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
               VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
               UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
               "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
               IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
               VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
               VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
                            VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);

  /* SSE3 */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
               VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
               VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
                     V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
                     V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
                     V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
                     V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);

  /* MMX access to the vec_init patterns.  */
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
                     V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
                     V4HI_FTYPE_HI_HI_HI_HI,
                     IX86_BUILTIN_VEC_INIT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
                     V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
                     IX86_BUILTIN_VEC_INIT_V8QI);
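  /* Usage sketch (user code, illustrative): the MMX constructors in
     mmintrin.h expand to these builtins, e.g.

         __m64 v = _mm_set_pi16 (w3, w2, w1, w0);

     becomes a __builtin_ia32_vec_init_v4hi call, which is expanded by
     ix86_expand_vec_init_builtin below.  */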
  /* Access to the vec_extract patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
                     DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
                     DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
                     FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
                     SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
                     HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_ext_v4hi",
                     HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);

  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
                     SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
                     QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.  */
  def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
                     "__builtin_ia32_vec_set_v2di",
                     V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
                     V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
                     V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);

  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
                     V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);

  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
                     "__builtin_ia32_vec_set_v4hi",
                     V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);

  def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
                     V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);

  /* Add FMA4/XOP multi-argument builtin instructions.  */
  for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
    {
      if (d->name == 0)
        continue;

      ftype = (enum ix86_builtin_func_type) d->flag;
      def_builtin_const (d->mask, d->name, ftype, d->code);
    }
}
/* Internal method for ix86_init_builtins.  */

static void
ix86_init_builtins_va_builtins_abi (void)
{
  tree ms_va_ref, sysv_va_ref;
  tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
  tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
  tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
  tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;

  if (!TARGET_64BIT)
    return;
  fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
  fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
  ms_va_ref = build_reference_type (ms_va_list_type_node);
  sysv_va_ref =
    build_pointer_type (TREE_TYPE (sysv_va_list_type_node));

  fnvoid_va_end_ms =
    build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_start_ms =
    build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
  fnvoid_va_end_sysv =
    build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
  fnvoid_va_start_sysv =
    build_varargs_function_type_list (void_type_node, sysv_va_ref,
                                      NULL_TREE);
  fnvoid_va_copy_ms =
    build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
                              NULL_TREE);
  fnvoid_va_copy_sysv =
    build_function_type_list (void_type_node, sysv_va_ref,
                              sysv_va_ref, NULL_TREE);

  add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
  add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
                        BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
                        BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
  add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
                        BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
}
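/* Usage sketch (user code, 64-bit only; illustrative): these let an ms_abi
   function walk its variable arguments with the matching list type,

       __attribute__((ms_abi)) void f (int n, ...)
       {
         __builtin_ms_va_list ap;
         __builtin_ms_va_start (ap, n);
         ...
         __builtin_ms_va_end (ap);
       }

   and likewise __builtin_sysv_va_* inside sysv_abi functions.  */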
static void
ix86_init_builtin_types (void)
{
  tree float128_type_node, float80_type_node;

  /* The __float80 type.  */
  float80_type_node = long_double_type_node;
  if (TYPE_MODE (float80_type_node) != XFmode)
    {
      /* long double is not 80-bit here; build __float80 explicitly.  */
      float80_type_node = make_node (REAL_TYPE);

      TYPE_PRECISION (float80_type_node) = 80;
      layout_type (float80_type_node);
    }
  lang_hooks.types.register_builtin_type (float80_type_node, "__float80");

  /* The __float128 type.  */
  float128_type_node = make_node (REAL_TYPE);
  TYPE_PRECISION (float128_type_node) = 128;
  layout_type (float128_type_node);
  lang_hooks.types.register_builtin_type (float128_type_node, "__float128");

  /* This macro is built by i386-builtin-types.awk.  */
  DEFINE_BUILTIN_PRIMITIVE_TYPES;
}
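/* After this runs, user code can declare objects of the registered types
   (illustrative sketch):

       __float80  e;    80-bit extended precision (XFmode)
       __float128 q;    128-bit IEEE quad (TFmode)

   Literal suffixes and the arithmetic itself are supplied by the front
   ends and libgcc, not by this function.  */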
static void
ix86_init_builtins (void)
{
  tree t;

  ix86_init_builtin_types ();

  /* TFmode support builtins.  */
  def_builtin_const (0, "__builtin_infq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
  def_builtin_const (0, "__builtin_huge_valq",
                     FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);

  /* We expand these into normal calls if SSE2 isn't available, since
     they are used by libgcc.  */
  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
  t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
                            BUILT_IN_MD, "__fabstf2", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;

  t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
  t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
                            BUILT_IN_MD, "__copysigntf3", NULL_TREE);
  TREE_READONLY (t) = 1;
  ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;

  ix86_init_mmx_sse_builtins ();

  if (TARGET_64BIT)
    ix86_init_builtins_va_builtins_abi ();
}
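/* Note: "__fabstf2" and "__copysigntf3" above are the libgcc symbols that
   back these builtins; when the TFmode patterns are unavailable, a call
   like (illustrative)

       __float128 y = __builtin_fabsq (x);

   is simply emitted as a library call to __fabstf2.  */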
/* Return the ix86 builtin for CODE.  */

static tree
ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= IX86_BUILTIN_MAX)
    return error_mark_node;

  return ix86_builtins[code];
}
/* Errors in the source file can cause expand_expr to return const0_rtx
   where we expect a vector.  To avoid crashing, use one of the vector
   clear instructions.  */

static rtx
safe_vector_operand (rtx x, enum machine_mode mode)
{
  if (x == const0_rtx)
    x = CONST0_RTX (mode);
  return x;
}
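/* E.g. (illustrative) a stray scalar const0_rtx standing where a V4SImode
   value is expected becomes CONST0_RTX (V4SImode), a zero vector of the
   right mode, so the expanders below never feed a scalar zero to a vector
   predicate.  */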
/* Subroutine of ix86_expand_builtin to take care of binop insns.  */

static rtx
ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (GET_MODE (op1) == SImode && mode1 == TImode)
    {
      rtx x = gen_reg_rtx (V4SImode);
      emit_insn (gen_sse2_loadd (x, op1));
      op1 = gen_lowpart (TImode, x);
    }

  if (!insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!insn_data[icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
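/* Flow sketch for a typical two-operand builtin, say
   __builtin_ia32_paddw128 (V8HI_FTYPE_V8HI_V8HI): both arguments are
   expanded, coerced into operands the insn predicates accept (copied to
   registers if necessary), and a single add-style pattern is emitted into
   a fresh or reused target register.  */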
/* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns.  */

static rtx
ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
                               enum ix86_builtin_func_type m_type,
                               enum rtx_code sub_code)
{
  rtx pat;
  unsigned int i, nargs;
  bool comparison_p = false;
  bool tf_p = false;
  bool last_arg_constant = false;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];

  enum machine_mode tmode = insn_data[icode].operand[0].mode;

  switch (m_type)
    {
    case MULTI_ARG_4_DF2_DI_I:
    case MULTI_ARG_4_DF2_DI_I1:
    case MULTI_ARG_4_SF2_SI_I:
    case MULTI_ARG_4_SF2_SI_I1:
      nargs = 4;
      last_arg_constant = true;
      break;

    case MULTI_ARG_3_SF:
    case MULTI_ARG_3_DF:
    case MULTI_ARG_3_SF2:
    case MULTI_ARG_3_DF2:
    case MULTI_ARG_3_DI:
    case MULTI_ARG_3_SI:
    case MULTI_ARG_3_SI_DI:
    case MULTI_ARG_3_HI:
    case MULTI_ARG_3_HI_SI:
    case MULTI_ARG_3_QI:
    case MULTI_ARG_3_DI2:
    case MULTI_ARG_3_SI2:
    case MULTI_ARG_3_HI2:
    case MULTI_ARG_3_QI2:
      nargs = 3;
      break;

    case MULTI_ARG_2_SF:
    case MULTI_ARG_2_DF:
    case MULTI_ARG_2_DI:
    case MULTI_ARG_2_SI:
    case MULTI_ARG_2_HI:
    case MULTI_ARG_2_QI:
      nargs = 2;
      break;

    case MULTI_ARG_2_DI_IMM:
    case MULTI_ARG_2_SI_IMM:
    case MULTI_ARG_2_HI_IMM:
    case MULTI_ARG_2_QI_IMM:
      nargs = 2;
      last_arg_constant = true;
      break;

    case MULTI_ARG_1_SF:
    case MULTI_ARG_1_DF:
    case MULTI_ARG_1_SF2:
    case MULTI_ARG_1_DF2:
    case MULTI_ARG_1_DI:
    case MULTI_ARG_1_SI:
    case MULTI_ARG_1_HI:
    case MULTI_ARG_1_QI:
    case MULTI_ARG_1_SI_DI:
    case MULTI_ARG_1_HI_DI:
    case MULTI_ARG_1_HI_SI:
    case MULTI_ARG_1_QI_DI:
    case MULTI_ARG_1_QI_SI:
    case MULTI_ARG_1_QI_HI:
      nargs = 1;
      break;

    case MULTI_ARG_2_DI_CMP:
    case MULTI_ARG_2_SI_CMP:
    case MULTI_ARG_2_HI_CMP:
    case MULTI_ARG_2_QI_CMP:
      nargs = 2;
      comparison_p = true;
      break;

    case MULTI_ARG_2_SF_TF:
    case MULTI_ARG_2_DF_TF:
    case MULTI_ARG_2_DI_TF:
    case MULTI_ARG_2_SI_TF:
    case MULTI_ARG_2_HI_TF:
    case MULTI_ARG_2_QI_TF:
      nargs = 2;
      tf_p = true;
      break;

    default:
      gcc_unreachable ();
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert (nargs <= 4);

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      int adjust = (comparison_p) ? 1 : 0;
      enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;

      if (last_arg_constant && i == nargs-1)
        {
          if (!CONST_INT_P (op))
            {
              error ("last argument must be an immediate");
              return gen_reg_rtx (tmode);
            }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to be
             generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);

          if (optimize
              || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
              || num_memory > 1)
            op = force_reg (mode, op);
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;

    case 2:
      if (tf_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                               GEN_INT ((int)sub_code));
      else if (! comparison_p)
        pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      else
        {
          rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
                                       args[0].op,
                                       args[1].op);

          pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
        }
      break;

    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;

    case 4:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;

    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
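/* For the MULTI_ARG_2_*_CMP variants the table's comparison code (e.g. LT
   for __builtin_ia32_vpcomltb) is materialized above as a separate rtx
   operand, so one xop_maskcmp pattern per mode serves all six signed
   (and, via the _uns patterns, unsigned) comparisons.  */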
/* Subroutine of ix86_expand_args_builtin to take care of scalar unop
   insns with vec_merge.  */

static rtx
ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
                                    rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op1, op0 = expand_normal (arg0);
  enum machine_mode tmode = insn_data[icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  op1 = op0;
  if (!insn_data[icode].operand[2].predicate (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comparison insns.  */

static rtx
ix86_expand_sse_compare (const struct builtin_description *d,
                         tree exp, rtx target, bool swap)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2;
  enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (swap)
    {
      rtx tmp = gen_reg_rtx (mode1);
      emit_move_insn (tmp, op1);
      op1 = op0;
      op0 = tmp;
    }

  if (optimize || !target
      || GET_MODE (target) != tmode
      || !insn_data[d->icode].operand[0].predicate (target, tmode))
    target = gen_reg_rtx (tmode);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[1].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[2].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
  pat = GEN_FCN (d->icode) (target, op0, op1, op2);
  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of comi insns.  */

static rtx
ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
                      rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  /* Swap operands if we have a comparison that isn't available in
     hardware.  */
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      rtx tmp = op1;
      op1 = op0;
      op0 = tmp;
    }

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          SET_DEST (pat),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
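/* The SImode-pseudo/QImode-SUBREG dance above is deliberate: the pseudo is
   zeroed first, then only its low byte is written through STRICT_LOW_PART
   from a setcc-style test of the flags the comi insn just set, and the
   enclosing SImode register is returned -- a properly zero-extended 0/1
   result without a separate zero-extension insn.  */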
/* Subroutine of ix86_expand_builtin to take care of ptest insns.  */

static rtx
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
                       rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
  enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
  enum rtx_code comparison = d->comparison;

  if (VECTOR_MODE_P (mode0))
    op0 = safe_vector_operand (op0, mode0);
  if (VECTOR_MODE_P (mode1))
    op1 = safe_vector_operand (op1, mode1);

  target = gen_reg_rtx (SImode);
  emit_move_insn (target, const0_rtx);
  target = gen_rtx_SUBREG (QImode, target, 0);

  if ((optimize && !register_operand (op0, mode0))
      || !insn_data[d->icode].operand[0].predicate (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if ((optimize && !register_operand (op1, mode1))
      || !insn_data[d->icode].operand[1].predicate (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (d->icode) (op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);
  emit_insn (gen_rtx_SET (VOIDmode,
                          gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                          gen_rtx_fmt_ee (comparison, QImode,
                                          gen_rtx_REG (CCmode, FLAGS_REG),
                                          const0_rtx)));

  return SUBREG_REG (target);
}
/* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns.  */

static rtx
ix86_expand_sse_pcmpestr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  tree arg4 = CALL_EXPR_ARG (exp, 4);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  rtx op4 = expand_normal (arg4);
  enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modei3 = insn_data[d->icode].operand[3].mode;
  modev4 = insn_data[d->icode].operand[4].mode;
  modei5 = insn_data[d->icode].operand[5].mode;
  modeimm = insn_data[d->icode].operand[6].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev4))
    op2 = safe_vector_operand (op2, modev4);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
    op1 = copy_to_mode_reg (modei3, op1);
  if ((optimize && !register_operand (op2, modev4))
      || !insn_data[d->icode].operand[4].predicate (op2, modev4))
    op2 = copy_to_mode_reg (modev4, op2);
  if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
    op3 = copy_to_mode_reg (modei5, op3);

  if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
    {
      error ("the fifth argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPESTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
    }
  else if (d->code == IX86_BUILTIN_PCMPESTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns.  */

static rtx
ix86_expand_sse_pcmpistr (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx scratch0, scratch1;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;

  tmode0 = insn_data[d->icode].operand[0].mode;
  tmode1 = insn_data[d->icode].operand[1].mode;
  modev2 = insn_data[d->icode].operand[2].mode;
  modev3 = insn_data[d->icode].operand[3].mode;
  modeimm = insn_data[d->icode].operand[4].mode;

  if (VECTOR_MODE_P (modev2))
    op0 = safe_vector_operand (op0, modev2);
  if (VECTOR_MODE_P (modev3))
    op1 = safe_vector_operand (op1, modev3);

  if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
    op0 = copy_to_mode_reg (modev2, op0);
  if ((optimize && !register_operand (op1, modev3))
      || !insn_data[d->icode].operand[3].predicate (op1, modev3))
    op1 = copy_to_mode_reg (modev3, op1);

  if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
    {
      error ("the third argument must be an 8-bit immediate");
      return const0_rtx;
    }

  if (d->code == IX86_BUILTIN_PCMPISTRI128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode0
          || !insn_data[d->icode].operand[0].predicate (target, tmode0))
        target = gen_reg_rtx (tmode0);

      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
    }
  else if (d->code == IX86_BUILTIN_PCMPISTRM128)
    {
      if (optimize || !target
          || GET_MODE (target) != tmode1
          || !insn_data[d->icode].operand[1].predicate (target, tmode1))
        target = gen_reg_rtx (tmode1);

      scratch0 = gen_reg_rtx (tmode0);

      pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
    }
  else
    {
      gcc_assert (d->flag);

      scratch0 = gen_reg_rtx (tmode0);
      scratch1 = gen_reg_rtx (tmode1);

      pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
    }

  if (! pat)
    return 0;

  emit_insn (pat);

  if (d->flag)
    {
      target = gen_reg_rtx (SImode);
      emit_move_insn (target, const0_rtx);
      target = gen_rtx_SUBREG (QImode, target, 0);

      emit_insn
        (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
                      gen_rtx_fmt_ee (EQ, QImode,
                                      gen_rtx_REG ((enum machine_mode) d->flag,
                                                   FLAGS_REG),
                                      const0_rtx)));
      return SUBREG_REG (target);
    }
  else
    return target;
}
/* Subroutine of ix86_expand_builtin to take care of insns with
   variable number of operands.  */

static rtx
ix86_expand_args_builtin (const struct builtin_description *d,
                          tree exp, rtx target)
{
  rtx pat, real_target;
  unsigned int i, nargs;
  unsigned int nargs_constant = 0;
  int num_memory = 0;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[4];
  bool last_arg_count = false;
  enum insn_code icode = d->icode;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum machine_mode rmode = VOIDmode;
  bool swap = false;
  enum rtx_code comparison = d->comparison;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case INT_FTYPE_V8SF_V8SF_PTEST:
    case INT_FTYPE_V4DI_V4DI_PTEST:
    case INT_FTYPE_V4DF_V4DF_PTEST:
    case INT_FTYPE_V4SF_V4SF_PTEST:
    case INT_FTYPE_V2DI_V2DI_PTEST:
    case INT_FTYPE_V2DF_V2DF_PTEST:
      return ix86_expand_sse_ptest (d, exp, target);
    case FLOAT128_FTYPE_FLOAT128:
    case FLOAT_FTYPE_FLOAT:
    case INT_FTYPE_INT:
    case UINT64_FTYPE_INT:
    case UINT16_FTYPE_UINT16:
    case INT64_FTYPE_INT64:
    case INT64_FTYPE_V4SF:
    case INT64_FTYPE_V2DF:
    case INT_FTYPE_V16QI:
    case INT_FTYPE_V8QI:
    case INT_FTYPE_V8SF:
    case INT_FTYPE_V4DF:
    case INT_FTYPE_V4SF:
    case INT_FTYPE_V2DF:
    case V16QI_FTYPE_V16QI:
    case V8SI_FTYPE_V8SF:
    case V8SI_FTYPE_V4SI:
    case V8HI_FTYPE_V8HI:
    case V8HI_FTYPE_V16QI:
    case V8QI_FTYPE_V8QI:
    case V8SF_FTYPE_V8SF:
    case V8SF_FTYPE_V8SI:
    case V8SF_FTYPE_V4SF:
    case V8SF_FTYPE_V8HI:
    case V4SI_FTYPE_V4SI:
    case V4SI_FTYPE_V16QI:
    case V4SI_FTYPE_V4SF:
    case V4SI_FTYPE_V8SI:
    case V4SI_FTYPE_V8HI:
    case V4SI_FTYPE_V4DF:
    case V4SI_FTYPE_V2DF:
    case V4HI_FTYPE_V4HI:
    case V4DF_FTYPE_V4DF:
    case V4DF_FTYPE_V4SI:
    case V4DF_FTYPE_V4SF:
    case V4DF_FTYPE_V2DF:
    case V4SF_FTYPE_V4SF:
    case V4SF_FTYPE_V4SI:
    case V4SF_FTYPE_V8SF:
    case V4SF_FTYPE_V4DF:
    case V4SF_FTYPE_V8HI:
    case V4SF_FTYPE_V2DF:
    case V2DI_FTYPE_V2DI:
    case V2DI_FTYPE_V16QI:
    case V2DI_FTYPE_V8HI:
    case V2DI_FTYPE_V4SI:
    case V2DF_FTYPE_V2DF:
    case V2DF_FTYPE_V4SI:
    case V2DF_FTYPE_V4DF:
    case V2DF_FTYPE_V4SF:
    case V2DF_FTYPE_V2SI:
    case V2SI_FTYPE_V2SI:
    case V2SI_FTYPE_V4SF:
    case V2SI_FTYPE_V2SF:
    case V2SI_FTYPE_V2DF:
    case V2SF_FTYPE_V2SF:
    case V2SF_FTYPE_V2SI:
      nargs = 1;
      break;
    case V4SF_FTYPE_V4SF_VEC_MERGE:
    case V2DF_FTYPE_V2DF_VEC_MERGE:
      return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
    case FLOAT128_FTYPE_FLOAT128_FLOAT128:
    case V16QI_FTYPE_V16QI_V16QI:
    case V16QI_FTYPE_V8HI_V8HI:
    case V8QI_FTYPE_V8QI_V8QI:
    case V8QI_FTYPE_V4HI_V4HI:
    case V8HI_FTYPE_V8HI_V8HI:
    case V8HI_FTYPE_V16QI_V16QI:
    case V8HI_FTYPE_V4SI_V4SI:
    case V8SF_FTYPE_V8SF_V8SF:
    case V8SF_FTYPE_V8SF_V8SI:
    case V4SI_FTYPE_V4SI_V4SI:
    case V4SI_FTYPE_V8HI_V8HI:
    case V4SI_FTYPE_V4SF_V4SF:
    case V4SI_FTYPE_V2DF_V2DF:
    case V4HI_FTYPE_V4HI_V4HI:
    case V4HI_FTYPE_V8QI_V8QI:
    case V4HI_FTYPE_V2SI_V2SI:
    case V4DF_FTYPE_V4DF_V4DF:
    case V4DF_FTYPE_V4DF_V4DI:
    case V4SF_FTYPE_V4SF_V4SF:
    case V4SF_FTYPE_V4SF_V4SI:
    case V4SF_FTYPE_V4SF_V2SI:
    case V4SF_FTYPE_V4SF_V2DF:
    case V4SF_FTYPE_V4SF_DI:
    case V4SF_FTYPE_V4SF_SI:
    case V2DI_FTYPE_V2DI_V2DI:
    case V2DI_FTYPE_V16QI_V16QI:
    case V2DI_FTYPE_V4SI_V4SI:
    case V2DI_FTYPE_V2DI_V16QI:
    case V2DI_FTYPE_V2DF_V2DF:
    case V2SI_FTYPE_V2SI_V2SI:
    case V2SI_FTYPE_V4HI_V4HI:
    case V2SI_FTYPE_V2SF_V2SF:
    case V2DF_FTYPE_V2DF_V2DF:
    case V2DF_FTYPE_V2DF_V4SF:
    case V2DF_FTYPE_V2DF_V2DI:
    case V2DF_FTYPE_V2DF_DI:
    case V2DF_FTYPE_V2DF_SI:
    case V2SF_FTYPE_V2SF_V2SF:
    case V1DI_FTYPE_V1DI_V1DI:
    case V1DI_FTYPE_V8QI_V8QI:
    case V1DI_FTYPE_V2SI_V2SI:
      if (comparison == UNKNOWN)
        return ix86_expand_binop_builtin (icode, exp, target);
      nargs = 2;
      break;
    case V4SF_FTYPE_V4SF_V4SF_SWAP:
    case V2DF_FTYPE_V2DF_V2DF_SWAP:
      gcc_assert (comparison != UNKNOWN);
      nargs = 2;
      swap = true;
      break;
    case V8HI_FTYPE_V8HI_V8HI_COUNT:
    case V8HI_FTYPE_V8HI_SI_COUNT:
    case V4SI_FTYPE_V4SI_V4SI_COUNT:
    case V4SI_FTYPE_V4SI_SI_COUNT:
    case V4HI_FTYPE_V4HI_V4HI_COUNT:
    case V4HI_FTYPE_V4HI_SI_COUNT:
    case V2DI_FTYPE_V2DI_V2DI_COUNT:
    case V2DI_FTYPE_V2DI_SI_COUNT:
    case V2SI_FTYPE_V2SI_V2SI_COUNT:
    case V2SI_FTYPE_V2SI_SI_COUNT:
    case V1DI_FTYPE_V1DI_V1DI_COUNT:
    case V1DI_FTYPE_V1DI_SI_COUNT:
      nargs = 2;
      last_arg_count = true;
      break;
    case UINT64_FTYPE_UINT64_UINT64:
    case UINT_FTYPE_UINT_UINT:
    case UINT_FTYPE_UINT_USHORT:
    case UINT_FTYPE_UINT_UCHAR:
    case UINT16_FTYPE_UINT16_INT:
    case UINT8_FTYPE_UINT8_INT:
      nargs = 2;
      break;
    case V2DI_FTYPE_V2DI_INT_CONVERT:
      nargs = 2;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V8HI_FTYPE_V8HI_INT:
    case V8HI_FTYPE_V8SF_INT:
    case V8HI_FTYPE_V4SF_INT:
    case V8SF_FTYPE_V8SF_INT:
    case V4SI_FTYPE_V4SI_INT:
    case V4SI_FTYPE_V8SI_INT:
    case V4HI_FTYPE_V4HI_INT:
    case V4DF_FTYPE_V4DF_INT:
    case V4SF_FTYPE_V4SF_INT:
    case V4SF_FTYPE_V8SF_INT:
    case V2DI_FTYPE_V2DI_INT:
    case V2DF_FTYPE_V2DF_INT:
    case V2DF_FTYPE_V4DF_INT:
      nargs = 2;
      nargs_constant = 1;
      break;
    case V16QI_FTYPE_V16QI_V16QI_V16QI:
    case V8SF_FTYPE_V8SF_V8SF_V8SF:
    case V4DF_FTYPE_V4DF_V4DF_V4DF:
    case V4SF_FTYPE_V4SF_V4SF_V4SF:
    case V2DF_FTYPE_V2DF_V2DF_V2DF:
      nargs = 3;
      break;
    case V16QI_FTYPE_V16QI_V16QI_INT:
    case V8HI_FTYPE_V8HI_V8HI_INT:
    case V8SI_FTYPE_V8SI_V8SI_INT:
    case V8SI_FTYPE_V8SI_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_INT:
    case V8SF_FTYPE_V8SF_V4SF_INT:
    case V4SI_FTYPE_V4SI_V4SI_INT:
    case V4DF_FTYPE_V4DF_V4DF_INT:
    case V4DF_FTYPE_V4DF_V2DF_INT:
    case V4SF_FTYPE_V4SF_V4SF_INT:
    case V2DI_FTYPE_V2DI_V2DI_INT:
    case V2DF_FTYPE_V2DF_V2DF_INT:
      nargs = 3;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
      nargs = 3;
      rmode = V1TImode;
      nargs_constant = 1;
      break;
    case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
      nargs = 3;
      rmode = DImode;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_UINT_UINT:
      nargs = 3;
      nargs_constant = 2;
      break;
    case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
    case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
    case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
    case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
      nargs = 4;
      nargs_constant = 1;
      break;
    case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
      nargs = 4;
      nargs_constant = 2;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (comparison != UNKNOWN)
    {
      gcc_assert (nargs == 2);
      return ix86_expand_sse_compare (d, exp, target, swap);
    }

  if (rmode == VOIDmode || rmode == tmode)
    {
      if (optimize
          || !target
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
      real_target = target;
    }
  else
    {
      target = gen_reg_rtx (rmode);
      real_target = simplify_gen_subreg (tmode, target, rmode, 0);
    }

  for (i = 0; i < nargs; i++)
    {
      tree arg = CALL_EXPR_ARG (exp, i);
      rtx op = expand_normal (arg);
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_count && (i + 1) == nargs)
        {
          /* SIMD shift insns take either an 8-bit immediate or a
             register as the count.  But builtin functions take int as
             the count.  If the count doesn't match, we put it in a
             register.  */
          if (!match)
            {
              op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
              if (!insn_p->operand[i + 1].predicate (op, mode))
                op = copy_to_reg (op);
            }
        }
      else if ((nargs - i) <= nargs_constant)
        {
          if (!match)
            switch (icode)
              {
              case CODE_FOR_sse4_1_roundpd:
              case CODE_FOR_sse4_1_roundps:
              case CODE_FOR_sse4_1_roundsd:
              case CODE_FOR_sse4_1_roundss:
              case CODE_FOR_sse4_1_blendps:
              case CODE_FOR_avx_blendpd256:
              case CODE_FOR_avx_vpermilv4df:
              case CODE_FOR_avx_roundpd256:
              case CODE_FOR_avx_roundps256:
                error ("the last argument must be a 4-bit immediate");
                return const0_rtx;

              case CODE_FOR_sse4_1_blendpd:
              case CODE_FOR_avx_vpermilv2df:
              case CODE_FOR_xop_vpermil2v2df3:
              case CODE_FOR_xop_vpermil2v4sf3:
              case CODE_FOR_xop_vpermil2v4df3:
              case CODE_FOR_xop_vpermil2v8sf3:
                error ("the last argument must be a 2-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_vextractf128v4df:
              case CODE_FOR_avx_vextractf128v8sf:
              case CODE_FOR_avx_vextractf128v8si:
              case CODE_FOR_avx_vinsertf128v4df:
              case CODE_FOR_avx_vinsertf128v8sf:
              case CODE_FOR_avx_vinsertf128v8si:
                error ("the last argument must be a 1-bit immediate");
                return const0_rtx;

              case CODE_FOR_avx_cmpsdv2df3:
              case CODE_FOR_avx_cmpssv4sf3:
              case CODE_FOR_avx_cmppdv2df3:
              case CODE_FOR_avx_cmppsv4sf3:
              case CODE_FOR_avx_cmppdv4df3:
              case CODE_FOR_avx_cmppsv8sf3:
                error ("the last argument must be a 5-bit immediate");
                return const0_rtx;

              default:
                switch (nargs_constant)
                  {
                  case 2:
                    if ((nargs - i) == nargs_constant)
                      {
                        error ("the next to last argument must be an 8-bit immediate");
                        break;
                      }
                  case 1:
                    error ("the last argument must be an 8-bit immediate");
                    break;
                  default:
                    gcc_unreachable ();
                  }
                return const0_rtx;
              }
        }
      else
        {
          if (VECTOR_MODE_P (mode))
            op = safe_vector_operand (op, mode);

          /* If we aren't optimizing, only allow one memory operand to
             be generated.  */
          if (memory_operand (op, mode))
            num_memory++;

          if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
            {
              if (optimize || !match || num_memory > 1)
                op = copy_to_mode_reg (mode, op);
            }
          else
            {
              op = copy_to_reg (op);
              op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 1:
      pat = GEN_FCN (icode) (real_target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op);
      break;
    case 4:
      pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
                             args[2].op, args[3].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;

  emit_insn (pat);
  return target;
}
/* Subroutine of ix86_expand_builtin to take care of special insns
   with variable number of operands.  */

static rtx
ix86_expand_special_args_builtin (const struct builtin_description *d,
                                  tree exp, rtx target)
{
  tree arg;
  rtx pat, op;
  unsigned int i, nargs, arg_adjust, memory;
  struct
    {
      rtx op;
      enum machine_mode mode;
    } args[3];
  enum insn_code icode = d->icode;
  bool last_arg_constant = false;
  const struct insn_data_d *insn_p = &insn_data[icode];
  enum machine_mode tmode = insn_p->operand[0].mode;
  enum { load, store } klass;

  switch ((enum ix86_builtin_func_type) d->flag)
    {
    case VOID_FTYPE_VOID:
      emit_insn (GEN_FCN (icode) (target));
      return 0;
    case VOID_FTYPE_UINT64:
    case VOID_FTYPE_UNSIGNED:
      nargs = 0;
      klass = store;
      memory = 0;
      break;
    case UINT64_FTYPE_VOID:
    case UNSIGNED_FTYPE_VOID:
    case UINT16_FTYPE_VOID:
      nargs = 0;
      klass = load;
      memory = 0;
      break;
    case UINT64_FTYPE_PUNSIGNED:
    case V2DI_FTYPE_PV2DI:
    case V32QI_FTYPE_PCCHAR:
    case V16QI_FTYPE_PCCHAR:
    case V8SF_FTYPE_PCV4SF:
    case V8SF_FTYPE_PCFLOAT:
    case V4SF_FTYPE_PCFLOAT:
    case V4DF_FTYPE_PCV2DF:
    case V4DF_FTYPE_PCDOUBLE:
    case V2DF_FTYPE_PCDOUBLE:
    case VOID_FTYPE_PVOID:
      nargs = 1;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV2SF_V4SF:
    case VOID_FTYPE_PV4DI_V4DI:
    case VOID_FTYPE_PV2DI_V2DI:
    case VOID_FTYPE_PCHAR_V32QI:
    case VOID_FTYPE_PCHAR_V16QI:
    case VOID_FTYPE_PFLOAT_V8SF:
    case VOID_FTYPE_PFLOAT_V4SF:
    case VOID_FTYPE_PDOUBLE_V4DF:
    case VOID_FTYPE_PDOUBLE_V2DF:
    case VOID_FTYPE_PULONGLONG_ULONGLONG:
    case VOID_FTYPE_PINT_INT:
      nargs = 1;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case V4SF_FTYPE_V4SF_PCV2SF:
    case V2DF_FTYPE_V2DF_PCDOUBLE:
      nargs = 2;
      klass = load;
      memory = 1;
      break;
    case V8SF_FTYPE_PCV8SF_V8SF:
    case V4DF_FTYPE_PCV4DF_V4DF:
    case V4SF_FTYPE_PCV4SF_V4SF:
    case V2DF_FTYPE_PCV2DF_V2DF:
      nargs = 2;
      klass = load;
      memory = 0;
      break;
    case VOID_FTYPE_PV8SF_V8SF_V8SF:
    case VOID_FTYPE_PV4DF_V4DF_V4DF:
    case VOID_FTYPE_PV4SF_V4SF_V4SF:
    case VOID_FTYPE_PV2DF_V2DF_V2DF:
      nargs = 2;
      klass = store;
      /* Reserve memory operand for target.  */
      memory = ARRAY_SIZE (args);
      break;
    case VOID_FTYPE_UINT_UINT_UINT:
    case VOID_FTYPE_UINT64_UINT_UINT:
    case UCHAR_FTYPE_UINT_UINT_UINT:
    case UCHAR_FTYPE_UINT64_UINT_UINT:
      nargs = 3;
      klass = load;
      memory = ARRAY_SIZE (args);
      last_arg_constant = true;
      break;
    default:
      gcc_unreachable ();
    }

  gcc_assert (nargs <= ARRAY_SIZE (args));

  if (klass == store)
    {
      arg = CALL_EXPR_ARG (exp, 0);
      op = expand_normal (arg);
      gcc_assert (target == 0);
      if (memory)
        target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
      else
        target = force_reg (tmode, op);
      arg_adjust = 1;
    }
  else
    {
      arg_adjust = 0;
      if (optimize
          || target == 0
          || GET_MODE (target) != tmode
          || !insn_p->operand[0].predicate (target, tmode))
        target = gen_reg_rtx (tmode);
    }

  for (i = 0; i < nargs; i++)
    {
      enum machine_mode mode = insn_p->operand[i + 1].mode;
      bool match;

      arg = CALL_EXPR_ARG (exp, i + arg_adjust);
      op = expand_normal (arg);
      match = insn_p->operand[i + 1].predicate (op, mode);

      if (last_arg_constant && (i + 1) == nargs)
        {
          if (!match)
            {
              if (icode == CODE_FOR_lwp_lwpvalsi3
                  || icode == CODE_FOR_lwp_lwpinssi3
                  || icode == CODE_FOR_lwp_lwpvaldi3
                  || icode == CODE_FOR_lwp_lwpinsdi3)
                error ("the last argument must be a 32-bit immediate");
              else
                error ("the last argument must be an 8-bit immediate");
              return const0_rtx;
            }
        }
      else
        {
          if (i == memory)
            {
              /* This must be the memory operand.  */
              op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
            }
          else
            {
              /* This must be a register.  */
              if (VECTOR_MODE_P (mode))
                op = safe_vector_operand (op, mode);

              gcc_assert (GET_MODE (op) == mode
                          || GET_MODE (op) == VOIDmode);
              op = copy_to_mode_reg (mode, op);
            }
        }

      args[i].op = op;
      args[i].mode = mode;
    }

  switch (nargs)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      pat = GEN_FCN (icode) (target, args[0].op);
      break;
    case 2:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
      break;
    case 3:
      pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
      break;
    default:
      gcc_unreachable ();
    }

  if (! pat)
    return 0;
  emit_insn (pat);
  return klass == store ? 0 : target;
}
/* Return the integer constant in ARG.  Constrain it to be in the range
   of the subparts of VEC_TYPE; issue an error if not.  */

static unsigned HOST_WIDE_INT
get_element_number (tree vec_type, tree arg)
{
  unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;

  if (!host_integerp (arg, 1)
      || (elt = tree_low_cst (arg, 1), elt > max))
    {
      error ("selector must be an integer constant in the range 0..%wi", max);
      return 0;
    }

  return elt;
}
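/* E.g. (illustrative) __builtin_ia32_vec_ext_v4si (v, 5) is rejected here
   with "selector must be an integer constant in the range 0..3"; returning
   0 lets expansion continue after the error.  */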
/* A subroutine of ix86_expand_builtin.  These builtins are a wrapper around
   ix86_expand_vector_init.  We DO have language-level syntax for this, in
   the form of (type){ init-list }.  Except that since we can't place emms
   instructions from inside the compiler, we can't allow the use of MMX
   registers unless the user explicitly asks for it.  So we do *not* define
   vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md.  Instead
   we have builtins invoked by mmintrin.h that give us license to emit
   these sorts of instructions.  */
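/* In other words (illustrative sketch): for SSE types the user can simply
   write

       __v4si v = (__v4si) { a, b, c, d };

   but the equivalent MMX-mode construction must go through the mmintrin.h
   builtins handled below, which keeps MMX register usage opt-in.  */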
25080 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25082 enum machine_mode tmode = TYPE_MODE (type);
25083 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25084 int i, n_elt = GET_MODE_NUNITS (tmode);
25085 rtvec v = rtvec_alloc (n_elt);
25087 gcc_assert (VECTOR_MODE_P (tmode));
25088 gcc_assert (call_expr_nargs (exp) == n_elt);
25090 for (i = 0; i < n_elt; ++i)
25092 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25093 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25096 if (!target || !register_operand (target, tmode))
25097 target = gen_reg_rtx (tmode);
25099 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
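/* A user-level sketch of how these builtins are reached, modeled on
   GCC's mmintrin.h wrapper (assumed; shown for illustration only):  */
#if 0
#include <mmintrin.h>

__m64
make_pair (int lo, int hi)
{
  /* _mm_set_pi32 expands to __builtin_ia32_vec_init_v2si, which is
     routed to ix86_expand_vec_init_builtin above; element 0 of the
     result is LO, element 1 is HI.  */
  return _mm_set_pi32 (hi, lo);
}
#endif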
25103 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25104 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25105 had a language-level syntax for referencing vector elements. */
25108 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25110 enum machine_mode tmode, mode0;
25115 arg0 = CALL_EXPR_ARG (exp, 0);
25116 arg1 = CALL_EXPR_ARG (exp, 1);
25118 op0 = expand_normal (arg0);
25119 elt = get_element_number (TREE_TYPE (arg0), arg1);
25121 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25122 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25123 gcc_assert (VECTOR_MODE_P (mode0));
25125 op0 = force_reg (mode0, op0);
25127 if (optimize || !target || !register_operand (target, tmode))
25128 target = gen_reg_rtx (tmode);
25130 ix86_expand_vector_extract (true, target, op0, elt);
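/* A user-level sketch (modeled on GCC's xmmintrin.h wrapper; shown
   for illustration only):  */
#if 0
#include <xmmintrin.h>

int
third_halfword (__m64 v)
{
  /* _mm_extract_pi16 wraps __builtin_ia32_vec_ext_v4hi; the constant
     selector 2 is range-checked by get_element_number (0..3 for
     V4HI) before ix86_expand_vector_extract is called.  */
  return _mm_extract_pi16 (v, 2);
}
#endif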
25135 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25136 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25137 a language-level syntax for referencing vector elements. */
25140 ix86_expand_vec_set_builtin (tree exp)
25142 enum machine_mode tmode, mode1;
25143 tree arg0, arg1, arg2;
25145 rtx op0, op1, target;
25147 arg0 = CALL_EXPR_ARG (exp, 0);
25148 arg1 = CALL_EXPR_ARG (exp, 1);
25149 arg2 = CALL_EXPR_ARG (exp, 2);
25151 tmode = TYPE_MODE (TREE_TYPE (arg0));
25152 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25153 gcc_assert (VECTOR_MODE_P (tmode));
25155 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25156 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25157 elt = get_element_number (TREE_TYPE (arg0), arg2);
25159 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25160 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25162 op0 = force_reg (tmode, op0);
25163 op1 = force_reg (mode1, op1);
25165 /* OP0 is the source of these builtin functions and shouldn't be
25166 modified. Create a copy, use it and return it as target. */
25167 target = gen_reg_rtx (tmode);
25168 emit_move_insn (target, op0);
25169 ix86_expand_vector_set (true, target, op1, elt);
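/* A user-level sketch (modeled on GCC's xmmintrin.h wrapper; shown
   for illustration only):  */
#if 0
#include <xmmintrin.h>

__m64
replace_halfword (__m64 v, int d)
{
  /* _mm_insert_pi16 wraps __builtin_ia32_vec_set_v4hi.  As the
     comment above notes, the expander copies V into a fresh
     register and returns the copy, so V itself is left intact.  */
  return _mm_insert_pi16 (v, d, 1);
}
#endif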
25174 /* Expand an expression EXP that calls a built-in function,
25175 with result going to TARGET if that's convenient
25176 (and in mode MODE if that's convenient).
25177 SUBTARGET may be used as the target for computing one of EXP's operands.
25178 IGNORE is nonzero if the value is to be ignored. */
25181 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25182 enum machine_mode mode ATTRIBUTE_UNUSED,
25183 int ignore ATTRIBUTE_UNUSED)
25185 const struct builtin_description *d;
25187 enum insn_code icode;
25188 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25189 tree arg0, arg1, arg2;
25190 rtx op0, op1, op2, pat;
25191 enum machine_mode mode0, mode1, mode2;
25192 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25194 /* Determine whether the builtin function is available under the current ISA.
25195 Originally the builtin was not created if it wasn't applicable to the
25196 current ISA based on the command line switches. With function specific
25197 options, we need to check in the context of the function making the call
25198 whether it is supported. */
25199 if (ix86_builtins_isa[fcode].isa
25200 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25202 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25203 NULL, NULL, false);
25206 error ("%qE needs unknown isa option", fndecl);
25209 gcc_assert (opts != NULL);
25210 error ("%qE needs isa option %s", fndecl, opts);
25218 case IX86_BUILTIN_MASKMOVQ:
25219 case IX86_BUILTIN_MASKMOVDQU:
25220 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25221 ? CODE_FOR_mmx_maskmovq
25222 : CODE_FOR_sse2_maskmovdqu);
25223 /* Note the arg order is different from the operand order. */
25224 arg1 = CALL_EXPR_ARG (exp, 0);
25225 arg2 = CALL_EXPR_ARG (exp, 1);
25226 arg0 = CALL_EXPR_ARG (exp, 2);
25227 op0 = expand_normal (arg0);
25228 op1 = expand_normal (arg1);
25229 op2 = expand_normal (arg2);
25230 mode0 = insn_data[icode].operand[0].mode;
25231 mode1 = insn_data[icode].operand[1].mode;
25232 mode2 = insn_data[icode].operand[2].mode;
25234 op0 = force_reg (Pmode, op0);
25235 op0 = gen_rtx_MEM (mode1, op0);
25237 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25238 op0 = copy_to_mode_reg (mode0, op0);
25239 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25240 op1 = copy_to_mode_reg (mode1, op1);
25241 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25242 op2 = copy_to_mode_reg (mode2, op2);
25243 pat = GEN_FCN (icode) (op0, op1, op2);
25249 case IX86_BUILTIN_LDMXCSR:
25250 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25251 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25252 emit_move_insn (target, op0);
25253 emit_insn (gen_sse_ldmxcsr (target));
25256 case IX86_BUILTIN_STMXCSR:
25257 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25258 emit_insn (gen_sse_stmxcsr (target));
25259 return copy_to_mode_reg (SImode, target);
25261 case IX86_BUILTIN_CLFLUSH:
25262 arg0 = CALL_EXPR_ARG (exp, 0);
25263 op0 = expand_normal (arg0);
25264 icode = CODE_FOR_sse2_clflush;
25265 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25266 op0 = copy_to_mode_reg (Pmode, op0);
25268 emit_insn (gen_sse2_clflush (op0));
25271 case IX86_BUILTIN_MONITOR:
25272 arg0 = CALL_EXPR_ARG (exp, 0);
25273 arg1 = CALL_EXPR_ARG (exp, 1);
25274 arg2 = CALL_EXPR_ARG (exp, 2);
25275 op0 = expand_normal (arg0);
25276 op1 = expand_normal (arg1);
25277 op2 = expand_normal (arg2);
25279 op0 = copy_to_mode_reg (Pmode, op0);
25281 op1 = copy_to_mode_reg (SImode, op1);
25283 op2 = copy_to_mode_reg (SImode, op2);
25284 emit_insn (ix86_gen_monitor (op0, op1, op2));
25287 case IX86_BUILTIN_MWAIT:
25288 arg0 = CALL_EXPR_ARG (exp, 0);
25289 arg1 = CALL_EXPR_ARG (exp, 1);
25290 op0 = expand_normal (arg0);
25291 op1 = expand_normal (arg1);
25293 op0 = copy_to_mode_reg (SImode, op0);
25295 op1 = copy_to_mode_reg (SImode, op1);
25296 emit_insn (gen_sse3_mwait (op0, op1));
25299 case IX86_BUILTIN_VEC_INIT_V2SI:
25300 case IX86_BUILTIN_VEC_INIT_V4HI:
25301 case IX86_BUILTIN_VEC_INIT_V8QI:
25302 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25304 case IX86_BUILTIN_VEC_EXT_V2DF:
25305 case IX86_BUILTIN_VEC_EXT_V2DI:
25306 case IX86_BUILTIN_VEC_EXT_V4SF:
25307 case IX86_BUILTIN_VEC_EXT_V4SI:
25308 case IX86_BUILTIN_VEC_EXT_V8HI:
25309 case IX86_BUILTIN_VEC_EXT_V2SI:
25310 case IX86_BUILTIN_VEC_EXT_V4HI:
25311 case IX86_BUILTIN_VEC_EXT_V16QI:
25312 return ix86_expand_vec_ext_builtin (exp, target);
25314 case IX86_BUILTIN_VEC_SET_V2DI:
25315 case IX86_BUILTIN_VEC_SET_V4SF:
25316 case IX86_BUILTIN_VEC_SET_V4SI:
25317 case IX86_BUILTIN_VEC_SET_V8HI:
25318 case IX86_BUILTIN_VEC_SET_V4HI:
25319 case IX86_BUILTIN_VEC_SET_V16QI:
25320 return ix86_expand_vec_set_builtin (exp);
25322 case IX86_BUILTIN_VEC_PERM_V2DF:
25323 case IX86_BUILTIN_VEC_PERM_V4SF:
25324 case IX86_BUILTIN_VEC_PERM_V2DI:
25325 case IX86_BUILTIN_VEC_PERM_V4SI:
25326 case IX86_BUILTIN_VEC_PERM_V8HI:
25327 case IX86_BUILTIN_VEC_PERM_V16QI:
25328 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25329 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25330 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25331 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25332 case IX86_BUILTIN_VEC_PERM_V4DF:
25333 case IX86_BUILTIN_VEC_PERM_V8SF:
25334 return ix86_expand_vec_perm_builtin (exp);
25336 case IX86_BUILTIN_INFQ:
25337 case IX86_BUILTIN_HUGE_VALQ:
25339 REAL_VALUE_TYPE inf;
25343 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25345 tmp = validize_mem (force_const_mem (mode, tmp));
25348 target = gen_reg_rtx (mode);
25350 emit_move_insn (target, tmp);
25354 case IX86_BUILTIN_LLWPCB:
25355 arg0 = CALL_EXPR_ARG (exp, 0);
25356 op0 = expand_normal (arg0);
25357 icode = CODE_FOR_lwp_llwpcb;
25358 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25359 op0 = copy_to_mode_reg (Pmode, op0);
25360 emit_insn (gen_lwp_llwpcb (op0));
25363 case IX86_BUILTIN_SLWPCB:
25364 icode = CODE_FOR_lwp_slwpcb;
25366 || !insn_data[icode].operand[0].predicate (target, Pmode))
25367 target = gen_reg_rtx (Pmode);
25368 emit_insn (gen_lwp_slwpcb (target));
25375 for (i = 0, d = bdesc_special_args;
25376 i < ARRAY_SIZE (bdesc_special_args);
25378 if (d->code == fcode)
25379 return ix86_expand_special_args_builtin (d, exp, target);
25381 for (i = 0, d = bdesc_args;
25382 i < ARRAY_SIZE (bdesc_args);
25384 if (d->code == fcode)
25387 case IX86_BUILTIN_FABSQ:
25388 case IX86_BUILTIN_COPYSIGNQ:
25390 /* Emit a normal call if SSE2 isn't available. */
25391 return expand_call (exp, target, ignore);
25393 return ix86_expand_args_builtin (d, exp, target);
25396 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25397 if (d->code == fcode)
25398 return ix86_expand_sse_comi (d, exp, target);
25400 for (i = 0, d = bdesc_pcmpestr;
25401 i < ARRAY_SIZE (bdesc_pcmpestr);
25403 if (d->code == fcode)
25404 return ix86_expand_sse_pcmpestr (d, exp, target);
25406 for (i = 0, d = bdesc_pcmpistr;
25407 i < ARRAY_SIZE (bdesc_pcmpistr);
25409 if (d->code == fcode)
25410 return ix86_expand_sse_pcmpistr (d, exp, target);
25412 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25413 if (d->code == fcode)
25414 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25415 (enum ix86_builtin_func_type)
25416 d->flag, d->comparison);
25418 gcc_unreachable ();
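/* The ISA check at the top of this function is what makes
   function-specific target options work.  A hedged user-level sketch
   (assumes a compiler built with SSE4.2 support but invoked without
   -msse4.2):  */
#if 0
/* OK: the target attribute enables SSE4.2, and hence the builtin,
   for this one function.  */
__attribute__((target ("sse4.2")))
unsigned int
crc_step (unsigned int crc, unsigned char b)
{
  return __builtin_ia32_crc32qi (crc, b);
}

/* In a plain function the same call would instead reach the error
   path above: "__builtin_ia32_crc32qi needs isa option ...".  */
#endif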
25421 /* Returns a function decl for a vectorized version of the builtin function
25422 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25423 if it is not available. */
25426 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25429 enum machine_mode in_mode, out_mode;
25431 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25433 if (TREE_CODE (type_out) != VECTOR_TYPE
25434 || TREE_CODE (type_in) != VECTOR_TYPE
25435 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25438 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25439 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25440 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25441 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25445 case BUILT_IN_SQRT:
25446 if (out_mode == DFmode && out_n == 2
25447 && in_mode == DFmode && in_n == 2)
25448 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25451 case BUILT_IN_SQRTF:
25452 if (out_mode == SFmode && out_n == 4
25453 && in_mode == SFmode && in_n == 4)
25454 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25457 case BUILT_IN_LRINT:
25458 if (out_mode == SImode && out_n == 4
25459 && in_mode == DFmode && in_n == 2)
25460 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25463 case BUILT_IN_LRINTF:
25464 if (out_mode == SImode && out_n == 4
25465 && in_mode == SFmode && in_n == 4)
25466 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25469 case BUILT_IN_COPYSIGN:
25470 if (out_mode == DFmode && out_n == 2
25471 && in_mode == DFmode && in_n == 2)
25472 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25475 case BUILT_IN_COPYSIGNF:
25476 if (out_mode == SFmode && out_n == 4
25477 && in_mode == SFmode && in_n == 4)
25478 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25485 /* Dispatch to a handler for a vectorization library. */
25486 if (ix86_veclib_handler)
25487 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
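/* A sketch of the kind of loop this hook serves (assumed flags:
   -O2 -ftree-vectorize -msse2 -fno-math-errno):  */
#if 0
#include <math.h>

void
vec_sqrt (double *restrict a, const double *restrict b, int n)
{
  int i;
  /* The vectorizer queries this hook for BUILT_IN_SQRT with V2DF in
     and out and receives IX86_BUILTIN_SQRTPD, so the loop body
     becomes one sqrtpd per two elements.  */
  for (i = 0; i < n; i++)
    a[i] = sqrt (b[i]);
}
#endif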
25493 /* Handler for an SVML-style interface to
25494 a library with vectorized intrinsics. */
25497 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25500 tree fntype, new_fndecl, args;
25503 enum machine_mode el_mode, in_mode;
25506 /* SVML is suitable for unsafe math only. */
25507 if (!flag_unsafe_math_optimizations)
25510 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25511 n = TYPE_VECTOR_SUBPARTS (type_out);
25512 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25513 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25514 if (el_mode != in_mode
25522 case BUILT_IN_LOG10:
25524 case BUILT_IN_TANH:
25526 case BUILT_IN_ATAN:
25527 case BUILT_IN_ATAN2:
25528 case BUILT_IN_ATANH:
25529 case BUILT_IN_CBRT:
25530 case BUILT_IN_SINH:
25532 case BUILT_IN_ASINH:
25533 case BUILT_IN_ASIN:
25534 case BUILT_IN_COSH:
25536 case BUILT_IN_ACOSH:
25537 case BUILT_IN_ACOS:
25538 if (el_mode != DFmode || n != 2)
25542 case BUILT_IN_EXPF:
25543 case BUILT_IN_LOGF:
25544 case BUILT_IN_LOG10F:
25545 case BUILT_IN_POWF:
25546 case BUILT_IN_TANHF:
25547 case BUILT_IN_TANF:
25548 case BUILT_IN_ATANF:
25549 case BUILT_IN_ATAN2F:
25550 case BUILT_IN_ATANHF:
25551 case BUILT_IN_CBRTF:
25552 case BUILT_IN_SINHF:
25553 case BUILT_IN_SINF:
25554 case BUILT_IN_ASINHF:
25555 case BUILT_IN_ASINF:
25556 case BUILT_IN_COSHF:
25557 case BUILT_IN_COSF:
25558 case BUILT_IN_ACOSHF:
25559 case BUILT_IN_ACOSF:
25560 if (el_mode != SFmode || n != 4)
25568 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25570 if (fn == BUILT_IN_LOGF)
25571 strcpy (name, "vmlsLn4");
25572 else if (fn == BUILT_IN_LOG)
25573 strcpy (name, "vmldLn2");
25576 sprintf (name, "vmls%s", bname+10);
25577 name[strlen (name)-1] = '4';
25580 sprintf (name, "vmld%s2", bname+10);
25582 /* Convert to uppercase. */
25586 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25587 args = TREE_CHAIN (args))
25591 fntype = build_function_type_list (type_out, type_in, NULL);
25593 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25595 /* Build a function declaration for the vectorized function. */
25596 new_fndecl = build_decl (BUILTINS_LOCATION,
25597 FUNCTION_DECL, get_identifier (name), fntype);
25598 TREE_PUBLIC (new_fndecl) = 1;
25599 DECL_EXTERNAL (new_fndecl) = 1;
25600 DECL_IS_NOVOPS (new_fndecl) = 1;
25601 TREE_READONLY (new_fndecl) = 1;
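/* Worked examples of the name mangling above (a sketch; it assumes
   the elided bytes upper-case the first letter of the function name):

     BUILT_IN_SINF with V4SF  ->  "vmlsSin4"
     BUILT_IN_SIN  with V2DF  ->  "vmldSin2"
     BUILT_IN_LOGF with V4SF  ->  "vmlsLn4"   (special-cased)
     BUILT_IN_LOG  with V2DF  ->  "vmldLn2"   (special-cased)

   BNAME is e.g. "__builtin_sinf", so bname+10 skips the ten-character
   "__builtin_" prefix, and the trailing digit is the vector width.  */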
25606 /* Handler for an ACML-style interface to
25607 a library with vectorized intrinsics. */
25610 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25612 char name[20] = "__vr.._";
25613 tree fntype, new_fndecl, args;
25616 enum machine_mode el_mode, in_mode;
25619 /* ACML is 64-bit only and suitable for unsafe math only, as it
25620 does not correctly support parts of IEEE arithmetic with the
25621 required precision, such as denormals. */
25623 || !flag_unsafe_math_optimizations)
25626 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25627 n = TYPE_VECTOR_SUBPARTS (type_out);
25628 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25629 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25630 if (el_mode != in_mode
25640 case BUILT_IN_LOG2:
25641 case BUILT_IN_LOG10:
25644 if (el_mode != DFmode
25649 case BUILT_IN_SINF:
25650 case BUILT_IN_COSF:
25651 case BUILT_IN_EXPF:
25652 case BUILT_IN_POWF:
25653 case BUILT_IN_LOGF:
25654 case BUILT_IN_LOG2F:
25655 case BUILT_IN_LOG10F:
25658 if (el_mode != SFmode
25667 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25668 sprintf (name + 7, "%s", bname+10);
25671 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25672 args = TREE_CHAIN (args))
25676 fntype = build_function_type_list (type_out, type_in, NULL);
25678 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25680 /* Build a function declaration for the vectorized function. */
25681 new_fndecl = build_decl (BUILTINS_LOCATION,
25682 FUNCTION_DECL, get_identifier (name), fntype);
25683 TREE_PUBLIC (new_fndecl) = 1;
25684 DECL_EXTERNAL (new_fndecl) = 1;
25685 DECL_IS_NOVOPS (new_fndecl) = 1;
25686 TREE_READONLY (new_fndecl) = 1;
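/* Worked examples of the ACML mangling (a sketch; it assumes the
   elided code fills the ".." slots of the template with the element
   type and vector width):

     BUILT_IN_SIN  with V2DF  ->  "__vrd2_sin"
     BUILT_IN_COSF with V4SF  ->  "__vrs4_cosf"

   name+7 points just past the "__vr.._" template, and bname+10 again
   skips the "__builtin_" prefix.  */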
25692 /* Returns a decl of a function that implements conversion of an integer vector
25693 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25694 are the types involved when converting according to CODE.
25695 Return NULL_TREE if it is not available. */
25698 ix86_vectorize_builtin_conversion (unsigned int code,
25699 tree dest_type, tree src_type)
25707 switch (TYPE_MODE (src_type))
25710 switch (TYPE_MODE (dest_type))
25713 return (TYPE_UNSIGNED (src_type)
25714 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25715 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25717 return (TYPE_UNSIGNED (src_type)
25719 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25725 switch (TYPE_MODE (dest_type))
25728 return (TYPE_UNSIGNED (src_type)
25730 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25739 case FIX_TRUNC_EXPR:
25740 switch (TYPE_MODE (dest_type))
25743 switch (TYPE_MODE (src_type))
25746 return (TYPE_UNSIGNED (dest_type)
25748 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25750 return (TYPE_UNSIGNED (dest_type)
25752 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25759 switch (TYPE_MODE (src_type))
25762 return (TYPE_UNSIGNED (dest_type)
25764 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
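/* A sketch of a loop this hook vectorizes (assumed flags:
   -O2 -ftree-vectorize -msse2):  */
#if 0
void
int_to_float (float *restrict dst, const int *restrict src, int n)
{
  int i;
  /* The vectorizer asks for FLOAT_EXPR from V4SI to V4SF and is
     handed IX86_BUILTIN_CVTDQ2PS, i.e. one cvtdq2ps per four
     elements.  */
  for (i = 0; i < n; i++)
    dst[i] = (float) src[i];
}
#endif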
25781 /* Returns a decl of a target-specific builtin that implements the
25782 reciprocal of the function, or NULL_TREE if it is not available. */
25785 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25786 bool sqrt ATTRIBUTE_UNUSED)
25788 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25789 && flag_finite_math_only && !flag_trapping_math
25790 && flag_unsafe_math_optimizations))
25794 /* Machine dependent builtins. */
25797 /* Vectorized version of sqrt to rsqrt conversion. */
25798 case IX86_BUILTIN_SQRTPS_NR:
25799 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25805 /* Normal builtins. */
25808 /* Sqrt to rsqrt conversion. */
25809 case BUILT_IN_SQRTF:
25810 return ix86_builtins[IX86_BUILTIN_RSQRTF];
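/* A sketch of the transformation this hook enables (the flag set
   tested above is implied by -ffast-math with -mfpmath=sse):  */
#if 0
#include <math.h>

float
inv_sqrt (float x)
{
  /* BUILT_IN_SQRTF is answered with IX86_BUILTIN_RSQRTF, so this can
     be expanded as rsqrtss plus one Newton-Raphson refinement step
     instead of sqrtss followed by divss.  */
  return 1.0f / sqrtf (x);
}
#endif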
25817 /* Helper for avx_vpermilps256_operand et al. This is also used by
25818 the expansion functions to turn the parallel back into a mask.
25819 The return value is 0 for no match and the imm8+1 for a match. */
25822 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25824 unsigned i, nelt = GET_MODE_NUNITS (mode);
25826 unsigned char ipar[8];
25828 if (XVECLEN (par, 0) != (int) nelt)
25831 /* Validate that all of the elements are constants, and not totally
25832 out of range. Copy the data into an integral array to make the
25833 subsequent checks easier. */
25834 for (i = 0; i < nelt; ++i)
25836 rtx er = XVECEXP (par, 0, i);
25837 unsigned HOST_WIDE_INT ei;
25839 if (!CONST_INT_P (er))
25850 /* In the 256-bit DFmode case, we can only move elements within
25852 for (i = 0; i < 2; ++i)
25856 mask |= ipar[i] << i;
25858 for (i = 2; i < 4; ++i)
25862 mask |= (ipar[i] - 2) << i;
25867 /* In the 256-bit SFmode case, we have full freedom of movement
25868 within the low 128-bit lane, but the high 128-bit lane must
25869 mirror the exact same pattern. */
25870 for (i = 0; i < 4; ++i)
25871 if (ipar[i] + 4 != ipar[i + 4])
25878 /* In the 128-bit case, we've full freedom in the placement of
25879 the elements from the source operand. */
25880 for (i = 0; i < nelt; ++i)
25881 mask |= ipar[i] << (i * (nelt / 2));
25885 gcc_unreachable ();
25888 /* Make sure success has a non-zero value by adding one. */
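/* Worked example for the 128-bit V4SF case: the selector
   (2, 3, 0, 1) yields

     mask = 2<<0 | 3<<2 | 0<<4 | 1<<6 = 0x4e

   which is the usual vpermilps imm8 encoding (two bits per element),
   and the function returns 0x4e + 1.  */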
25892 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25893 the expansion functions to turn the parallel back into a mask.
25894 The return value is 0 for no match and the imm8+1 for a match. */
25897 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25899 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25901 unsigned char ipar[8];
25903 if (XVECLEN (par, 0) != (int) nelt)
25906 /* Validate that all of the elements are constants, and not totally
25907 out of range. Copy the data into an integral array to make the
25908 subsequent checks easier. */
25909 for (i = 0; i < nelt; ++i)
25911 rtx er = XVECEXP (par, 0, i);
25912 unsigned HOST_WIDE_INT ei;
25914 if (!CONST_INT_P (er))
25917 if (ei >= 2 * nelt)
25922 /* Validate that each half of the permute selects a run of consecutive elements. */
25923 for (i = 0; i < nelt2 - 1; ++i)
25924 if (ipar[i] + 1 != ipar[i + 1])
25926 for (i = nelt2; i < nelt - 1; ++i)
25927 if (ipar[i] + 1 != ipar[i + 1])
25930 /* Reconstruct the mask. */
25931 for (i = 0; i < 2; ++i)
25933 unsigned e = ipar[i * nelt2];
25937 mask |= e << (i * 4);
25940 /* Make sure success has a non-zero value by adding one. */
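/* Worked example for V4DF (a sketch; it assumes the elided code
   rejects lane-misaligned indices and divides each one by NELT2):
   the selector (4, 5, 2, 3) passes the contiguous-halves check; the
   low half {4,5} is the low lane of the second source (e = 4/2 = 2)
   and the high half {2,3} is the high lane of the first (e = 2/2 = 1),
   so mask = 2 | 1<<4 = 0x12 and the function returns 0x13.  */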
25945 /* Store OPERAND to the memory after reload is completed. This means
25946 that we can't easily use assign_stack_local. */
25948 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25952 gcc_assert (reload_completed);
25953 if (ix86_using_red_zone ())
25955 result = gen_rtx_MEM (mode,
25956 gen_rtx_PLUS (Pmode,
25958 GEN_INT (-RED_ZONE_SIZE)));
25959 emit_move_insn (result, operand);
25961 else if (TARGET_64BIT)
25967 operand = gen_lowpart (DImode, operand);
25971 gen_rtx_SET (VOIDmode,
25972 gen_rtx_MEM (DImode,
25973 gen_rtx_PRE_DEC (DImode,
25974 stack_pointer_rtx)),
25978 gcc_unreachable ();
25980 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25989 split_di (&operand, 1, operands, operands + 1);
25991 gen_rtx_SET (VOIDmode,
25992 gen_rtx_MEM (SImode,
25993 gen_rtx_PRE_DEC (Pmode,
25994 stack_pointer_rtx)),
25997 gen_rtx_SET (VOIDmode,
25998 gen_rtx_MEM (SImode,
25999 gen_rtx_PRE_DEC (Pmode,
26000 stack_pointer_rtx)),
26005 /* Store HImode values as SImode. */
26006 operand = gen_lowpart (SImode, operand);
26010 gen_rtx_SET (VOIDmode,
26011 gen_rtx_MEM (GET_MODE (operand),
26012 gen_rtx_PRE_DEC (SImode,
26013 stack_pointer_rtx)),
26017 gcc_unreachable ();
26019 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26024 /* Free operand from the memory. */
26026 ix86_free_from_memory (enum machine_mode mode)
26028 if (!ix86_using_red_zone ())
26032 if (mode == DImode || TARGET_64BIT)
26036 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26037 to a pop or add instruction if registers are available. */
26038 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26039 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26044 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26045 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26047 static const reg_class_t *
26048 i386_ira_cover_classes (void)
26050 static const reg_class_t sse_fpmath_classes[] = {
26051 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26053 static const reg_class_t no_sse_fpmath_classes[] = {
26054 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26057 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26060 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26061 QImode must go into class Q_REGS.
26062 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26063 movdf to do mem-to-mem moves through integer regs. */
26065 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26067 enum machine_mode mode = GET_MODE (x);
26069 /* We're only allowed to return a subclass of CLASS. Many of the
26070 following checks fail for NO_REGS, so eliminate that early. */
26071 if (regclass == NO_REGS)
26074 /* All classes can load zeros. */
26075 if (x == CONST0_RTX (mode))
26078 /* Force constants into memory if we are loading a (nonzero) constant into
26079 an MMX or SSE register. This is because there are no MMX/SSE instructions
26080 to load from a constant. */
26082 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26085 /* Prefer SSE regs only, if we can use them for math. */
26086 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26087 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26089 /* Floating-point constants need more complex checks. */
26090 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26092 /* General regs can load everything. */
26093 if (reg_class_subset_p (regclass, GENERAL_REGS))
26096 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26097 zero above. We only want to wind up preferring 80387 registers if
26098 we plan on doing computation with them. */
26100 && standard_80387_constant_p (x))
26102 /* Limit class to non-sse. */
26103 if (regclass == FLOAT_SSE_REGS)
26105 if (regclass == FP_TOP_SSE_REGS)
26107 if (regclass == FP_SECOND_SSE_REGS)
26108 return FP_SECOND_REG;
26109 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26116 /* Generally when we see PLUS here, it's the function invariant
26117 (plus soft-fp const_int), which can only be computed into general
26118 regs. */
26119 if (GET_CODE (x) == PLUS)
26120 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26122 /* QImode constants are easy to load, but non-constant QImode data
26123 must go into Q_REGS. */
26124 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26126 if (reg_class_subset_p (regclass, Q_REGS))
26128 if (reg_class_subset_p (Q_REGS, regclass))
26136 /* Discourage putting floating-point values in SSE registers unless
26137 SSE math is being used, and likewise for the 387 registers. */
26139 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26141 enum machine_mode mode = GET_MODE (x);
26143 /* Restrict the output reload class to the register bank that we are doing
26144 math on. If we would prefer not to return a subset of CLASS, reject this
26145 alternative: if reload cannot do this, it will still use its choice. */
26147 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26148 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26150 if (X87_FLOAT_MODE_P (mode))
26152 if (regclass == FP_TOP_SSE_REGS)
26154 else if (regclass == FP_SECOND_SSE_REGS)
26155 return FP_SECOND_REG;
26157 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26164 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26165 enum machine_mode mode,
26166 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26168 /* QImode spills from non-QI registers require an
26169 intermediate register on 32-bit targets. */
26170 if (!in_p && mode == QImode && !TARGET_64BIT
26171 && (rclass == GENERAL_REGS
26172 || rclass == LEGACY_REGS
26173 || rclass == INDEX_REGS))
26182 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26183 regno = true_regnum (x);
26185 /* Return Q_REGS if the operand is in memory. */
26193 /* If we are copying between general and FP registers, we need a memory
26194 location. The same is true for SSE and MMX registers.
26196 To optimize register_move_cost performance, allow inline variant.
26198 The macro can't work reliably when one of the CLASSES is a class containing
26199 registers from multiple units (SSE, MMX, integer). We avoid this by never
26200 combining those units in a single alternative in the machine description.
26201 Ensure that this constraint holds to avoid unexpected surprises.
26203 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26204 enforce these sanity checks. */
26207 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26208 enum machine_mode mode, int strict)
26210 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26211 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26212 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26213 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26214 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26215 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26217 gcc_assert (!strict);
26221 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26224 /* ??? This is a lie. We do have moves between mmx/general, and between
26225 mmx/sse2. But by saying we need secondary memory we discourage the
26226 register allocator from using the mmx registers unless needed. */
26227 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26230 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26232 /* SSE1 doesn't have any direct moves from other classes. */
26236 /* If the target says that inter-unit moves are more expensive
26237 than moving through memory, then don't generate them. */
26238 if (!TARGET_INTER_UNIT_MOVES)
26241 /* Between SSE and general, we have moves no larger than word size. */
26242 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26250 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26251 enum machine_mode mode, int strict)
26253 return inline_secondary_memory_needed (class1, class2, mode, strict);
26256 /* Return true if the registers in CLASS cannot represent the change from
26257 modes FROM to TO. */
26260 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26261 enum reg_class regclass)
26266 /* x87 registers can't do subreg at all, as all values are reformatted
26267 to extended precision. */
26268 if (MAYBE_FLOAT_CLASS_P (regclass))
26271 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26273 /* Vector registers do not support QI or HImode loads. If we don't
26274 disallow a change to these modes, reload will assume it's ok to
26275 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26276 the vec_dupv4hi pattern. */
26277 if (GET_MODE_SIZE (from) < 4)
26280 /* Vector registers do not support subreg with nonzero offsets, which
26281 are otherwise valid for integer registers. Since we can't see
26282 whether we have a nonzero offset from here, prohibit all
26283 nonparadoxical subregs changing size. */
26284 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26291 /* Return the cost of moving data of mode M between a
26292 register and memory. A value of 2 is the default; this cost is
26293 relative to those in `REGISTER_MOVE_COST'.
26295 This function is used extensively by register_move_cost, which is used to
26296 build tables at startup. Make it inline in this case.
26297 When IN is 2, return the maximum of the in and out move costs.
26299 If moving between registers and memory is more expensive than
26300 between two registers, you should define this macro to express the
26301 relative cost.
26303 Also model the increased moving costs of QImode registers in
26304 non-Q_REGS classes. */
26307 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26311 if (FLOAT_CLASS_P (regclass))
26329 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26330 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26332 if (SSE_CLASS_P (regclass))
26335 switch (GET_MODE_SIZE (mode))
26350 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26351 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26353 if (MMX_CLASS_P (regclass))
26356 switch (GET_MODE_SIZE (mode))
26368 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26369 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26371 switch (GET_MODE_SIZE (mode))
26374 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26377 return ix86_cost->int_store[0];
26378 if (TARGET_PARTIAL_REG_DEPENDENCY
26379 && optimize_function_for_speed_p (cfun))
26380 cost = ix86_cost->movzbl_load;
26382 cost = ix86_cost->int_load[0];
26384 return MAX (cost, ix86_cost->int_store[0]);
26390 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26392 return ix86_cost->movzbl_load;
26394 return ix86_cost->int_store[0] + 4;
26399 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26400 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26402 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
26403 if (mode == TFmode)
26406 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26408 cost = ix86_cost->int_load[2];
26410 cost = ix86_cost->int_store[2];
26411 return (cost * (((int) GET_MODE_SIZE (mode)
26412 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26417 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26420 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26424 /* Return the cost of moving data from a register in class CLASS1 to
26425 one in class CLASS2.
26427 It is not required that the cost always equal 2 when FROM is the same as TO;
26428 on some machines it is expensive to move between registers if they are not
26429 general registers. */
26432 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26433 reg_class_t class2_i)
26435 enum reg_class class1 = (enum reg_class) class1_i;
26436 enum reg_class class2 = (enum reg_class) class2_i;
26438 /* In case we require secondary memory, compute cost of the store followed
26439 by load. In order to avoid bad register allocation choices, we need
26440 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26442 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26446 cost += inline_memory_move_cost (mode, class1, 2);
26447 cost += inline_memory_move_cost (mode, class2, 2);
26449 /* In the case of copying from a general purpose register we may emit
26450 multiple stores followed by a single load, causing a memory size
26451 mismatch stall. Count this as an arbitrarily high cost of 20. */
26452 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26455 /* In the case of FP/MMX moves, the registers actually overlap, and we
26456 have to switch modes in order to treat them differently. */
26457 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26458 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26464 /* Moves between SSE/MMX and integer unit are expensive. */
26465 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26466 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26468 /* ??? By keeping the returned value relatively high, we limit the number
26469 of moves between integer and MMX/SSE registers for all targets.
26470 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
26471 where integer modes in MMX/SSE registers are not tieable
26472 because of missing QImode and HImode moves to, from or between
26473 MMX/SSE registers. */
26474 return MAX (8, ix86_cost->mmxsse_to_integer);
26476 if (MAYBE_FLOAT_CLASS_P (class1))
26477 return ix86_cost->fp_move;
26478 if (MAYBE_SSE_CLASS_P (class1))
26479 return ix86_cost->sse_move;
26480 if (MAYBE_MMX_CLASS_P (class1))
26481 return ix86_cost->mmx_move;
26485 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26488 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26490 /* Flags, and only flags, can hold CCmode values. */
26491 if (CC_REGNO_P (regno))
26492 return GET_MODE_CLASS (mode) == MODE_CC;
26493 if (GET_MODE_CLASS (mode) == MODE_CC
26494 || GET_MODE_CLASS (mode) == MODE_RANDOM
26495 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26497 if (FP_REGNO_P (regno))
26498 return VALID_FP_MODE_P (mode);
26499 if (SSE_REGNO_P (regno))
26501 /* We implement the move patterns for all vector modes into and
26502 out of SSE registers, even when no operation instructions
26503 are available. OImode moves are available only when AVX is enabled. */
26505 return ((TARGET_AVX && mode == OImode)
26506 || VALID_AVX256_REG_MODE (mode)
26507 || VALID_SSE_REG_MODE (mode)
26508 || VALID_SSE2_REG_MODE (mode)
26509 || VALID_MMX_REG_MODE (mode)
26510 || VALID_MMX_REG_MODE_3DNOW (mode));
26512 if (MMX_REGNO_P (regno))
26514 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26515 so if the register is available at all, then we can move data of
26516 the given mode into or out of it. */
26517 return (VALID_MMX_REG_MODE (mode)
26518 || VALID_MMX_REG_MODE_3DNOW (mode));
26521 if (mode == QImode)
26523 /* Take care with QImode values - they can live in non-QI regs,
26524 but then they do cause partial register stalls. */
26525 if (regno <= BX_REG || TARGET_64BIT)
26527 if (!TARGET_PARTIAL_REG_STALL)
26529 return reload_in_progress || reload_completed;
26531 /* We handle both integer and floats in the general purpose registers. */
26532 else if (VALID_INT_MODE_P (mode))
26534 else if (VALID_FP_MODE_P (mode))
26536 else if (VALID_DFP_MODE_P (mode))
26538 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26539 on to use that value in smaller contexts, this can easily force a
26540 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26541 supporting DImode, allow it. */
26542 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26548 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26549 tieable integer mode. */
26552 ix86_tieable_integer_mode_p (enum machine_mode mode)
26561 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26564 return TARGET_64BIT;
26571 /* Return true if MODE1 is accessible in a register that can hold MODE2
26572 without copying. That is, all register classes that can hold MODE2
26573 can also hold MODE1. */
26576 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26578 if (mode1 == mode2)
26581 if (ix86_tieable_integer_mode_p (mode1)
26582 && ix86_tieable_integer_mode_p (mode2))
26585 /* MODE2 being XFmode implies fp stack or general regs, which means we
26586 can tie any smaller floating point modes to it. Note that we do not
26587 tie this with TFmode. */
26588 if (mode2 == XFmode)
26589 return mode1 == SFmode || mode1 == DFmode;
26591 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26592 that we can tie it with SFmode. */
26593 if (mode2 == DFmode)
26594 return mode1 == SFmode;
26596 /* If MODE2 is only appropriate for an SSE register, then tie with
26597 any other mode acceptable to SSE registers. */
26598 if (GET_MODE_SIZE (mode2) == 16
26599 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26600 return (GET_MODE_SIZE (mode1) == 16
26601 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26603 /* If MODE2 is appropriate for an MMX register, then tie
26604 with any other mode acceptable to MMX registers. */
26605 if (GET_MODE_SIZE (mode2) == 8
26606 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26607 return (GET_MODE_SIZE (mode1) == 8
26608 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26613 /* Compute a (partial) cost for rtx X. Return true if the complete
26614 cost has been computed, and false if subexpressions should be
26615 scanned. In either case, *TOTAL contains the cost result. */
26618 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26620 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26621 enum machine_mode mode = GET_MODE (x);
26622 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26630 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26632 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26634 else if (flag_pic && SYMBOLIC_CONST (x)
26636 || (GET_CODE (x) != LABEL_REF
26637 && (GET_CODE (x) != SYMBOL_REF
26638 || !SYMBOL_REF_LOCAL_P (x)))))
26645 if (mode == VOIDmode)
26648 switch (standard_80387_constant_p (x))
26653 default: /* Other constants */
26658 /* Start with (MEM (SYMBOL_REF)), since that's where
26659 it'll probably end up. Add a penalty for size. */
26660 *total = (COSTS_N_INSNS (1)
26661 + (flag_pic != 0 && !TARGET_64BIT)
26662 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26668 /* Zero extension is often completely free on x86_64, so make
26669 it as cheap as possible. */
26670 if (TARGET_64BIT && mode == DImode
26671 && GET_MODE (XEXP (x, 0)) == SImode)
26673 else if (TARGET_ZERO_EXTEND_WITH_AND)
26674 *total = cost->add;
26676 *total = cost->movzx;
26680 *total = cost->movsx;
26684 if (CONST_INT_P (XEXP (x, 1))
26685 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26687 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26690 *total = cost->add;
26693 if ((value == 2 || value == 3)
26694 && cost->lea <= cost->shift_const)
26696 *total = cost->lea;
26706 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26708 if (CONST_INT_P (XEXP (x, 1)))
26710 if (INTVAL (XEXP (x, 1)) > 32)
26711 *total = cost->shift_const + COSTS_N_INSNS (2);
26713 *total = cost->shift_const * 2;
26717 if (GET_CODE (XEXP (x, 1)) == AND)
26718 *total = cost->shift_var * 2;
26720 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26725 if (CONST_INT_P (XEXP (x, 1)))
26726 *total = cost->shift_const;
26728 *total = cost->shift_var;
26733 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26735 /* ??? SSE scalar cost should be used here. */
26736 *total = cost->fmul;
26739 else if (X87_FLOAT_MODE_P (mode))
26741 *total = cost->fmul;
26744 else if (FLOAT_MODE_P (mode))
26746 /* ??? SSE vector cost should be used here. */
26747 *total = cost->fmul;
26752 rtx op0 = XEXP (x, 0);
26753 rtx op1 = XEXP (x, 1);
26755 if (CONST_INT_P (XEXP (x, 1)))
26757 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26758 for (nbits = 0; value != 0; value &= value - 1)
26762 /* This is arbitrary. */
26765 /* Compute costs correctly for widening multiplication. */
26766 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26767 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26768 == GET_MODE_SIZE (mode))
26770 int is_mulwiden = 0;
26771 enum machine_mode inner_mode = GET_MODE (op0);
26773 if (GET_CODE (op0) == GET_CODE (op1))
26774 is_mulwiden = 1, op1 = XEXP (op1, 0);
26775 else if (CONST_INT_P (op1))
26777 if (GET_CODE (op0) == SIGN_EXTEND)
26778 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26781 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26785 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26788 *total = (cost->mult_init[MODE_INDEX (mode)]
26789 + nbits * cost->mult_bit
26790 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26799 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26800 /* ??? SSE cost should be used here. */
26801 *total = cost->fdiv;
26802 else if (X87_FLOAT_MODE_P (mode))
26803 *total = cost->fdiv;
26804 else if (FLOAT_MODE_P (mode))
26805 /* ??? SSE vector cost should be used here. */
26806 *total = cost->fdiv;
26808 *total = cost->divide[MODE_INDEX (mode)];
26812 if (GET_MODE_CLASS (mode) == MODE_INT
26813 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26815 if (GET_CODE (XEXP (x, 0)) == PLUS
26816 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26817 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26818 && CONSTANT_P (XEXP (x, 1)))
26820 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26821 if (val == 2 || val == 4 || val == 8)
26823 *total = cost->lea;
26824 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26825 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26826 outer_code, speed);
26827 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26831 else if (GET_CODE (XEXP (x, 0)) == MULT
26832 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26834 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26835 if (val == 2 || val == 4 || val == 8)
26837 *total = cost->lea;
26838 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26839 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26843 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26845 *total = cost->lea;
26846 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26847 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26848 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26855 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26857 /* ??? SSE cost should be used here. */
26858 *total = cost->fadd;
26861 else if (X87_FLOAT_MODE_P (mode))
26863 *total = cost->fadd;
26866 else if (FLOAT_MODE_P (mode))
26868 /* ??? SSE vector cost should be used here. */
26869 *total = cost->fadd;
26877 if (!TARGET_64BIT && mode == DImode)
26879 *total = (cost->add * 2
26880 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26881 << (GET_MODE (XEXP (x, 0)) != DImode))
26882 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26883 << (GET_MODE (XEXP (x, 1)) != DImode)));
26889 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26891 /* ??? SSE cost should be used here. */
26892 *total = cost->fchs;
26895 else if (X87_FLOAT_MODE_P (mode))
26897 *total = cost->fchs;
26900 else if (FLOAT_MODE_P (mode))
26902 /* ??? SSE vector cost should be used here. */
26903 *total = cost->fchs;
26909 if (!TARGET_64BIT && mode == DImode)
26910 *total = cost->add * 2;
26912 *total = cost->add;
26916 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26917 && XEXP (XEXP (x, 0), 1) == const1_rtx
26918 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26919 && XEXP (x, 1) == const0_rtx)
26921 /* This kind of construct is implemented using test[bwl].
26922 Treat it as if we had an AND. */
26923 *total = (cost->add
26924 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26925 + rtx_cost (const1_rtx, outer_code, speed));
26931 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26936 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26937 /* ??? SSE cost should be used here. */
26938 *total = cost->fabs;
26939 else if (X87_FLOAT_MODE_P (mode))
26940 *total = cost->fabs;
26941 else if (FLOAT_MODE_P (mode))
26942 /* ??? SSE vector cost should be used here. */
26943 *total = cost->fabs;
26947 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26948 /* ??? SSE cost should be used here. */
26949 *total = cost->fsqrt;
26950 else if (X87_FLOAT_MODE_P (mode))
26951 *total = cost->fsqrt;
26952 else if (FLOAT_MODE_P (mode))
26953 /* ??? SSE vector cost should be used here. */
26954 *total = cost->fsqrt;
26958 if (XINT (x, 1) == UNSPEC_TP)
26965 case VEC_DUPLICATE:
26966 /* ??? Assume all of these vector manipulation patterns are
26967 recognizable, in which case they all pretty much have the same cost. */
26969 *total = COSTS_N_INSNS (1);
26979 static int current_machopic_label_num;
26981 /* Given a symbol name and its associated stub, write out the
26982 definition of the stub. */
26985 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26987 unsigned int length;
26988 char *binder_name, *symbol_name, lazy_ptr_name[32];
26989 int label = ++current_machopic_label_num;
26991 /* For 64-bit we shouldn't get here. */
26992 gcc_assert (!TARGET_64BIT);
26994 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26995 symb = targetm.strip_name_encoding (symb);
26997 length = strlen (stub);
26998 binder_name = XALLOCAVEC (char, length + 32);
26999 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27001 length = strlen (symb);
27002 symbol_name = XALLOCAVEC (char, length + 32);
27003 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27005 sprintf (lazy_ptr_name, "L%d$lz", label);
27008 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27010 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27012 fprintf (file, "%s:\n", stub);
27013 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27017 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27018 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27019 fprintf (file, "\tjmp\t*%%edx\n");
27022 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27024 fprintf (file, "%s:\n", binder_name);
27028 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27029 fputs ("\tpushl\t%eax\n", file);
27032 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27034 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27036 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27037 fprintf (file, "%s:\n", lazy_ptr_name);
27038 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27039 fprintf (file, ASM_LONG "%s\n", binder_name);
27041 #endif /* TARGET_MACHO */
27043 /* Order the registers for the register allocator. */
27046 x86_order_regs_for_local_alloc (void)
27051 /* First allocate the local general purpose registers. */
27052 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27053 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27054 reg_alloc_order [pos++] = i;
27056 /* Global general purpose registers. */
27057 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27058 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27059 reg_alloc_order [pos++] = i;
27061 /* x87 registers come first in case we are doing FP math using them. */
27063 if (!TARGET_SSE_MATH)
27064 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27065 reg_alloc_order [pos++] = i;
27067 /* SSE registers. */
27068 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27069 reg_alloc_order [pos++] = i;
27070 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27071 reg_alloc_order [pos++] = i;
27073 /* x87 registers. */
27074 if (TARGET_SSE_MATH)
27075 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27076 reg_alloc_order [pos++] = i;
27078 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27079 reg_alloc_order [pos++] = i;
27081 /* Initialize the rest of the array, as we do not allocate some registers at all. */
27083 while (pos < FIRST_PSEUDO_REGISTER)
27084 reg_alloc_order [pos++] = 0;
27087 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27088 struct attribute_spec.handler. */
27090 ix86_handle_abi_attribute (tree *node, tree name,
27091 tree args ATTRIBUTE_UNUSED,
27092 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27094 if (TREE_CODE (*node) != FUNCTION_TYPE
27095 && TREE_CODE (*node) != METHOD_TYPE
27096 && TREE_CODE (*node) != FIELD_DECL
27097 && TREE_CODE (*node) != TYPE_DECL)
27099 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27101 *no_add_attrs = true;
27106 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27108 *no_add_attrs = true;
27112 /* Can combine regparm with all attributes but fastcall. */
27113 if (is_attribute_p ("ms_abi", name))
27115 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27117 error ("ms_abi and sysv_abi attributes are not compatible");
27122 else if (is_attribute_p ("sysv_abi", name))
27124 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27126 error ("ms_abi and sysv_abi attributes are not compatible");
27135 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27136 struct attribute_spec.handler. */
27138 ix86_handle_struct_attribute (tree *node, tree name,
27139 tree args ATTRIBUTE_UNUSED,
27140 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27143 if (DECL_P (*node))
27145 if (TREE_CODE (*node) == TYPE_DECL)
27146 type = &TREE_TYPE (*node);
27151 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27152 || TREE_CODE (*type) == UNION_TYPE)))
27154 warning (OPT_Wattributes, "%qE attribute ignored",
27156 *no_add_attrs = true;
27159 else if ((is_attribute_p ("ms_struct", name)
27160 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27161 || ((is_attribute_p ("gcc_struct", name)
27162 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27164 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27166 *no_add_attrs = true;
27173 ix86_handle_fndecl_attribute (tree *node, tree name,
27174 tree args ATTRIBUTE_UNUSED,
27175 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27177 if (TREE_CODE (*node) != FUNCTION_DECL)
27179 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27181 *no_add_attrs = true;
27187 ix86_ms_bitfield_layout_p (const_tree record_type)
27189 return ((TARGET_MS_BITFIELD_LAYOUT
27190 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27191 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27194 /* Returns an expression indicating where the this parameter is
27195 located on entry to the FUNCTION. */
27198 x86_this_parameter (tree function)
27200 tree type = TREE_TYPE (function);
27201 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27206 const int *parm_regs;
27208 if (ix86_function_type_abi (type) == MS_ABI)
27209 parm_regs = x86_64_ms_abi_int_parameter_registers;
27211 parm_regs = x86_64_int_parameter_registers;
27212 return gen_rtx_REG (DImode, parm_regs[aggr]);
27215 nregs = ix86_function_regparm (type, function);
27217 if (nregs > 0 && !stdarg_p (type))
27221 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27222 regno = aggr ? DX_REG : CX_REG;
27223 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27227 return gen_rtx_MEM (SImode,
27228 plus_constant (stack_pointer_rtx, 4));
27237 return gen_rtx_MEM (SImode,
27238 plus_constant (stack_pointer_rtx, 4));
27241 return gen_rtx_REG (SImode, regno);
27244 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27247 /* Determine whether x86_output_mi_thunk can succeed. */
27250 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27251 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27252 HOST_WIDE_INT vcall_offset, const_tree function)
27254 /* 64-bit can handle anything. */
27258 /* For 32-bit, everything's fine if we have one free register. */
27259 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27262 /* Need a free register for vcall_offset. */
27266 /* Need a free register for GOT references. */
27267 if (flag_pic && !targetm.binds_local_p (function))
27270 /* Otherwise ok. */
27274 /* Output the assembler code for a thunk function. THUNK_DECL is the
27275 declaration for the thunk function itself, FUNCTION is the decl for
27276 the target function. DELTA is an immediate constant offset to be
27277 added to THIS. If VCALL_OFFSET is nonzero, the word at
27278 *(*this + vcall_offset) should be added to THIS. */
27281 x86_output_mi_thunk (FILE *file,
27282 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27283 HOST_WIDE_INT vcall_offset, tree function)
27286 rtx this_param = x86_this_parameter (function);
27289 /* Make sure unwind info is emitted for the thunk if needed. */
27290 final_start_function (emit_barrier (), file, 1);
27292 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27293 pull it in now and let DELTA benefit. */
27294 if (REG_P (this_param))
27295 this_reg = this_param;
27296 else if (vcall_offset)
27298 /* Put the this parameter into %eax. */
27299 xops[0] = this_param;
27300 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27301 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27304 this_reg = NULL_RTX;
27306 /* Adjust the this parameter by a fixed constant. */
27309 xops[0] = GEN_INT (delta);
27310 xops[1] = this_reg ? this_reg : this_param;
27313 if (!x86_64_general_operand (xops[0], DImode))
27315 tmp = gen_rtx_REG (DImode, R10_REG);
27317 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27319 xops[1] = this_param;
27321 if (x86_maybe_negate_const_int (&xops[0], DImode))
27322 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27324 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27326 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27327 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27329 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27332 /* Adjust the this parameter by a value stored in the vtable. */
27336 tmp = gen_rtx_REG (DImode, R10_REG);
27339 int tmp_regno = CX_REG;
27340 if (lookup_attribute ("fastcall",
27341 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27342 || lookup_attribute ("thiscall",
27343 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27344 tmp_regno = AX_REG;
27345 tmp = gen_rtx_REG (SImode, tmp_regno);
27348 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27350 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27352 /* Adjust the this parameter. */
27353 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27354 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27356 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27357 xops[0] = GEN_INT (vcall_offset);
27359 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27360 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27362 xops[1] = this_reg;
27363 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27366 /* If necessary, drop THIS back to its stack slot. */
27367 if (this_reg && this_reg != this_param)
27369 xops[0] = this_reg;
27370 xops[1] = this_param;
27371 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27374 xops[0] = XEXP (DECL_RTL (function), 0);
27377 if (!flag_pic || targetm.binds_local_p (function))
27378 output_asm_insn ("jmp\t%P0", xops);
27379 /* All thunks should be in the same object as their target,
27380 and thus binds_local_p should be true. */
27381 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27382 gcc_unreachable ();
27385 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27386 tmp = gen_rtx_CONST (Pmode, tmp);
27387 tmp = gen_rtx_MEM (QImode, tmp);
27389 output_asm_insn ("jmp\t%A0", xops);
27394 if (!flag_pic || targetm.binds_local_p (function))
27395 output_asm_insn ("jmp\t%P0", xops);
27400 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27401 if (TARGET_MACHO_BRANCH_ISLANDS)
27402 sym_ref = (gen_rtx_SYMBOL_REF
27404 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27405 tmp = gen_rtx_MEM (QImode, sym_ref);
27407 output_asm_insn ("jmp\t%0", xops);
27410 #endif /* TARGET_MACHO */
27412 tmp = gen_rtx_REG (SImode, CX_REG);
27413 output_set_got (tmp, NULL_RTX);
27416 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27417 output_asm_insn ("jmp\t{*}%1", xops);
27420 final_end_function ();
27424 x86_file_start (void)
27426 default_file_start ();
27428 darwin_file_start ();
27430 if (X86_FILE_START_VERSION_DIRECTIVE)
27431 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27432 if (X86_FILE_START_FLTUSED)
27433 fputs ("\t.global\t__fltused\n", asm_out_file);
27434 if (ix86_asm_dialect == ASM_INTEL)
27435 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27439 x86_field_alignment (tree field, int computed)
27441 enum machine_mode mode;
27442 tree type = TREE_TYPE (field);
27444 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27446 mode = TYPE_MODE (strip_array_types (type));
27447 if (mode == DFmode || mode == DCmode
27448 || GET_MODE_CLASS (mode) == MODE_INT
27449 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27450 return MIN (32, computed);
27454 /* Output assembler code to FILE to increment profiler label # LABELNO
27455 for profiling a function entry. */
27457 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27459 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
27464 #ifndef NO_PROFILE_COUNTERS
27465 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27468 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27469 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27471 fprintf (file, "\tcall\t%s\n", mcount_name);
27475 #ifndef NO_PROFILE_COUNTERS
27476 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27479 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27483 #ifndef NO_PROFILE_COUNTERS
27484 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27487 fprintf (file, "\tcall\t%s\n", mcount_name);
27491 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27492 /* We don't have exact information about the insn sizes, but we may assume
27493 quite safely that we are informed about all 1 byte insns and memory
27494 address sizes. This is enough to eliminate unnecessary padding in
27498 min_insn_size (rtx insn)
27502 if (!INSN_P (insn) || !active_insn_p (insn))
/* Discard alignments we've emitted, and jump table data. */
27506 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27507 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27509 if (JUMP_TABLE_DATA_P (insn))
/* Important case: calls are always 5 bytes.
   It is common to have many calls in a row. */
27515 && symbolic_reference_mentioned_p (PATTERN (insn))
27516 && !SIBLING_CALL_P (insn))
27518 len = get_attr_length (insn);
27522 /* For normal instructions we rely on get_attr_length being exact,
27523 with a few exceptions. */
27524 if (!JUMP_P (insn))
27526 enum attr_type type = get_attr_type (insn);
27531 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27532 || asm_noperands (PATTERN (insn)) >= 0)
27539 /* Otherwise trust get_attr_length. */
27543 l = get_attr_length_address (insn);
27544 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
/* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
   16 byte window. */
27557 ix86_avoid_jump_mispredicts (void)
27559 rtx insn, start = get_insns ();
27560 int nbytes = 0, njumps = 0;
/* Look for all minimal intervals of instructions containing 4 jumps.
   The intervals are bounded by START and INSN. NBYTES is the total
   size of the instructions in the interval, including INSN and not
   including START. When NBYTES is smaller than 16, it is possible
   that the ends of START and INSN fall into the same 16 byte page.

   The smallest in-page offset at which INSN can start is the case where
   START ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
   We add a p2align to the 16 byte window with max_skip 15 - NBYTES + sizeof (INSN).
27573 for (insn = start; insn; insn = NEXT_INSN (insn))
27577 if (LABEL_P (insn))
27579 int align = label_to_alignment (insn);
27580 int max_skip = label_to_max_skip (insn);
/* If align > 3, only up to 16 - max_skip - 1 bytes can already
   be in the current 16 byte page, because otherwise
   ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
   bytes to reach a 16 byte boundary. */
27589 || (align <= 3 && max_skip != (1 << align) - 1))
27592 fprintf (dump_file, "Label %i with max_skip %i\n",
27593 INSN_UID (insn), max_skip);
27596 while (nbytes + max_skip >= 16)
27598 start = NEXT_INSN (start);
27599 if ((JUMP_P (start)
27600 && GET_CODE (PATTERN (start)) != ADDR_VEC
27601 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27603 njumps--, isjump = 1;
27606 nbytes -= min_insn_size (start);
27612 min_size = min_insn_size (insn);
27613 nbytes += min_size;
27615 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27616 INSN_UID (insn), min_size);
27618 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27619 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27627 start = NEXT_INSN (start);
27628 if ((JUMP_P (start)
27629 && GET_CODE (PATTERN (start)) != ADDR_VEC
27630 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27632 njumps--, isjump = 1;
27635 nbytes -= min_insn_size (start);
27637 gcc_assert (njumps >= 0);
27639 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27640 INSN_UID (start), INSN_UID (insn), nbytes);
27642 if (njumps == 3 && isjump && nbytes < 16)
27644 int padsize = 15 - nbytes + min_insn_size (insn);
27647 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27648 INSN_UID (insn), padsize);
27649 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
/* The AMD Athlon works faster
   when RET is not the destination of a conditional jump or directly
   preceded by another jump instruction. We avoid the penalty by
   inserting a NOP just before the RET instructions in such cases. */
27660 ix86_pad_returns (void)
27665 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27667 basic_block bb = e->src;
27668 rtx ret = BB_END (bb);
27670 bool replace = false;
27672 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27673 || optimize_bb_for_size_p (bb))
27675 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27676 if (active_insn_p (prev) || LABEL_P (prev))
27678 if (prev && LABEL_P (prev))
27683 FOR_EACH_EDGE (e, ei, bb->preds)
27684 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27685 && !(e->flags & EDGE_FALLTHRU))
27690 prev = prev_active_insn (ret);
27692 && ((JUMP_P (prev) && any_condjump_p (prev))
/* Empty functions get a branch mispredict even when the jump destination
   is not visible to us. */
27697 if (!prev && !optimize_function_for_size_p (cfun))
27702 emit_jump_insn_before (gen_return_internal_long (), ret);
/* Implement machine specific optimizations. We implement padding of returns
   for K8 CPUs and a pass to avoid placing 4 jumps in a single 16 byte window. */
27713 if (optimize && optimize_function_for_speed_p (cfun))
27715 if (TARGET_PAD_RETURNS)
27716 ix86_pad_returns ();
27717 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27718 if (TARGET_FOUR_JUMP_LIMIT)
27719 ix86_avoid_jump_mispredicts ();
/* Return nonzero when a QImode register that must be represented via a
   REX prefix is used. */
27727 x86_extended_QIreg_mentioned_p (rtx insn)
27730 extract_insn_cached (insn);
27731 for (i = 0; i < recog_data.n_operands; i++)
27732 if (REG_P (recog_data.operand[i])
27733 && REGNO (recog_data.operand[i]) > BX_REG)
/* Return nonzero when P points to a register encoded via a REX prefix.
   Called via for_each_rtx. */
27741 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27743 unsigned int regno;
27746 regno = REGNO (*p);
27747 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
/* Return true when INSN mentions a register that must be encoded using a
   REX prefix. */
27753 x86_extended_reg_mentioned_p (rtx insn)
27755 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27756 extended_reg_mentioned_1, NULL);
27759 /* If profitable, negate (without causing overflow) integer constant
27760 of mode MODE at location LOC. Return true in this case. */
27762 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27766 if (!CONST_INT_P (*loc))
27772 /* DImode x86_64 constants must fit in 32 bits. */
27773 gcc_assert (x86_64_immediate_operand (*loc, mode));
27784 gcc_unreachable ();
27787 /* Avoid overflows. */
27788 if (mode_signbit_p (mode, *loc))
27791 val = INTVAL (*loc);
/* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
   Exception: -128 fits in a sign-extended 8-bit immediate while +128
   does not, so swap the sign and the op. */
27795 if ((val < 0 && val != -128)
27798 *loc = GEN_INT (-val);
27805 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27806 optabs would emit if we didn't have TFmode patterns. */
27809 x86_emit_floatuns (rtx operands[2])
27811 rtx neglab, donelab, i0, i1, f0, in, out;
27812 enum machine_mode mode, inmode;
27814 inmode = GET_MODE (operands[1]);
27815 gcc_assert (inmode == SImode || inmode == DImode);
27818 in = force_reg (inmode, operands[1]);
27819 mode = GET_MODE (out);
27820 neglab = gen_label_rtx ();
27821 donelab = gen_label_rtx ();
27822 f0 = gen_reg_rtx (mode);
27824 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27826 expand_float (out, in, 0);
27828 emit_jump_insn (gen_jump (donelab));
27831 emit_label (neglab);
27833 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27835 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27837 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27839 expand_float (f0, i0, 0);
27841 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27843 emit_label (donelab);
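/* Illustrative sketch (not part of GCC, assumes <stdint.h>): the scalar
   equivalent of the DImode sequence emitted above.  A nonnegative input
   converts directly with the signed instruction; a large input is halved
   with its low bit folded in, converted, then doubled.  */
static double
floatuns_sketch (uint64_t u)
{
  uint64_t i0;
  double f0;
  if ((int64_t) u >= 0)
    return (double) (int64_t) u;	/* fits in the signed range */
  i0 = (u >> 1) | (u & 1);		/* halve, keeping the rounding bit */
  f0 = (double) (int64_t) i0;
  return f0 + f0;			/* double the halved result */
}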
27846 /* AVX does not support 32-byte integer vector operations,
27847 thus the longest vector we are faced with is V16QImode. */
27848 #define MAX_VECT_LEN 16
27850 struct expand_vec_perm_d
27852 rtx target, op0, op1;
27853 unsigned char perm[MAX_VECT_LEN];
27854 enum machine_mode vmode;
27855 unsigned char nelt;
27859 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27860 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27862 /* Get a vector mode of the same size as the original but with elements
27863 twice as wide. This is only guaranteed to apply to integral vectors. */
27865 static inline enum machine_mode
27866 get_mode_wider_vector (enum machine_mode o)
27868 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27869 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27870 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27871 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
27875 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27876 with all elements equal to VAR. Return true if successful. */
27879 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27880 rtx target, rtx val)
27903 /* First attempt to recognize VAL as-is. */
27904 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27905 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27906 if (recog_memoized (insn) < 0)
27909 /* If that fails, force VAL into a register. */
27912 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27913 seq = get_insns ();
27916 emit_insn_before (seq, insn);
27918 ok = recog_memoized (insn) >= 0;
27927 if (TARGET_SSE || TARGET_3DNOW_A)
27931 val = gen_lowpart (SImode, val);
27932 x = gen_rtx_TRUNCATE (HImode, val);
27933 x = gen_rtx_VEC_DUPLICATE (mode, x);
27934 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27947 struct expand_vec_perm_d dperm;
27951 memset (&dperm, 0, sizeof (dperm));
27952 dperm.target = target;
27953 dperm.vmode = mode;
27954 dperm.nelt = GET_MODE_NUNITS (mode);
27955 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27957 /* Extend to SImode using a paradoxical SUBREG. */
27958 tmp1 = gen_reg_rtx (SImode);
27959 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27961 /* Insert the SImode value as low element of a V4SImode vector. */
27962 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27963 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27965 ok = (expand_vec_perm_1 (&dperm)
27966 || expand_vec_perm_broadcast_1 (&dperm));
27978 /* Replicate the value once into the next wider mode and recurse. */
27980 enum machine_mode smode, wsmode, wvmode;
27983 smode = GET_MODE_INNER (mode);
27984 wvmode = get_mode_wider_vector (mode);
27985 wsmode = GET_MODE_INNER (wvmode);
27987 val = convert_modes (wsmode, smode, val, true);
27988 x = expand_simple_binop (wsmode, ASHIFT, val,
27989 GEN_INT (GET_MODE_BITSIZE (smode)),
27990 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27991 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27993 x = gen_lowpart (wvmode, target);
27994 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28002 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28003 rtx x = gen_reg_rtx (hvmode);
28005 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28008 x = gen_rtx_VEC_CONCAT (mode, x, x);
28009 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28018 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28019 whose ONE_VAR element is VAR, and other elements are zero. Return true
28023 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28024 rtx target, rtx var, int one_var)
28026 enum machine_mode vsimode;
28029 bool use_vector_set = false;
28034 /* For SSE4.1, we normally use vector set. But if the second
28035 element is zero and inter-unit moves are OK, we use movq
28037 use_vector_set = (TARGET_64BIT
28039 && !(TARGET_INTER_UNIT_MOVES
28045 use_vector_set = TARGET_SSE4_1;
28048 use_vector_set = TARGET_SSE2;
28051 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28058 use_vector_set = TARGET_AVX;
28061 /* Use ix86_expand_vector_set in 64bit mode only. */
28062 use_vector_set = TARGET_AVX && TARGET_64BIT;
28068 if (use_vector_set)
28070 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28071 var = force_reg (GET_MODE_INNER (mode), var);
28072 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28088 var = force_reg (GET_MODE_INNER (mode), var);
28089 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28090 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28095 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28096 new_target = gen_reg_rtx (mode);
28098 new_target = target;
28099 var = force_reg (GET_MODE_INNER (mode), var);
28100 x = gen_rtx_VEC_DUPLICATE (mode, var);
28101 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28102 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28105 /* We need to shuffle the value to the correct position, so
28106 create a new pseudo to store the intermediate result. */
28108 /* With SSE2, we can use the integer shuffle insns. */
28109 if (mode != V4SFmode && TARGET_SSE2)
28111 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28113 GEN_INT (one_var == 1 ? 0 : 1),
28114 GEN_INT (one_var == 2 ? 0 : 1),
28115 GEN_INT (one_var == 3 ? 0 : 1)));
28116 if (target != new_target)
28117 emit_move_insn (target, new_target);
28121 /* Otherwise convert the intermediate result to V4SFmode and
28122 use the SSE1 shuffle instructions. */
28123 if (mode != V4SFmode)
28125 tmp = gen_reg_rtx (V4SFmode);
28126 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28131 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28133 GEN_INT (one_var == 1 ? 0 : 1),
28134 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28135 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28137 if (mode != V4SFmode)
28138 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28139 else if (tmp != target)
28140 emit_move_insn (target, tmp);
28142 else if (target != new_target)
28143 emit_move_insn (target, new_target);
28148 vsimode = V4SImode;
28154 vsimode = V2SImode;
28160 /* Zero extend the variable element to SImode and recurse. */
28161 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28163 x = gen_reg_rtx (vsimode);
28164 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28166 gcc_unreachable ();
28168 emit_move_insn (target, gen_lowpart (mode, x));
28176 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28177 consisting of the values in VALS. It is known that all elements
28178 except ONE_VAR are constants. Return true if successful. */
28181 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28182 rtx target, rtx vals, int one_var)
28184 rtx var = XVECEXP (vals, 0, one_var);
28185 enum machine_mode wmode;
28188 const_vec = copy_rtx (vals);
28189 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28190 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28198 /* For the two element vectors, it's just as easy to use
28199 the general case. */
28203 /* Use ix86_expand_vector_set in 64bit mode only. */
28226 /* There's no way to set one QImode entry easily. Combine
28227 the variable value with its adjacent constant value, and
28228 promote to an HImode set. */
28229 x = XVECEXP (vals, 0, one_var ^ 1);
28232 var = convert_modes (HImode, QImode, var, true);
28233 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28234 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28235 x = GEN_INT (INTVAL (x) & 0xff);
28239 var = convert_modes (HImode, QImode, var, true);
28240 x = gen_int_mode (INTVAL (x) << 8, HImode);
28242 if (x != const0_rtx)
28243 var = expand_simple_binop (HImode, IOR, var, x, var,
28244 1, OPTAB_LIB_WIDEN);
28246 x = gen_reg_rtx (wmode);
28247 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28248 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28250 emit_move_insn (target, gen_lowpart (mode, x));
28257 emit_move_insn (target, const_vec);
28258 ix86_expand_vector_set (mmx_ok, target, var, one_var);
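/* Illustrative sketch (not part of GCC): the QImode pairing performed in
   the V8QImode/V16QImode case above.  The variable byte and its adjacent
   constant byte are merged into one HImode value (little-endian lane
   layout), which is then set at position one_var >> 1.  */
static unsigned short
pair_qi_sketch (unsigned char var, unsigned char neighbor, int one_var)
{
  if (one_var & 1)
    return (unsigned short) ((var << 8) | neighbor);	/* var in high byte */
  return (unsigned short) ((neighbor << 8) | var);	/* var in low byte */
}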
28262 /* A subroutine of ix86_expand_vector_init_general. Use vector
28263 concatenate to handle the most general case: all values variable,
28264 and none identical. */
28267 ix86_expand_vector_init_concat (enum machine_mode mode,
28268 rtx target, rtx *ops, int n)
28270 enum machine_mode cmode, hmode = VOIDmode;
28271 rtx first[8], second[4];
28311 gcc_unreachable ();
28314 if (!register_operand (ops[1], cmode))
28315 ops[1] = force_reg (cmode, ops[1]);
28316 if (!register_operand (ops[0], cmode))
28317 ops[0] = force_reg (cmode, ops[0]);
28318 emit_insn (gen_rtx_SET (VOIDmode, target,
28319 gen_rtx_VEC_CONCAT (mode, ops[0],
28339 gcc_unreachable ();
28355 gcc_unreachable ();
28360 /* FIXME: We process inputs backward to help RA. PR 36222. */
28363 for (; i > 0; i -= 2, j--)
28365 first[j] = gen_reg_rtx (cmode);
28366 v = gen_rtvec (2, ops[i - 1], ops[i]);
28367 ix86_expand_vector_init (false, first[j],
28368 gen_rtx_PARALLEL (cmode, v));
28374 gcc_assert (hmode != VOIDmode);
28375 for (i = j = 0; i < n; i += 2, j++)
28377 second[j] = gen_reg_rtx (hmode);
28378 ix86_expand_vector_init_concat (hmode, second [j],
28382 ix86_expand_vector_init_concat (mode, target, second, n);
28385 ix86_expand_vector_init_concat (mode, target, first, n);
28389 gcc_unreachable ();
28393 /* A subroutine of ix86_expand_vector_init_general. Use vector
28394 interleave to handle the most general case: all values variable,
28395 and none identical. */
28398 ix86_expand_vector_init_interleave (enum machine_mode mode,
28399 rtx target, rtx *ops, int n)
28401 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28404 rtx (*gen_load_even) (rtx, rtx, rtx);
28405 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28406 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28411 gen_load_even = gen_vec_setv8hi;
28412 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28413 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28414 inner_mode = HImode;
28415 first_imode = V4SImode;
28416 second_imode = V2DImode;
28417 third_imode = VOIDmode;
28420 gen_load_even = gen_vec_setv16qi;
28421 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28422 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28423 inner_mode = QImode;
28424 first_imode = V8HImode;
28425 second_imode = V4SImode;
28426 third_imode = V2DImode;
28429 gcc_unreachable ();
28432 for (i = 0; i < n; i++)
/* Extend the odd element to SImode using a paradoxical SUBREG. */
28435 op0 = gen_reg_rtx (SImode);
28436 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28438 /* Insert the SImode value as low element of V4SImode vector. */
28439 op1 = gen_reg_rtx (V4SImode);
28440 op0 = gen_rtx_VEC_MERGE (V4SImode,
28441 gen_rtx_VEC_DUPLICATE (V4SImode,
28443 CONST0_RTX (V4SImode),
28445 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
/* Cast the V4SImode vector back to a vector in the original mode. */
28448 op0 = gen_reg_rtx (mode);
28449 emit_move_insn (op0, gen_lowpart (mode, op1));
/* Load even elements into the second position. */
28452 emit_insn (gen_load_even (op0,
28453 force_reg (inner_mode,
28457 /* Cast vector to FIRST_IMODE vector. */
28458 ops[i] = gen_reg_rtx (first_imode);
28459 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28462 /* Interleave low FIRST_IMODE vectors. */
28463 for (i = j = 0; i < n; i += 2, j++)
28465 op0 = gen_reg_rtx (first_imode);
28466 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28468 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28469 ops[j] = gen_reg_rtx (second_imode);
28470 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28473 /* Interleave low SECOND_IMODE vectors. */
28474 switch (second_imode)
28477 for (i = j = 0; i < n / 2; i += 2, j++)
28479 op0 = gen_reg_rtx (second_imode);
28480 emit_insn (gen_interleave_second_low (op0, ops[i],
28483 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28485 ops[j] = gen_reg_rtx (third_imode);
28486 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28488 second_imode = V2DImode;
28489 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28493 op0 = gen_reg_rtx (second_imode);
28494 emit_insn (gen_interleave_second_low (op0, ops[0],
/* Cast the SECOND_IMODE vector back to a vector in the original
28499 emit_insn (gen_rtx_SET (VOIDmode, target,
28500 gen_lowpart (mode, op0)));
28504 gcc_unreachable ();
28508 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28509 all values variable, and none identical. */
28512 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28513 rtx target, rtx vals)
28515 rtx ops[32], op0, op1;
28516 enum machine_mode half_mode = VOIDmode;
28523 if (!mmx_ok && !TARGET_SSE)
28535 n = GET_MODE_NUNITS (mode);
28536 for (i = 0; i < n; i++)
28537 ops[i] = XVECEXP (vals, 0, i);
28538 ix86_expand_vector_init_concat (mode, target, ops, n);
28542 half_mode = V16QImode;
28546 half_mode = V8HImode;
28550 n = GET_MODE_NUNITS (mode);
28551 for (i = 0; i < n; i++)
28552 ops[i] = XVECEXP (vals, 0, i);
28553 op0 = gen_reg_rtx (half_mode);
28554 op1 = gen_reg_rtx (half_mode);
28555 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28557 ix86_expand_vector_init_interleave (half_mode, op1,
28558 &ops [n >> 1], n >> 2);
28559 emit_insn (gen_rtx_SET (VOIDmode, target,
28560 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28564 if (!TARGET_SSE4_1)
28572 /* Don't use ix86_expand_vector_init_interleave if we can't
28573 move from GPR to SSE register directly. */
28574 if (!TARGET_INTER_UNIT_MOVES)
28577 n = GET_MODE_NUNITS (mode);
28578 for (i = 0; i < n; i++)
28579 ops[i] = XVECEXP (vals, 0, i);
28580 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28588 gcc_unreachable ();
28592 int i, j, n_elts, n_words, n_elt_per_word;
28593 enum machine_mode inner_mode;
28594 rtx words[4], shift;
28596 inner_mode = GET_MODE_INNER (mode);
28597 n_elts = GET_MODE_NUNITS (mode);
28598 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28599 n_elt_per_word = n_elts / n_words;
28600 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28602 for (i = 0; i < n_words; ++i)
28604 rtx word = NULL_RTX;
28606 for (j = 0; j < n_elt_per_word; ++j)
28608 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28609 elt = convert_modes (word_mode, inner_mode, elt, true);
28615 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28616 word, 1, OPTAB_LIB_WIDEN);
28617 word = expand_simple_binop (word_mode, IOR, word, elt,
28618 word, 1, OPTAB_LIB_WIDEN);
28626 emit_move_insn (target, gen_lowpart (mode, words[0]));
28627 else if (n_words == 2)
28629 rtx tmp = gen_reg_rtx (mode);
28630 emit_clobber (tmp);
28631 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28632 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28633 emit_move_insn (target, tmp);
28635 else if (n_words == 4)
28637 rtx tmp = gen_reg_rtx (V4SImode);
28638 gcc_assert (word_mode == SImode);
28639 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28640 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28641 emit_move_insn (target, gen_lowpart (mode, tmp));
28644 gcc_unreachable ();
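/* Illustrative sketch (not part of GCC): the word-building loop above,
   specialized to QImode elements packed into SImode words.  Elements are
   folded in from the highest index down, so element
   word_idx * n_elt_per_word lands in the low byte.  */
static unsigned int
pack_word_sketch (const unsigned char *elts, int n_elt_per_word,
                  int word_idx)
{
  unsigned int word = 0;
  int j;
  for (j = 0; j < n_elt_per_word; j++)
    word = (word << 8) | elts[(word_idx + 1) * n_elt_per_word - j - 1];
  return word;
}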
28648 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28649 instructions unless MMX_OK is true. */
28652 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28654 enum machine_mode mode = GET_MODE (target);
28655 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28656 int n_elts = GET_MODE_NUNITS (mode);
28657 int n_var = 0, one_var = -1;
28658 bool all_same = true, all_const_zero = true;
28662 for (i = 0; i < n_elts; ++i)
28664 x = XVECEXP (vals, 0, i);
28665 if (!(CONST_INT_P (x)
28666 || GET_CODE (x) == CONST_DOUBLE
28667 || GET_CODE (x) == CONST_FIXED))
28668 n_var++, one_var = i;
28669 else if (x != CONST0_RTX (inner_mode))
28670 all_const_zero = false;
28671 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28675 /* Constants are best loaded from the constant pool. */
28678 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28682 /* If all values are identical, broadcast the value. */
28684 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28685 XVECEXP (vals, 0, 0)))
/* Values where only one field is non-constant are best loaded from
   the constant pool, with the variable element overwritten by a move later. */
28693 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28694 XVECEXP (vals, 0, one_var),
28698 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28702 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28706 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28708 enum machine_mode mode = GET_MODE (target);
28709 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28710 enum machine_mode half_mode;
28711 bool use_vec_merge = false;
28713 static rtx (*gen_extract[6][2]) (rtx, rtx)
28715 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28716 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28717 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28718 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28719 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28720 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28722 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28724 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28725 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28726 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28727 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28728 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28729 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28739 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28740 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28742 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28744 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28745 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28751 use_vec_merge = TARGET_SSE4_1;
28759 /* For the two element vectors, we implement a VEC_CONCAT with
28760 the extraction of the other element. */
28762 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28763 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28766 op0 = val, op1 = tmp;
28768 op0 = tmp, op1 = val;
28770 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28771 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28776 use_vec_merge = TARGET_SSE4_1;
28783 use_vec_merge = true;
28787 /* tmp = target = A B C D */
28788 tmp = copy_to_reg (target);
28789 /* target = A A B B */
28790 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28791 /* target = X A B B */
28792 ix86_expand_vector_set (false, target, val, 0);
28793 /* target = A X C D */
28794 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28795 const1_rtx, const0_rtx,
28796 GEN_INT (2+4), GEN_INT (3+4)));
28800 /* tmp = target = A B C D */
28801 tmp = copy_to_reg (target);
28802 /* tmp = X B C D */
28803 ix86_expand_vector_set (false, tmp, val, 0);
28804 /* target = A B X D */
28805 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28806 const0_rtx, const1_rtx,
28807 GEN_INT (0+4), GEN_INT (3+4)));
28811 /* tmp = target = A B C D */
28812 tmp = copy_to_reg (target);
28813 /* tmp = X B C D */
28814 ix86_expand_vector_set (false, tmp, val, 0);
/* target = A B C X */
28816 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28817 const0_rtx, const1_rtx,
28818 GEN_INT (2+4), GEN_INT (0+4)));
28822 gcc_unreachable ();
28827 use_vec_merge = TARGET_SSE4_1;
28831 /* Element 0 handled by vec_merge below. */
28834 use_vec_merge = true;
28840 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28841 store into element 0, then shuffle them back. */
28845 order[0] = GEN_INT (elt);
28846 order[1] = const1_rtx;
28847 order[2] = const2_rtx;
28848 order[3] = GEN_INT (3);
28849 order[elt] = const0_rtx;
28851 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28852 order[1], order[2], order[3]));
28854 ix86_expand_vector_set (false, target, val, 0);
28856 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28857 order[1], order[2], order[3]));
28861 /* For SSE1, we have to reuse the V4SF code. */
28862 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28863 gen_lowpart (SFmode, val), elt);
28868 use_vec_merge = TARGET_SSE2;
28871 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28875 use_vec_merge = TARGET_SSE4_1;
28882 half_mode = V16QImode;
28888 half_mode = V8HImode;
28894 half_mode = V4SImode;
28900 half_mode = V2DImode;
28906 half_mode = V4SFmode;
28912 half_mode = V2DFmode;
28918 /* Compute offset. */
28922 gcc_assert (i <= 1);
28924 /* Extract the half. */
28925 tmp = gen_reg_rtx (half_mode);
28926 emit_insn (gen_extract[j][i] (tmp, target));
28928 /* Put val in tmp at elt. */
28929 ix86_expand_vector_set (false, tmp, val, elt);
28932 emit_insn (gen_insert[j][i] (target, target, tmp));
28941 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28942 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28943 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28947 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28949 emit_move_insn (mem, target);
28951 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28952 emit_move_insn (tmp, val);
28954 emit_move_insn (target, mem);
28959 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28961 enum machine_mode mode = GET_MODE (vec);
28962 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28963 bool use_vec_extr = false;
28976 use_vec_extr = true;
28980 use_vec_extr = TARGET_SSE4_1;
28992 tmp = gen_reg_rtx (mode);
28993 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28994 GEN_INT (elt), GEN_INT (elt),
28995 GEN_INT (elt+4), GEN_INT (elt+4)));
28999 tmp = gen_reg_rtx (mode);
29000 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29004 gcc_unreachable ();
29007 use_vec_extr = true;
29012 use_vec_extr = TARGET_SSE4_1;
29026 tmp = gen_reg_rtx (mode);
29027 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29028 GEN_INT (elt), GEN_INT (elt),
29029 GEN_INT (elt), GEN_INT (elt)));
29033 tmp = gen_reg_rtx (mode);
29034 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29038 gcc_unreachable ();
29041 use_vec_extr = true;
29046 /* For SSE1, we have to reuse the V4SF code. */
29047 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29048 gen_lowpart (V4SFmode, vec), elt);
29054 use_vec_extr = TARGET_SSE2;
29057 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29061 use_vec_extr = TARGET_SSE4_1;
29065 /* ??? Could extract the appropriate HImode element and shift. */
29072 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29073 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29075 /* Let the rtl optimizers know about the zero extension performed. */
29076 if (inner_mode == QImode || inner_mode == HImode)
29078 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29079 target = gen_lowpart (SImode, target);
29082 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29086 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29088 emit_move_insn (mem, vec);
29090 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29091 emit_move_insn (target, tmp);
29095 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29096 pattern to reduce; DEST is the destination; IN is the input vector. */
29099 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29101 rtx tmp1, tmp2, tmp3;
29103 tmp1 = gen_reg_rtx (V4SFmode);
29104 tmp2 = gen_reg_rtx (V4SFmode);
29105 tmp3 = gen_reg_rtx (V4SFmode);
29107 emit_insn (gen_sse_movhlps (tmp1, in, in));
29108 emit_insn (fn (tmp2, tmp1, in));
29110 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29111 const1_rtx, const1_rtx,
29112 GEN_INT (1+4), GEN_INT (1+4)));
29113 emit_insn (fn (dest, tmp2, tmp3));
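/* Illustrative sketch (not part of GCC): scalar equivalent of the V4SF
   reduction above for a commutative FN.  movhlps pairs lanes {0,2} and
   {1,3}; the shufps broadcast of lane 1 lets the final FN combine the two
   partial results, leaving the answer in lane 0.  */
static float
reduc_v4sf_sketch (float (*fn) (float, float), const float in[4])
{
  float lo = fn (in[0], in[2]);	/* movhlps + first fn */
  float hi = fn (in[1], in[3]);
  return fn (lo, hi);		/* shufps broadcast + second fn */
}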
29116 /* Target hook for scalar_mode_supported_p. */
29118 ix86_scalar_mode_supported_p (enum machine_mode mode)
29120 if (DECIMAL_FLOAT_MODE_P (mode))
29121 return default_decimal_float_supported_p ();
29122 else if (mode == TFmode)
29125 return default_scalar_mode_supported_p (mode);
29128 /* Implements target hook vector_mode_supported_p. */
29130 ix86_vector_mode_supported_p (enum machine_mode mode)
29132 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29134 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29136 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29138 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29140 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29145 /* Target hook for c_mode_for_suffix. */
29146 static enum machine_mode
29147 ix86_c_mode_for_suffix (char suffix)
29157 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29159 We do this in the new i386 backend to maintain source compatibility
29160 with the old cc0-based compiler. */
29163 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29164 tree inputs ATTRIBUTE_UNUSED,
29167 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29169 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
/* Implements the target hook targetm.encode_section_info. This
   is not used by NetWare. */
29177 static void ATTRIBUTE_UNUSED
29178 ix86_encode_section_info (tree decl, rtx rtl, int first)
29180 default_encode_section_info (decl, rtl, first);
29182 if (TREE_CODE (decl) == VAR_DECL
29183 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29184 && ix86_in_large_data_p (decl))
29185 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29188 /* Worker function for REVERSE_CONDITION. */
29191 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29193 return (mode != CCFPmode && mode != CCFPUmode
29194 ? reverse_condition (code)
29195 : reverse_condition_maybe_unordered (code));
29198 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29202 output_387_reg_move (rtx insn, rtx *operands)
29204 if (REG_P (operands[0]))
29206 if (REG_P (operands[1])
29207 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29209 if (REGNO (operands[0]) == FIRST_STACK_REG)
29210 return output_387_ffreep (operands, 0);
29211 return "fstp\t%y0";
29213 if (STACK_TOP_P (operands[0]))
29214 return "fld%Z1\t%y1";
29217 else if (MEM_P (operands[0]))
29219 gcc_assert (REG_P (operands[1]));
29220 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29221 return "fstp%Z0\t%y0";
29224 /* There is no non-popping store to memory for XFmode.
29225 So if we need one, follow the store with a load. */
29226 if (GET_MODE (operands[0]) == XFmode)
29227 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29229 return "fst%Z0\t%y0";
/* Output code to perform a conditional jump to LABEL, if the C2 flag in
   the FP status register is set. */
29240 ix86_emit_fp_unordered_jump (rtx label)
29242 rtx reg = gen_reg_rtx (HImode);
29245 emit_insn (gen_x86_fnstsw_1 (reg));
29247 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29249 emit_insn (gen_x86_sahf_1 (reg));
29251 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29252 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29256 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29258 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29259 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29262 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29263 gen_rtx_LABEL_REF (VOIDmode, label),
29265 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29267 emit_jump_insn (temp);
29268 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29271 /* Output code to perform a log1p XFmode calculation. */
29273 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29275 rtx label1 = gen_label_rtx ();
29276 rtx label2 = gen_label_rtx ();
29278 rtx tmp = gen_reg_rtx (XFmode);
29279 rtx tmp2 = gen_reg_rtx (XFmode);
29282 emit_insn (gen_absxf2 (tmp, op1));
29283 test = gen_rtx_GE (VOIDmode, tmp,
29284 CONST_DOUBLE_FROM_REAL_VALUE (
29285 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29287 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29289 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29290 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29291 emit_jump (label2);
29293 emit_label (label1);
29294 emit_move_insn (tmp, CONST1_RTX (XFmode));
29295 emit_insn (gen_addxf3 (tmp, op1, tmp));
29296 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29297 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29299 emit_label (label2);
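/* Illustrative sketch (not part of GCC, assumes <math.h>): the control
   flow of the i387 sequence above, with log1p and log standing in for
   fyl2xp1 and fyl2x.  fyl2xp1 is accurate only for
   |x| < 1 - sqrt(2)/2 ~= 0.2929, so larger inputs form 1 + x explicitly
   and use fyl2x instead.  */
static double
log1p_sketch (double x)
{
  if (fabs (x) < 0.29289321881345247561810596348408353)
    return log1p (x);		/* fyl2xp1 path: keeps small-x precision */
  return log (1.0 + x);		/* fyl2x path on the explicit sum */
}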
/* Output code to perform a Newton-Raphson approximation of a single precision
   floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29305 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29307 rtx x0, x1, e0, e1, two;
29309 x0 = gen_reg_rtx (mode);
29310 e0 = gen_reg_rtx (mode);
29311 e1 = gen_reg_rtx (mode);
29312 x1 = gen_reg_rtx (mode);
29314 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29316 if (VECTOR_MODE_P (mode))
29317 two = ix86_build_const_vector (SFmode, true, two);
29319 two = force_reg (mode, two);
29321 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29323 /* x0 = rcp(b) estimate */
29324 emit_insn (gen_rtx_SET (VOIDmode, x0,
29325 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29328 emit_insn (gen_rtx_SET (VOIDmode, e0,
29329 gen_rtx_MULT (mode, x0, a)));
29331 emit_insn (gen_rtx_SET (VOIDmode, e1,
29332 gen_rtx_MULT (mode, x0, b)));
29334 emit_insn (gen_rtx_SET (VOIDmode, x1,
29335 gen_rtx_MINUS (mode, two, e1)));
29336 /* res = e0 * x1 */
29337 emit_insn (gen_rtx_SET (VOIDmode, res,
29338 gen_rtx_MULT (mode, e0, x1)));
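/* Illustrative sketch (not part of GCC): the scalar form of the division
   sequence above.  rcp_estimate_sketch is a hypothetical stand-in for
   rcpss/rcpps; the single Newton-Raphson step roughly doubles the ~12
   accurate bits of that estimate.  */
static float
rcp_estimate_sketch (float b)
{
  return 1.0f / b;		/* placeholder for the rcpss estimate */
}

static float
swdiv_sketch (float a, float b)
{
  float x0 = rcp_estimate_sketch (b);	/* x0 ~= 1/b */
  float e0 = x0 * a;			/* a * rcp(b) */
  float e1 = x0 * b;			/* b * rcp(b), close to 1 */
  float x1 = 2.0f - e1;			/* Newton-Raphson correction */
  return e0 * x1;			/* a * rcp(b) * (2 - b * rcp(b)) */
}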
/* Output code to perform a Newton-Raphson approximation of a
   single precision floating point [reciprocal] square root. */
29344 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29347 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29350 x0 = gen_reg_rtx (mode);
29351 e0 = gen_reg_rtx (mode);
29352 e1 = gen_reg_rtx (mode);
29353 e2 = gen_reg_rtx (mode);
29354 e3 = gen_reg_rtx (mode);
29356 real_from_integer (&r, VOIDmode, -3, -1, 0);
29357 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29359 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29360 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29362 if (VECTOR_MODE_P (mode))
29364 mthree = ix86_build_const_vector (SFmode, true, mthree);
29365 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29368 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29369 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29371 /* x0 = rsqrt(a) estimate */
29372 emit_insn (gen_rtx_SET (VOIDmode, x0,
29373 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
/* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt (0.0). */
29381 zero = gen_reg_rtx (mode);
29382 mask = gen_reg_rtx (mode);
zero = force_reg (mode, CONST0_RTX (mode));
29385 emit_insn (gen_rtx_SET (VOIDmode, mask,
29386 gen_rtx_NE (mode, zero, a)));
29388 emit_insn (gen_rtx_SET (VOIDmode, x0,
29389 gen_rtx_AND (mode, x0, mask)));
29393 emit_insn (gen_rtx_SET (VOIDmode, e0,
29394 gen_rtx_MULT (mode, x0, a)));
29396 emit_insn (gen_rtx_SET (VOIDmode, e1,
29397 gen_rtx_MULT (mode, e0, x0)));
29400 mthree = force_reg (mode, mthree);
29401 emit_insn (gen_rtx_SET (VOIDmode, e2,
29402 gen_rtx_PLUS (mode, e1, mthree)));
29404 mhalf = force_reg (mode, mhalf);
29406 /* e3 = -.5 * x0 */
29407 emit_insn (gen_rtx_SET (VOIDmode, e3,
29408 gen_rtx_MULT (mode, x0, mhalf)));
29410 /* e3 = -.5 * e0 */
29411 emit_insn (gen_rtx_SET (VOIDmode, e3,
29412 gen_rtx_MULT (mode, e0, mhalf)));
29413 /* ret = e2 * e3 */
29414 emit_insn (gen_rtx_SET (VOIDmode, res,
29415 gen_rtx_MULT (mode, e2, e3)));
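/* Illustrative sketch (not part of GCC, assumes <math.h>): the scalar
   form of the sequence above, implementing the formulas in the comment.
   rsqrt_estimate_sketch is a hypothetical stand-in for rsqrtss/rsqrtps;
   the zero mask applied above is omitted for brevity.  */
static float
rsqrt_estimate_sketch (float a)
{
  return 1.0f / sqrtf (a);	/* placeholder for the rsqrtss estimate */
}

static float
swsqrt_sketch (float a, int recip)
{
  float x0 = rsqrt_estimate_sketch (a);	/* x0 ~= 1/sqrt(a) */
  float e0 = x0 * a;
  float e1 = e0 * x0;			/* a * x0 * x0, close to 1 */
  float e2 = e1 - 3.0f;
  float e3 = (recip ? x0 : e0) * -0.5f;
  return e2 * e3;			/* sqrt(a) or rsqrt(a) */
}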
29418 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29420 static void ATTRIBUTE_UNUSED
29421 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29424 /* With Binutils 2.15, the "@unwind" marker must be specified on
29425 every occurrence of the ".eh_frame" section, not just the first
29428 && strcmp (name, ".eh_frame") == 0)
29430 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29431 flags & SECTION_WRITE ? "aw" : "a");
29434 default_elf_asm_named_section (name, flags, decl);
29437 /* Return the mangling of TYPE if it is an extended fundamental type. */
29439 static const char *
29440 ix86_mangle_type (const_tree type)
29442 type = TYPE_MAIN_VARIANT (type);
29444 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29445 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29448 switch (TYPE_MODE (type))
29451 /* __float128 is "g". */
29454 /* "long double" or __float80 is "e". */
/* For 32-bit code we can save PIC register setup by using
   the __stack_chk_fail_local hidden function instead of calling
   __stack_chk_fail directly. 64-bit code doesn't need to set up a PIC
   register, so it is better to call __stack_chk_fail directly. */
29467 ix86_stack_protect_fail (void)
29469 return TARGET_64BIT
29470 ? default_external_stack_protect_fail ()
29471 : default_hidden_stack_protect_fail ();
29474 /* Select a format to encode pointers in exception handling data. CODE
29475 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29476 true if the symbol may be affected by dynamic relocations.
29478 ??? All x86 object file formats are capable of representing this.
29479 After all, the relocation needed is the same as for the call insn.
29480 Whether or not a particular assembler allows us to enter such, I
29481 guess we'll have to see. */
29483 asm_preferred_eh_data_format (int code, int global)
29487 int type = DW_EH_PE_sdata8;
29489 || ix86_cmodel == CM_SMALL_PIC
29490 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29491 type = DW_EH_PE_sdata4;
29492 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29494 if (ix86_cmodel == CM_SMALL
29495 || (ix86_cmodel == CM_MEDIUM && code))
29496 return DW_EH_PE_udata4;
29497 return DW_EH_PE_absptr;
29500 /* Expand copysign from SIGN to the positive value ABS_VALUE
29501 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
29504 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29506 enum machine_mode mode = GET_MODE (sign);
29507 rtx sgn = gen_reg_rtx (mode);
29508 if (mask == NULL_RTX)
29510 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29511 if (!VECTOR_MODE_P (mode))
29513 /* We need to generate a scalar mode mask in this case. */
29514 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29515 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29516 mask = gen_reg_rtx (mode);
29517 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29521 mask = gen_rtx_NOT (mode, mask);
29522 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29523 gen_rtx_AND (mode, mask, sign)));
29524 emit_insn (gen_rtx_SET (VOIDmode, result,
29525 gen_rtx_IOR (mode, abs_value, sgn)));
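/* Illustrative sketch (not part of GCC, assumes <stdint.h> and
   <string.h>): the bit-level operation above for the scalar DFmode case,
   with the mask already reduced to the bare sign bit.  */
static double
copysign_to_positive_sketch (double abs_value, double sign)
{
  uint64_t a, s;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & UINT64_C (0x8000000000000000);	/* result = abs | sign bit */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}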
29528 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29529 mask for masking out the sign-bit is stored in *SMASK, if that is
29532 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29534 enum machine_mode mode = GET_MODE (op0);
29537 xa = gen_reg_rtx (mode);
29538 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29539 if (!VECTOR_MODE_P (mode))
29541 /* We need to generate a scalar mode mask in this case. */
29542 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29543 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29544 mask = gen_reg_rtx (mode);
29545 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29547 emit_insn (gen_rtx_SET (VOIDmode, xa,
29548 gen_rtx_AND (mode, op0, mask)));
29556 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29557 swapping the operands if SWAP_OPERANDS is true. The expanded
29558 code is a forward jump to a newly created label in case the
29559 comparison is true. The generated label rtx is returned. */
29561 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29562 bool swap_operands)
29573 label = gen_label_rtx ();
29574 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29575 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29576 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29577 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29578 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29579 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29580 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29581 JUMP_LABEL (tmp) = label;
29586 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29587 using comparison code CODE. Operands are swapped for the comparison if
29588 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29590 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29591 bool swap_operands)
29593 enum machine_mode mode = GET_MODE (op0);
29594 rtx mask = gen_reg_rtx (mode);
29603 if (mode == DFmode)
29604 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29605 gen_rtx_fmt_ee (code, mode, op0, op1)));
29607 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29608 gen_rtx_fmt_ee (code, mode, op0, op1)));
29613 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29614 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29616 ix86_gen_TWO52 (enum machine_mode mode)
29618 REAL_VALUE_TYPE TWO52r;
29621 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29622 TWO52 = const_double_from_real_value (TWO52r, mode);
29623 TWO52 = force_reg (mode, TWO52);
29628 /* Expand SSE sequence for computing lround from OP1 storing
29631 ix86_expand_lround (rtx op0, rtx op1)
29633 /* C code for the stuff we're doing below:
29634 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29637 enum machine_mode mode = GET_MODE (op1);
29638 const struct real_format *fmt;
29639 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29642 /* load nextafter (0.5, 0.0) */
29643 fmt = REAL_MODE_FORMAT (mode);
29644 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29645 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29647 /* adj = copysign (0.5, op1) */
29648 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29649 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29651 /* adj = op1 + adj */
29652 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29654 /* op0 = (imode)adj */
29655 expand_fix (op0, adj, 0);
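/* Illustrative sketch (not part of GCC, assumes <math.h>): the scalar
   form of the sequence above.  Adding copysign (nextafter (0.5, 0.0), x)
   rather than a plain +/-0.5 avoids rounding values just below one half
   away from zero.  */
static long
lround_sketch (double x)
{
  double adj = copysign (nextafter (0.5, 0.0), x);
  return (long) (x + adj);	/* truncating conversion, as expand_fix */
}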
/* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
29661 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29663 /* C code for the stuff we're doing below (for do_floor):
29665 xi -= (double)xi > op1 ? 1 : 0;
29668 enum machine_mode fmode = GET_MODE (op1);
29669 enum machine_mode imode = GET_MODE (op0);
29670 rtx ireg, freg, label, tmp;
29672 /* reg = (long)op1 */
29673 ireg = gen_reg_rtx (imode);
29674 expand_fix (ireg, op1, 0);
29676 /* freg = (double)reg */
29677 freg = gen_reg_rtx (fmode);
29678 expand_float (freg, ireg, 0);
29680 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29681 label = ix86_expand_sse_compare_and_jump (UNLE,
29682 freg, op1, !do_floor);
29683 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29684 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29685 emit_move_insn (ireg, tmp);
29687 emit_label (label);
29688 LABEL_NUSES (label) = 1;
29690 emit_move_insn (op0, ireg);
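/* Illustrative sketch (not part of GCC): the scalar form of the do_floor
   branch above; the ceil variant swaps the comparison and adds 1
   instead.  */
static long
lfloor_sketch (double x)
{
  long i = (long) x;		/* truncates toward zero */
  if ((double) i > x)		/* truncation overshot a negative value */
    i -= 1;
  return i;
}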
29693 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29694 result in OPERAND0. */
29696 ix86_expand_rint (rtx operand0, rtx operand1)
29698 /* C code for the stuff we're doing below:
29699 xa = fabs (operand1);
29700 if (!isless (xa, 2**52))
29702 xa = xa + 2**52 - 2**52;
29703 return copysign (xa, operand1);
29705 enum machine_mode mode = GET_MODE (operand0);
29706 rtx res, xa, label, TWO52, mask;
29708 res = gen_reg_rtx (mode);
29709 emit_move_insn (res, operand1);
29711 /* xa = abs (operand1) */
29712 xa = ix86_expand_sse_fabs (res, &mask);
29714 /* if (!isless (xa, TWO52)) goto label; */
29715 TWO52 = ix86_gen_TWO52 (mode);
29716 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29718 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29719 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29721 ix86_sse_copysign_to_positive (res, xa, res, mask);
29723 emit_label (label);
29724 LABEL_NUSES (label) = 1;
29726 emit_move_insn (operand0, res);
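/* Illustrative sketch (not part of GCC, assumes <math.h>): the scalar
   DFmode form of the sequence above.  Adding and then subtracting 2**52
   forces |x| to be rounded to an integer in the current rounding mode;
   inputs of magnitude >= 2**52 (and NaNs) are returned unchanged.  */
static double
rint_sketch (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = fabs (x);
  if (!(xa < two52))
    return x;
  xa = (xa + two52) - two52;
  return copysign (xa, x);
}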
29729 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29732 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29734 /* C code for the stuff we expand below.
29735 double xa = fabs (x), x2;
29736 if (!isless (xa, TWO52))
29738 xa = xa + TWO52 - TWO52;
29739 x2 = copysign (xa, x);
29748 enum machine_mode mode = GET_MODE (operand0);
29749 rtx xa, TWO52, tmp, label, one, res, mask;
29751 TWO52 = ix86_gen_TWO52 (mode);
29753 /* Temporary for holding the result, initialized to the input
29754 operand to ease control flow. */
29755 res = gen_reg_rtx (mode);
29756 emit_move_insn (res, operand1);
29758 /* xa = abs (operand1) */
29759 xa = ix86_expand_sse_fabs (res, &mask);
29761 /* if (!isless (xa, TWO52)) goto label; */
29762 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29764 /* xa = xa + TWO52 - TWO52; */
29765 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29766 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29768 /* xa = copysign (xa, operand1) */
29769 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29771 /* generate 1.0 or -1.0 */
29772 one = force_reg (mode,
29773 const_double_from_real_value (do_floor
29774 ? dconst1 : dconstm1, mode));
29776 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29777 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29778 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29779 gen_rtx_AND (mode, one, tmp)));
29780 /* We always need to subtract here to preserve signed zero. */
29781 tmp = expand_simple_binop (mode, MINUS,
29782 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29783 emit_move_insn (res, tmp);
29785 emit_label (label);
29786 LABEL_NUSES (label) = 1;
29788 emit_move_insn (operand0, res);
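/* Illustrative sketch (not part of GCC, assumes <math.h>): the scalar
   form of the do_floor branch above.  The rounded value may be one too
   large, so it is compensated; using a subtraction in both branches
   preserves -0.0.  The ceil variant uses -1.0 and a swapped compare.  */
static double
floor_df32_sketch (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = fabs (x), x2;
  if (!(xa < two52))
    return x;			/* already integral (or NaN) */
  xa = (xa + two52) - two52;	/* round |x| to an integer */
  x2 = copysign (xa, x);
  x2 -= (x2 > x ? 1.0 : 0.0);	/* step down if rounding overshot */
  return x2;
}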
29791 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29794 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29796 /* C code for the stuff we expand below.
29797 double xa = fabs (x), x2;
29798 if (!isless (xa, TWO52))
29800 x2 = (double)(long)x;
29807 if (HONOR_SIGNED_ZEROS (mode))
29808 return copysign (x2, x);
29811 enum machine_mode mode = GET_MODE (operand0);
29812 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29814 TWO52 = ix86_gen_TWO52 (mode);
29816 /* Temporary for holding the result, initialized to the input
29817 operand to ease control flow. */
29818 res = gen_reg_rtx (mode);
29819 emit_move_insn (res, operand1);
29821 /* xa = abs (operand1) */
29822 xa = ix86_expand_sse_fabs (res, &mask);
29824 /* if (!isless (xa, TWO52)) goto label; */
29825 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29827 /* xa = (double)(long)x */
29828 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29829 expand_fix (xi, res, 0);
29830 expand_float (xa, xi, 0);
29833 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29835 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29836 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29837 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29838 gen_rtx_AND (mode, one, tmp)));
29839 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29840 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29841 emit_move_insn (res, tmp);
29843 if (HONOR_SIGNED_ZEROS (mode))
29844 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29846 emit_label (label);
29847 LABEL_NUSES (label) = 1;
29849 emit_move_insn (operand0, res);
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0. The sequence works without relying on DImode truncation
   via cvttsd2siq, which is only available on 64-bit targets. */
29856 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29858 /* C code for the stuff we expand below.
29859 double xa = fabs (x), xa2, x2;
29860 if (!isless (xa, TWO52))
Using the absolute value and copying back the sign makes
   -0.0 -> -0.0 correct.
29864 xa2 = xa + TWO52 - TWO52;
29869 else if (dxa > 0.5)
29871 x2 = copysign (xa2, x);
29874 enum machine_mode mode = GET_MODE (operand0);
29875 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29877 TWO52 = ix86_gen_TWO52 (mode);
29879 /* Temporary for holding the result, initialized to the input
29880 operand to ease control flow. */
29881 res = gen_reg_rtx (mode);
29882 emit_move_insn (res, operand1);
29884 /* xa = abs (operand1) */
29885 xa = ix86_expand_sse_fabs (res, &mask);
29887 /* if (!isless (xa, TWO52)) goto label; */
29888 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29890 /* xa2 = xa + TWO52 - TWO52; */
29891 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29892 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29894 /* dxa = xa2 - xa; */
29895 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29897 /* generate 0.5, 1.0 and -0.5 */
29898 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29899 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29900 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29904 tmp = gen_reg_rtx (mode);
29905 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29906 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29907 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29908 gen_rtx_AND (mode, one, tmp)));
29909 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29910 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29911 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29912 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29913 gen_rtx_AND (mode, one, tmp)));
29914 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29916 /* res = copysign (xa2, operand1) */
29917 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29919 emit_label (label);
29920 LABEL_NUSES (label) = 1;
29922 emit_move_insn (operand0, res);
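/* Illustrative sketch (not part of GCC, assumes <math.h>): the scalar
   form of the sequence above.  dxa measures how the
   round-to-nearest-even step moved |x|; the two compensations convert
   that into round-half-away-from-zero.  */
static double
round_df32_sketch (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = fabs (x), xa2, dxa;
  if (!(xa < two52))
    return x;
  xa2 = (xa + two52) - two52;	/* round to nearest, ties to even */
  dxa = xa2 - xa;
  if (dxa > 0.5)		/* defensive: rounding moved xa up too far */
    xa2 -= 1.0;
  else if (dxa <= -0.5)		/* an exact half rounded down: round away */
    xa2 += 1.0;
  return copysign (xa2, x);
}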
29925 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29928 ix86_expand_trunc (rtx operand0, rtx operand1)
/* C code for the SSE variant we expand below.
29931 double xa = fabs (x), x2;
29932 if (!isless (xa, TWO52))
29934 x2 = (double)(long)x;
29935 if (HONOR_SIGNED_ZEROS (mode))
29936 return copysign (x2, x);
29939 enum machine_mode mode = GET_MODE (operand0);
29940 rtx xa, xi, TWO52, label, res, mask;
29942 TWO52 = ix86_gen_TWO52 (mode);
29944 /* Temporary for holding the result, initialized to the input
29945 operand to ease control flow. */
29946 res = gen_reg_rtx (mode);
29947 emit_move_insn (res, operand1);
29949 /* xa = abs (operand1) */
29950 xa = ix86_expand_sse_fabs (res, &mask);
29952 /* if (!isless (xa, TWO52)) goto label; */
29953 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29955 /* x = (double)(long)x */
29956 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29957 expand_fix (xi, res, 0);
29958 expand_float (res, xi, 0);
29960 if (HONOR_SIGNED_ZEROS (mode))
29961 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29963 emit_label (label);
29964 LABEL_NUSES (label) = 1;
29966 emit_move_insn (operand0, res);
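/* Illustrative sketch (not part of GCC, assumes <math.h>): the scalar
   form of the sequence above; long stands in for the mode-sized integer
   used by expand_fix.  */
static double
trunc_sketch (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double x2;
  if (!(fabs (x) < two52))
    return x;			/* already integral (or NaN) */
  x2 = (double) (long) x;	/* truncating round trip */
  return copysign (x2, x);	/* restores -0.0 when signed zeros matter */
}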
29969 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29972 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29974 enum machine_mode mode = GET_MODE (operand0);
29975 rtx xa, mask, TWO52, label, one, res, smask, tmp;
/* C code for the SSE variant we expand below.
29978 double xa = fabs (x), x2;
29979 if (!isless (xa, TWO52))
29981 xa2 = xa + TWO52 - TWO52;
29985 x2 = copysign (xa2, x);
29989 TWO52 = ix86_gen_TWO52 (mode);
29991 /* Temporary for holding the result, initialized to the input
29992 operand to ease control flow. */
29993 res = gen_reg_rtx (mode);
29994 emit_move_insn (res, operand1);
29996 /* xa = abs (operand1) */
29997 xa = ix86_expand_sse_fabs (res, &smask);
29999 /* if (!isless (xa, TWO52)) goto label; */
30000 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30002 /* res = xa + TWO52 - TWO52; */
30003 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30004 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30005 emit_move_insn (res, tmp);
30008 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30010 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30011 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30012 emit_insn (gen_rtx_SET (VOIDmode, mask,
30013 gen_rtx_AND (mode, mask, one)));
30014 tmp = expand_simple_binop (mode, MINUS,
30015 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30016 emit_move_insn (res, tmp);
30018 /* res = copysign (res, operand1) */
30019 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30021 emit_label (label);
30022 LABEL_NUSES (label) = 1;
30024 emit_move_insn (operand0, res);
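/* A minimal scalar sketch of the 32-bit-safe variant above
   (illustrative only, not part of GCC): round to the nearest integer
   with the 2^52 trick, then step back toward zero whenever that
   rounding went up, so the net effect is truncation.  */

static double
example_trunc_via_two52 (double x)
{
  const double two52 = 4503599627370496.0;
  double xa = x < 0.0 ? -x : x, xa2;

  if (!(xa < two52))
    return x;			/* Already integral, or NaN.  */

  xa2 = xa + two52 - two52;	/* Nearest integer.  */
  if (xa2 > xa)
    xa2 -= 1.0;			/* Rounded up: not a truncation.  */

  return x < 0.0 ? -xa2 : xa2;	/* copysign (xa2, x) */
}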
30027 /* Expand SSE sequence for computing round from OPERAND1 storing
30030 ix86_expand_round (rtx operand0, rtx operand1)
30032 /* C code for the stuff we're doing below:
30033 double xa = fabs (x);
30034 if (!isless (xa, TWO52))
30036 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30037 return copysign (xa, x);
30039 enum machine_mode mode = GET_MODE (operand0);
30040 rtx res, TWO52, xa, label, xi, half, mask;
30041 const struct real_format *fmt;
30042 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30044 /* Temporary for holding the result, initialized to the input
30045 operand to ease control flow. */
30046 res = gen_reg_rtx (mode);
30047 emit_move_insn (res, operand1);
30049 TWO52 = ix86_gen_TWO52 (mode);
30050 xa = ix86_expand_sse_fabs (res, &mask);
30051 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30053 /* load nextafter (0.5, 0.0) */
30054 fmt = REAL_MODE_FORMAT (mode);
30055 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30056 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30058 /* xa = xa + 0.5 */
30059 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30060 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30062 /* xa = (double)(int64_t)xa */
30063 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30064 expand_fix (xi, xa, 0);
30065 expand_float (xa, xi, 0);
30067 /* res = copysign (xa, operand1) */
30068 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30070 emit_label (label);
30071 LABEL_NUSES (label) = 1;
30073 emit_move_insn (operand0, res);
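/* Why nextafter (0.5, 0.0) rather than 0.5 (illustrative note and
   sketch, not part of GCC; assumes IEEE double, so p == 53 and the
   constant is 0.5 - 2^-54): for x = 0.49999999999999994, the largest
   double below 0.5, the sum x + 0.5 rounds to exactly 1.0, so adding
   plain 0.5 and truncating would round that value up to 1.  Adding
   0.5 - 2^-54 instead keeps every such sum below 1.0.  */

static double
example_round_via_pred_half (double x)
{
  const double two52 = 4503599627370496.0;
  const double pred_half = 0.5 - 0x1p-54;	/* nextafter (0.5, 0.0) */
  double xa = x < 0.0 ? -x : x;

  if (!(xa < two52))
    return x;				/* Already integral, or NaN.  */

  xa = (double) (long long) (xa + pred_half);	/* Truncating convert.  */
  return x < 0.0 ? -xa : xa;			/* copysign (xa, x) */
}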
30077 /* Table of valid machine attributes. */
30078 static const struct attribute_spec ix86_attribute_table[] =
30080 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30081 /* Stdcall attribute says callee is responsible for popping arguments
30082 if they are not variable. */
30083 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30084 /* Fastcall attribute says callee is responsible for popping arguments
30085 if they are not variable. */
30086 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30087 /* Thiscall attribute says callee is responsible for popping arguments
30088 if they are not variable. */
30089 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30090 /* Cdecl attribute says the callee is a normal C declaration */
30091 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30092 /* Regparm attribute specifies how many integer arguments are to be
30093 passed in registers. */
30094 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30095 /* Sseregparm attribute says we are using x86_64 calling conventions
30096 for FP arguments. */
30097 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30098 /* force_align_arg_pointer says this function realigns the stack at entry. */
30099 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30100 false, true, true, ix86_handle_cconv_attribute },
30101 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30102 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30103 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30104 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30106 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30107 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30108 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30109 SUBTARGET_ATTRIBUTE_TABLE,
30111 /* ms_abi and sysv_abi calling convention function attributes. */
30112 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30113 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30114 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30116 { NULL, 0, 0, false, false, false, NULL }
30119 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30121 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30122 tree vectype ATTRIBUTE_UNUSED,
30123 int misalign ATTRIBUTE_UNUSED)
30125 switch (type_of_cost)
30128 return ix86_cost->scalar_stmt_cost;
30131 return ix86_cost->scalar_load_cost;
30134 return ix86_cost->scalar_store_cost;
30137 return ix86_cost->vec_stmt_cost;
30140 return ix86_cost->vec_align_load_cost;
30143 return ix86_cost->vec_store_cost;
30145 case vec_to_scalar:
30146 return ix86_cost->vec_to_scalar_cost;
30148 case scalar_to_vec:
30149 return ix86_cost->scalar_to_vec_cost;
30151 case unaligned_load:
30152 case unaligned_store:
30153 return ix86_cost->vec_unalign_load_cost;
30155 case cond_branch_taken:
30156 return ix86_cost->cond_taken_branch_cost;
30158 case cond_branch_not_taken:
30159 return ix86_cost->cond_not_taken_branch_cost;
30165 gcc_unreachable ();
30170 /* Implement targetm.vectorize.builtin_vec_perm. */
30173 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30175 tree itype = TREE_TYPE (vec_type);
30176 bool u = TYPE_UNSIGNED (itype);
30177 enum machine_mode vmode = TYPE_MODE (vec_type);
30178 enum ix86_builtins fcode;
30179 bool ok = TARGET_SSE2;
30185 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30188 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30190 itype = ix86_get_builtin_type (IX86_BT_DI);
30195 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30199 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30201 itype = ix86_get_builtin_type (IX86_BT_SI);
30205 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30208 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30211 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30214 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30224 *mask_type = itype;
30225 return ix86_builtins[(int) fcode];
30228 /* Return a vector mode with twice as many elements as VMODE. */
30229 /* ??? Consider moving this to a table generated by genmodes.c. */
30231 static enum machine_mode
30232 doublesize_vector_mode (enum machine_mode vmode)
30236 case V2SFmode: return V4SFmode;
30237 case V1DImode: return V2DImode;
30238 case V2SImode: return V4SImode;
30239 case V4HImode: return V8HImode;
30240 case V8QImode: return V16QImode;
30242 case V2DFmode: return V4DFmode;
30243 case V4SFmode: return V8SFmode;
30244 case V2DImode: return V4DImode;
30245 case V4SImode: return V8SImode;
30246 case V8HImode: return V16HImode;
30247 case V16QImode: return V32QImode;
30249 case V4DFmode: return V8DFmode;
30250 case V8SFmode: return V16SFmode;
30251 case V4DImode: return V8DImode;
30252 case V8SImode: return V16SImode;
30253 case V16HImode: return V32HImode;
30254 case V32QImode: return V64QImode;
30257 gcc_unreachable ();
30261 /* Construct (set target (vec_select op0 (parallel perm))) and
30262 return true if that's a valid instruction in the active ISA. */
30265 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30267 rtx rperm[MAX_VECT_LEN], x;
30270 for (i = 0; i < nelt; ++i)
30271 rperm[i] = GEN_INT (perm[i]);
30273 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30274 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30275 x = gen_rtx_SET (VOIDmode, target, x);
30278 if (recog_memoized (x) < 0)
30286 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30289 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30290 const unsigned char *perm, unsigned nelt)
30292 enum machine_mode v2mode;
30295 v2mode = doublesize_vector_mode (GET_MODE (op0));
30296 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30297 return expand_vselect (target, x, perm, nelt);
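/* A hedged usage sketch (this helper is not in GCC): recognizing
   unpcklps, which selects elements { 0, 4, 1, 5 } from the
   concatenation of two V4SF operands.  */

static bool
example_try_unpcklps (rtx target, rtx op0, rtx op1)
{
  static const unsigned char perm[4] = { 0, 4, 1, 5 };

  /* Forms (set target (vec_select:V4SF (vec_concat:V8SF op0 op1)
     (parallel [0 4 1 5]))) and keeps it only if an insn pattern in
     sse.md recognizes it.  */
  return expand_vselect_vconcat (target, op0, op1, perm, 4);
}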
30300 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30301 in terms of blendp[sd] / pblendw / pblendvb. */
30304 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30306 enum machine_mode vmode = d->vmode;
30307 unsigned i, mask, nelt = d->nelt;
30308 rtx target, op0, op1, x;
30310 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30312 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30315 /* This is a blend, not a permute. Elements must stay in their
30316 respective lanes. */
30317 for (i = 0; i < nelt; ++i)
30319 unsigned e = d->perm[i];
30320 if (!(e == i || e == i + nelt))
30327 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30328 decision should be extracted elsewhere, so that we only try that
30329 sequence once all budget==3 options have been tried. */
30331 /* For bytes, see if bytes move in pairs so we can use pblendw with
30332 an immediate argument, rather than pblendvb with a vector argument. */
30333 if (vmode == V16QImode)
30335 bool pblendw_ok = true;
30336 for (i = 0; i < 16 && pblendw_ok; i += 2)
30337 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30341 rtx rperm[16], vperm;
30343 for (i = 0; i < nelt; ++i)
30344 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30346 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30347 vperm = force_reg (V16QImode, vperm);
30349 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30354 target = d->target;
30366 for (i = 0; i < nelt; ++i)
30367 mask |= (d->perm[i] >= nelt) << i;
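	/* Worked example (illustrative): for V4SFmode with
	   d->perm = { 0, 5, 2, 7 }, elements 1 and 3 come from op1, so
	   this loop builds mask = (1 << 1) | (1 << 3) = 0x0a and the
	   insn emitted below is blendps $0x0a.  */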
30371 for (i = 0; i < 2; ++i)
30372 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30376 for (i = 0; i < 4; ++i)
30377 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30381 for (i = 0; i < 8; ++i)
30382 mask |= (d->perm[i * 2] >= 16) << i;
30386 target = gen_lowpart (vmode, target);
30387 op0 = gen_lowpart (vmode, op0);
30388 op1 = gen_lowpart (vmode, op1);
30392 gcc_unreachable ();
30395 /* This matches five different patterns with the different modes. */
30396 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30397 x = gen_rtx_SET (VOIDmode, target, x);
30403 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30404 in terms of the variable form of vpermilps.
30406 Note that we will have already failed the immediate input vpermilps,
30407 which requires that the high and low part shuffle be identical; the
30408 variable form doesn't require that. */
30411 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30413 rtx rperm[8], vperm;
30416 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30419 /* We can only permute within the 128-bit lane. */
30420 for (i = 0; i < 8; ++i)
30422 unsigned e = d->perm[i];
30423 if (i < 4 ? e >= 4 : e < 4)
30430 for (i = 0; i < 8; ++i)
30432 unsigned e = d->perm[i];
30434 /* Within each 128-bit lane, the elements of op0 are numbered
30435 from 0 and the elements of op1 are numbered from 4. */
30441 rperm[i] = GEN_INT (e);
30444 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30445 vperm = force_reg (V8SImode, vperm);
30446 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30451 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30452 in terms of pshufb or vpperm. */
30455 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30457 unsigned i, nelt, eltsz;
30458 rtx rperm[16], vperm, target, op0, op1;
30460 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30462 if (GET_MODE_SIZE (d->vmode) != 16)
30469 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30471 for (i = 0; i < nelt; ++i)
30473 unsigned j, e = d->perm[i];
30474 for (j = 0; j < eltsz; ++j)
30475 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
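  /* Worked example (illustrative): for V4SImode, eltsz == 4, so the
     element permutation d->perm = { 1, 4, 2, 7 } expands to the byte
     selector { 4,5,6,7, 16,17,18,19, 8,9,10,11, 28,29,30,31 } that
     pshufb/vpperm consume.  */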
30478 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30479 vperm = force_reg (V16QImode, vperm);
30481 target = gen_lowpart (V16QImode, d->target);
30482 op0 = gen_lowpart (V16QImode, d->op0);
30483 if (d->op0 == d->op1)
30484 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30487 op1 = gen_lowpart (V16QImode, d->op1);
30488 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
30494 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30495 in a single instruction. */
30498 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30500 unsigned i, nelt = d->nelt;
30501 unsigned char perm2[MAX_VECT_LEN];
30503 /* Check plain VEC_SELECT first, because AVX has instructions that could
30504 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30505 input where SEL+CONCAT may not. */
30506 if (d->op0 == d->op1)
30508 int mask = nelt - 1;
30510 for (i = 0; i < nelt; i++)
30511 perm2[i] = d->perm[i] & mask;
30513 if (expand_vselect (d->target, d->op0, perm2, nelt))
30516 /* There are plenty of patterns in sse.md that are written for
30517 SEL+CONCAT and are not replicated for a single op. Perhaps
30518 that should be changed, to avoid the nastiness here. */
30520 /* Recognize interleave style patterns, which means incrementing
30521 every other permutation operand. */
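      /* Worked example (illustrative): the one-operand V4SF permutation
	 { 0, 0, 1, 1 } becomes perm2 = { 0, 4, 1, 5 } on (op0, op0),
	 i.e. a plain unpcklps of the operand with itself.  */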
30522 for (i = 0; i < nelt; i += 2)
30524 perm2[i] = d->perm[i] & mask;
30525 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30527 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30530 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30533 for (i = 0; i < nelt; i += 4)
30535 perm2[i + 0] = d->perm[i + 0] & mask;
30536 perm2[i + 1] = d->perm[i + 1] & mask;
30537 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30538 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30541 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30546 /* Finally, try the fully general two operand permute. */
30547 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30550 /* Recognize interleave style patterns with reversed operands. */
30551 if (d->op0 != d->op1)
30553 for (i = 0; i < nelt; ++i)
30555 unsigned e = d->perm[i];
30563 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30567 /* Try the SSE4.1 blend variable merge instructions. */
30568 if (expand_vec_perm_blend (d))
30571 /* Try one of the AVX vpermil variable permutations. */
30572 if (expand_vec_perm_vpermil (d))
30575 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30576 if (expand_vec_perm_pshufb (d))
30582 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30583 in terms of a pair of pshuflw + pshufhw instructions. */
30586 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30588 unsigned char perm2[MAX_VECT_LEN];
30592 if (d->vmode != V8HImode || d->op0 != d->op1)
30595 /* The two permutations only operate in 64-bit lanes. */
30596 for (i = 0; i < 4; ++i)
30597 if (d->perm[i] >= 4)
30599 for (i = 4; i < 8; ++i)
30600 if (d->perm[i] < 4)
30606 /* Emit the pshuflw. */
30607 memcpy (perm2, d->perm, 4);
30608 for (i = 4; i < 8; ++i)
30610 ok = expand_vselect (d->target, d->op0, perm2, 8);
30613 /* Emit the pshufhw. */
30614 memcpy (perm2 + 4, d->perm + 4, 4);
30615 for (i = 0; i < 4; ++i)
30617 ok = expand_vselect (d->target, d->target, perm2, 8);
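  /* Worked example (illustrative): d->perm = { 2, 0, 3, 1, 5, 4, 7, 6 }
     keeps indices 0-3 in the low quadword and 4-7 in the high one, so
     the first vselect is pshuflw { 2, 0, 3, 1, 4, 5, 6, 7 } and the
     second pshufhw { 0, 1, 2, 3, 5, 4, 7, 6 }.  */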
30623 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30624 the permutation using the SSSE3 palignr instruction. This succeeds
30625 when all of the elements in PERM fit within one vector and we merely
30626 need to shift them down so that a single vector permutation has a
30627 chance to succeed. */
30630 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30632 unsigned i, nelt = d->nelt;
30637 /* Even with AVX, palignr only operates on 128-bit vectors. */
30638 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30641 min = nelt, max = 0;
30642 for (i = 0; i < nelt; ++i)
30644 unsigned e = d->perm[i];
30650 if (min == 0 || max - min >= nelt)
30653 /* Given that we have SSSE3, we know we'll be able to implement the
30654 single operand permutation after the palignr with pshufb. */
30658 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30659 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30660 gen_lowpart (TImode, d->op1),
30661 gen_lowpart (TImode, d->op0), shift));
30663 d->op0 = d->op1 = d->target;
30666 for (i = 0; i < nelt; ++i)
30668 unsigned e = d->perm[i] - min;
30674 /* Test for the degenerate case where the alignment by itself
30675 produces the desired permutation. */
30679 ok = expand_vec_perm_1 (d);
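  /* Worked example (illustrative): for V8HImode with
     d->perm = { 3, 4, 5, 6, 7, 8, 9, 10 }, min == 3 and
     max - min == 7 < nelt, so palignr shifts the op1:op0 pair down by
     3 * 16 bits; the residual permutation { 0, 1, ..., 7 } is then the
     identity, the degenerate case tested above.  */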
30685 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30686 a two vector permutation into a single vector permutation by using
30687 an interleave operation to merge the vectors. */
30690 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30692 struct expand_vec_perm_d dremap, dfinal;
30693 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30694 unsigned contents, h1, h2, h3, h4;
30695 unsigned char remap[2 * MAX_VECT_LEN];
30699 if (d->op0 == d->op1)
30702 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
30703 lanes. We can use similar techniques with the vperm2f128 instruction,
30704 but it requires slightly different logic. */
30705 if (GET_MODE_SIZE (d->vmode) != 16)
30708 /* Examine from whence the elements come. */
30710 for (i = 0; i < nelt; ++i)
30711 contents |= 1u << d->perm[i];
30713 /* Split the two input vectors into 4 halves. */
30714 h1 = (1u << nelt2) - 1;
30715 h2 = h1 << nelt2;
30716 h3 = h1 << nelt;
30717 h4 = h1 << (nelt + nelt2);
30719 memset (remap, 0xff, sizeof (remap));
30722 /* If all the elements come from the low halves, use interleave low; similarly
30723 for interleave high. If the elements come from mismatched halves, we
30724 can use shufps for V4SF/V4SI or do a DImode shuffle. */
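  /* Worked example (illustrative): V4SFmode has nelt == 4, nelt2 == 2,
     so h1 == 0x3, h2 == 0xc, h3 == 0x30, h4 == 0xc0.  For
     d->perm = { 0, 4, 1, 5 }, contents == 0x33 == (h1 | h3): every
     element lives in a low half, so an interleave-low (unpcklps) of
     the two inputs gathers all four elements into one vector, and the
     final permutation is rewritten through REMAP accordingly.  */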
30725 if ((contents & (h1 | h3)) == contents)
30727 for (i = 0; i < nelt2; ++i)
30730 remap[i + nelt] = i * 2 + 1;
30731 dremap.perm[i * 2] = i;
30732 dremap.perm[i * 2 + 1] = i + nelt;
30735 else if ((contents & (h2 | h4)) == contents)
30737 for (i = 0; i < nelt2; ++i)
30739 remap[i + nelt2] = i * 2;
30740 remap[i + nelt + nelt2] = i * 2 + 1;
30741 dremap.perm[i * 2] = i + nelt2;
30742 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
30745 else if ((contents & (h1 | h4)) == contents)
30747 for (i = 0; i < nelt2; ++i)
30750 remap[i + nelt + nelt2] = i + nelt2;
30751 dremap.perm[i] = i;
30752 dremap.perm[i + nelt2] = i + nelt + nelt2;
30756 dremap.vmode = V2DImode;
30758 dremap.perm[0] = 0;
30759 dremap.perm[1] = 3;
30762 else if ((contents & (h2 | h3)) == contents)
30764 for (i = 0; i < nelt2; ++i)
30766 remap[i + nelt2] = i;
30767 remap[i + nelt] = i + nelt2;
30768 dremap.perm[i] = i + nelt2;
30769 dremap.perm[i + nelt2] = i + nelt;
30773 dremap.vmode = V2DImode;
30775 dremap.perm[0] = 1;
30776 dremap.perm[1] = 2;
30782 /* Use the remapping array set up above to move the elements from their
30783 swizzled locations into their final destinations. */
30785 for (i = 0; i < nelt; ++i)
30787 unsigned e = remap[d->perm[i]];
30788 gcc_assert (e < nelt);
30789 dfinal.perm[i] = e;
30791 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
30792 dfinal.op1 = dfinal.op0;
30793 dremap.target = dfinal.op0;
30795 /* Test if the final remap can be done with a single insn. For V4SFmode or
30796 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
30798 ok = expand_vec_perm_1 (&dfinal);
30799 seq = get_insns ();
30805 if (dremap.vmode != dfinal.vmode)
30807 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
30808 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
30809 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
30812 ok = expand_vec_perm_1 (&dremap);
30819 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
30820 permutation with two pshufb insns and an ior. We should have already
30821 failed all two instruction sequences. */
30824 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
30826 rtx rperm[2][16], vperm, l, h, op, m128;
30827 unsigned int i, nelt, eltsz;
30829 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30831 gcc_assert (d->op0 != d->op1);
30834 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30836 /* Generate two permutation masks. If the required element is within
30837 the given vector it is shuffled into the proper lane. If the required
30838 element is in the other vector, force a zero into the lane by setting
30839 bit 7 in the permutation mask. */
30840 m128 = GEN_INT (-128);
30841 for (i = 0; i < nelt; ++i)
30843 unsigned j, e = d->perm[i];
30844 unsigned which = (e >= nelt);
30848 for (j = 0; j < eltsz; ++j)
30850 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
30851 rperm[1-which][i*eltsz + j] = m128;
30855 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
30856 vperm = force_reg (V16QImode, vperm);
30858 l = gen_reg_rtx (V16QImode);
30859 op = gen_lowpart (V16QImode, d->op0);
30860 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
30862 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
30863 vperm = force_reg (V16QImode, vperm);
30865 h = gen_reg_rtx (V16QImode);
30866 op = gen_lowpart (V16QImode, d->op1);
30867 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
30869 op = gen_lowpart (V16QImode, d->target);
30870 emit_insn (gen_iorv16qi3 (op, l, h));
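  /* Worked example (illustrative): for V16QImode, if d->perm[0] == 18,
     byte 0 of the result lives in op1, so rperm[1][0] selects its
     byte 2, while rperm[0][0] == -128 sets bit 7 and forces byte 0 of
     the op0 shuffle to zero; the final ior then merges the two
     half-results.  */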
30875 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
30876 and extract-odd permutations. */
30879 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
30881 rtx t1, t2, t3, t4;
30886 t1 = gen_reg_rtx (V4DFmode);
30887 t2 = gen_reg_rtx (V4DFmode);
30889 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
30890 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
30891 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
30893 /* Now an unpck[lh]pd will produce the result required. */
30895 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
30897 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
30903 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
30904 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
30905 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
30907 t1 = gen_reg_rtx (V8SFmode);
30908 t2 = gen_reg_rtx (V8SFmode);
30909 t3 = gen_reg_rtx (V8SFmode);
30910 t4 = gen_reg_rtx (V8SFmode);
30912 /* Shuffle within the 128-bit lanes to produce:
30913 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
30914 expand_vselect (t1, d->op0, perm1, 8);
30915 expand_vselect (t2, d->op1, perm1, 8);
30917 /* Shuffle the lanes around to produce:
30918 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
30919 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
30920 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
30922 /* Now a vpermil2p will produce the result required. */
30923 /* ??? The vpermil2p requires a vector constant. Another option
30925 is an unpck[lh]ps to merge the two vectors to produce
30925 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
30926 vpermilps to get the elements into the final order. */
30929 memcpy (d->perm, odd ? permo : perme, 8);
30930 expand_vec_perm_vpermil (d);
30938 /* These are always directly implementable by expand_vec_perm_1. */
30939 gcc_unreachable ();
30943 return expand_vec_perm_pshufb2 (d);
30946 /* We need 2*log2(N)-1 operations to achieve odd/even
30947 with interleave. */
30948 t1 = gen_reg_rtx (V8HImode);
30949 t2 = gen_reg_rtx (V8HImode);
30950 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
30951 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
30952 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
30953 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
30955 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
30957 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
30964 return expand_vec_perm_pshufb2 (d);
30967 t1 = gen_reg_rtx (V16QImode);
30968 t2 = gen_reg_rtx (V16QImode);
30969 t3 = gen_reg_rtx (V16QImode);
30970 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
30971 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
30972 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
30973 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
30974 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
30975 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
30977 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
30979 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
30985 gcc_unreachable ();
30991 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30992 extract-even and extract-odd permutations. */
30995 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
30997 unsigned i, odd, nelt = d->nelt;
31000 if (odd != 0 && odd != 1)
31003 for (i = 1; i < nelt; ++i)
31004 if (d->perm[i] != 2 * i + odd)
31007 return expand_vec_perm_even_odd_1 (d, odd);
31010 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31011 permutations. We assume that expand_vec_perm_1 has already failed. */
31014 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31016 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31017 enum machine_mode vmode = d->vmode;
31018 unsigned char perm2[4];
31026 /* These are special-cased in sse.md so that we can optionally
31027 use the vbroadcast instruction. They expand to two insns
31028 if the input happens to be in a register. */
31029 gcc_unreachable ();
31035 /* These are always implementable using standard shuffle patterns. */
31036 gcc_unreachable ();
31040 /* These can be implemented via interleave. We save one insn by
31041 stopping once we have promoted to V4SImode and then use pshufd. */
31044 optab otab = vec_interleave_low_optab;
31048 otab = vec_interleave_high_optab;
31053 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31054 vmode = get_mode_wider_vector (vmode);
31055 op0 = gen_lowpart (vmode, op0);
31057 while (vmode != V4SImode);
31059 memset (perm2, elt, 4);
31060 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
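	/* Worked example (illustrative): broadcasting byte 5 of a
	   V16QImode vector interleaves the vector with itself (bytes,
	   then words), so after two steps the chosen byte fills an
	   entire 32-bit element; ELT is re-expressed relative to the
	   surviving half at each step, and a single pshufd
	   { elt, elt, elt, elt } then replicates that V4SImode
	   element.  */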
31065 gcc_unreachable ();
31069 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31070 broadcast permutations. */
31073 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31075 unsigned i, elt, nelt = d->nelt;
31077 if (d->op0 != d->op1)
31081 for (i = 1; i < nelt; ++i)
31082 if (d->perm[i] != elt)
31085 return expand_vec_perm_broadcast_1 (d);
31088 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31089 With all of the interface bits taken care of, perform the expansion
31090 in D and return true on success. */
31093 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31095 /* Try a single instruction expansion. */
31096 if (expand_vec_perm_1 (d))
31099 /* Try sequences of two instructions. */
31101 if (expand_vec_perm_pshuflw_pshufhw (d))
31104 if (expand_vec_perm_palignr (d))
31107 if (expand_vec_perm_interleave2 (d))
31110 if (expand_vec_perm_broadcast (d))
31113 /* Try sequences of three instructions. */
31115 if (expand_vec_perm_pshufb2 (d))
31118 /* ??? Look for narrow permutations whose element orderings would
31119 allow the promotion to a wider mode. */
31121 /* ??? Look for sequences of interleave or a wider permute that place
31122 the data into the correct lanes for a half-vector shuffle like
31123 pshuf[lh]w or vpermilps. */
31125 /* ??? Look for sequences of interleave that produce the desired results.
31126 The combinatorics of punpck[lh] get pretty ugly... */
31128 if (expand_vec_perm_even_odd (d))
31134 /* Extract the values from the vector CST into the permutation array in D.
31135 Return 0 on error, 1 if all values from the permutation come from the
31136 first vector, 2 if all values from the second vector, and 3 otherwise. */
31139 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31141 tree list = TREE_VECTOR_CST_ELTS (cst);
31142 unsigned i, nelt = d->nelt;
31145 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31147 unsigned HOST_WIDE_INT e;
31149 if (!host_integerp (TREE_VALUE (list), 1))
31151 e = tree_low_cst (TREE_VALUE (list), 1);
31155 ret |= (e < nelt ? 1 : 2);
31158 gcc_assert (list == NULL);
31160 /* For all elements from second vector, fold the elements to first. */
31162 for (i = 0; i < nelt; ++i)
31163 d->perm[i] -= nelt;
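  /* Worked example (illustrative): with nelt == 4, the mask constant
     { 1, 5, 3, 7 } pulls from both vectors and yields 3, while
     { 5, 7, 5, 7 } yields 2 and is folded to { 1, 3, 1, 3 } so that
     callers can treat it as a single-operand permutation.  */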
31169 ix86_expand_vec_perm_builtin (tree exp)
31171 struct expand_vec_perm_d d;
31172 tree arg0, arg1, arg2;
31174 arg0 = CALL_EXPR_ARG (exp, 0);
31175 arg1 = CALL_EXPR_ARG (exp, 1);
31176 arg2 = CALL_EXPR_ARG (exp, 2);
31178 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31179 d.nelt = GET_MODE_NUNITS (d.vmode);
31180 d.testing_p = false;
31181 gcc_assert (VECTOR_MODE_P (d.vmode));
31183 if (TREE_CODE (arg2) != VECTOR_CST)
31185 error_at (EXPR_LOCATION (exp),
31186 "vector permutation requires vector constant");
31190 switch (extract_vec_perm_cst (&d, arg2))
31196 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31200 if (!operand_equal_p (arg0, arg1, 0))
31202 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31203 d.op0 = force_reg (d.vmode, d.op0);
31204 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31205 d.op1 = force_reg (d.vmode, d.op1);
31209 /* The elements of PERM do not suggest that only the first operand
31210 is used, but both operands are identical. Allow easier matching
31211 of the permutation by folding the permutation into the single
31212 input vector. */
31214 unsigned i, nelt = d.nelt;
31215 for (i = 0; i < nelt; ++i)
31216 if (d.perm[i] >= nelt)
31222 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31223 d.op0 = force_reg (d.vmode, d.op0);
31228 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31229 d.op0 = force_reg (d.vmode, d.op0);
31234 d.target = gen_reg_rtx (d.vmode);
31235 if (ix86_expand_vec_perm_builtin_1 (&d))
31238 /* For compiler-generated permutations, we should never get here, because
31239 the compiler should also be checking the ok hook. But since this is a
31240 builtin the user has access to, don't abort; issue a sorry instead. */
31244 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31247 sorry ("vector permutation (%d %d %d %d)",
31248 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31251 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31252 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31253 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31256 sorry ("vector permutation "
31257 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31258 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31259 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31260 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31261 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31264 gcc_unreachable ();
31267 return CONST0_RTX (d.vmode);
31270 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31273 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31275 struct expand_vec_perm_d d;
31279 d.vmode = TYPE_MODE (vec_type);
31280 d.nelt = GET_MODE_NUNITS (d.vmode);
31281 d.testing_p = true;
31283 /* Given sufficient ISA support we can just return true here
31284 for selected vector modes. */
31285 if (GET_MODE_SIZE (d.vmode) == 16)
31287 /* All implementable with a single vpperm insn. */
31290 /* All implementable with 2 pshufb + 1 ior. */
31293 /* All implementable with shufpd or unpck[lh]pd. */
31298 vec_mask = extract_vec_perm_cst (&d, mask);
31300 /* This hook cannot be called in response to something that the
31301 user does (unlike the builtin expander), so we should never see
31302 an error generated from the extract. */
31303 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31304 one_vec = (vec_mask != 3);
31306 /* Implementable with shufps or pshufd. */
31307 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31310 /* Otherwise we have to go through the motions and see if we can
31311 figure out how to generate the requested permutation. */
31312 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31313 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31315 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31318 ret = ix86_expand_vec_perm_builtin_1 (&d);
31325 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31327 struct expand_vec_perm_d d;
31333 d.vmode = GET_MODE (targ);
31334 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31335 d.testing_p = false;
31337 for (i = 0; i < nelt; ++i)
31338 d.perm[i] = i * 2 + odd;
31340 /* We'll either be able to implement the permutation directly... */
31341 if (expand_vec_perm_1 (&d))
31344 /* ... or we use the special-case patterns. */
31345 expand_vec_perm_even_odd_1 (&d, odd);
31348 /* This function returns the calling-ABI-specific va_list type node:
31349 the va_list type appropriate for FNDECL. */
31352 ix86_fn_abi_va_list (tree fndecl)
31355 return va_list_type_node;
31356 gcc_assert (fndecl != NULL_TREE);
31358 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31359 return ms_va_list_type_node;
31361 return sysv_va_list_type_node;
31364 /* Returns the canonical va_list type specified by TYPE. If there
31365 is no valid TYPE provided, it returns NULL_TREE. */
31368 ix86_canonical_va_list_type (tree type)
31372 /* Resolve references and pointers to va_list type. */
31373 if (TREE_CODE (type) == MEM_REF)
31374 type = TREE_TYPE (type);
31375 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
31376 type = TREE_TYPE (type);
31377 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31378 type = TREE_TYPE (type);
31382 wtype = va_list_type_node;
31383 gcc_assert (wtype != NULL_TREE);
31385 if (TREE_CODE (wtype) == ARRAY_TYPE)
31387 /* If va_list is an array type, the argument may have decayed
31388 to a pointer type, e.g. by being passed to another function.
31389 In that case, unwrap both types so that we can compare the
31390 underlying records. */
31391 if (TREE_CODE (htype) == ARRAY_TYPE
31392 || POINTER_TYPE_P (htype))
31394 wtype = TREE_TYPE (wtype);
31395 htype = TREE_TYPE (htype);
31398 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31399 return va_list_type_node;
31400 wtype = sysv_va_list_type_node;
31401 gcc_assert (wtype != NULL_TREE);
31403 if (TREE_CODE (wtype) == ARRAY_TYPE)
31405 /* If va_list is an array type, the argument may have decayed
31406 to a pointer type, e.g. by being passed to another function.
31407 In that case, unwrap both types so that we can compare the
31408 underlying records. */
31409 if (TREE_CODE (htype) == ARRAY_TYPE
31410 || POINTER_TYPE_P (htype))
31412 wtype = TREE_TYPE (wtype);
31413 htype = TREE_TYPE (htype);
31416 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31417 return sysv_va_list_type_node;
31418 wtype = ms_va_list_type_node;
31419 gcc_assert (wtype != NULL_TREE);
31421 if (TREE_CODE (wtype) == ARRAY_TYPE)
31423 /* If va_list is an array type, the argument may have decayed
31424 to a pointer type, e.g. by being passed to another function.
31425 In that case, unwrap both types so that we can compare the
31426 underlying records. */
31427 if (TREE_CODE (htype) == ARRAY_TYPE
31428 || POINTER_TYPE_P (htype))
31430 wtype = TREE_TYPE (wtype);
31431 htype = TREE_TYPE (htype);
31434 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31435 return ms_va_list_type_node;
31438 return std_canonical_va_list_type (type);
31441 /* Iterate through the target-specific builtin types for va_list.
31442 IDX denotes the iterator, *PTREE is set to the result type of
31443 the va_list builtin, and *PNAME to its internal type.
31444 Returns zero if there is no element for this index, otherwise
31445 IDX should be increased upon the next call.
31446 Note, do not iterate a base builtin's name like __builtin_va_list.
31447 Used from c_common_nodes_and_builtins. */
31450 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31460 *ptree = ms_va_list_type_node;
31461 *pname = "__builtin_ms_va_list";
31465 *ptree = sysv_va_list_type_node;
31466 *pname = "__builtin_sysv_va_list";
31474 /* Initialize the GCC target structure. */
31475 #undef TARGET_RETURN_IN_MEMORY
31476 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
31478 #undef TARGET_LEGITIMIZE_ADDRESS
31479 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
31481 #undef TARGET_ATTRIBUTE_TABLE
31482 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
31483 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31484 # undef TARGET_MERGE_DECL_ATTRIBUTES
31485 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
31488 #undef TARGET_COMP_TYPE_ATTRIBUTES
31489 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
31491 #undef TARGET_INIT_BUILTINS
31492 #define TARGET_INIT_BUILTINS ix86_init_builtins
31493 #undef TARGET_BUILTIN_DECL
31494 #define TARGET_BUILTIN_DECL ix86_builtin_decl
31495 #undef TARGET_EXPAND_BUILTIN
31496 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
31498 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
31499 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
31500 ix86_builtin_vectorized_function
31502 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
31503 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
31505 #undef TARGET_BUILTIN_RECIPROCAL
31506 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
31508 #undef TARGET_ASM_FUNCTION_EPILOGUE
31509 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
31511 #undef TARGET_ENCODE_SECTION_INFO
31512 #ifndef SUBTARGET_ENCODE_SECTION_INFO
31513 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
31515 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
31518 #undef TARGET_ASM_OPEN_PAREN
31519 #define TARGET_ASM_OPEN_PAREN ""
31520 #undef TARGET_ASM_CLOSE_PAREN
31521 #define TARGET_ASM_CLOSE_PAREN ""
31523 #undef TARGET_ASM_BYTE_OP
31524 #define TARGET_ASM_BYTE_OP ASM_BYTE
31526 #undef TARGET_ASM_ALIGNED_HI_OP
31527 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
31528 #undef TARGET_ASM_ALIGNED_SI_OP
31529 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
31531 #undef TARGET_ASM_ALIGNED_DI_OP
31532 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
31535 #undef TARGET_PROFILE_BEFORE_PROLOGUE
31536 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
31538 #undef TARGET_ASM_UNALIGNED_HI_OP
31539 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
31540 #undef TARGET_ASM_UNALIGNED_SI_OP
31541 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
31542 #undef TARGET_ASM_UNALIGNED_DI_OP
31543 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
31545 #undef TARGET_PRINT_OPERAND
31546 #define TARGET_PRINT_OPERAND ix86_print_operand
31547 #undef TARGET_PRINT_OPERAND_ADDRESS
31548 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
31549 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
31550 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
31551 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
31552 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
31554 #undef TARGET_SCHED_ADJUST_COST
31555 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
31556 #undef TARGET_SCHED_ISSUE_RATE
31557 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
31558 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
31559 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
31560 ia32_multipass_dfa_lookahead
31562 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
31563 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
31566 #undef TARGET_HAVE_TLS
31567 #define TARGET_HAVE_TLS true
31569 #undef TARGET_CANNOT_FORCE_CONST_MEM
31570 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
31571 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
31572 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
31574 #undef TARGET_DELEGITIMIZE_ADDRESS
31575 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
31577 #undef TARGET_MS_BITFIELD_LAYOUT_P
31578 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
31581 #undef TARGET_BINDS_LOCAL_P
31582 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
31584 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31585 #undef TARGET_BINDS_LOCAL_P
31586 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
31589 #undef TARGET_ASM_OUTPUT_MI_THUNK
31590 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
31591 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
31592 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
31594 #undef TARGET_ASM_FILE_START
31595 #define TARGET_ASM_FILE_START x86_file_start
31597 #undef TARGET_DEFAULT_TARGET_FLAGS
31598 #define TARGET_DEFAULT_TARGET_FLAGS \
31600 | TARGET_SUBTARGET_DEFAULT \
31601 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
31604 #undef TARGET_HANDLE_OPTION
31605 #define TARGET_HANDLE_OPTION ix86_handle_option
31607 #undef TARGET_REGISTER_MOVE_COST
31608 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
31609 #undef TARGET_MEMORY_MOVE_COST
31610 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
31611 #undef TARGET_RTX_COSTS
31612 #define TARGET_RTX_COSTS ix86_rtx_costs
31613 #undef TARGET_ADDRESS_COST
31614 #define TARGET_ADDRESS_COST ix86_address_cost
31616 #undef TARGET_FIXED_CONDITION_CODE_REGS
31617 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
31618 #undef TARGET_CC_MODES_COMPATIBLE
31619 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
31621 #undef TARGET_MACHINE_DEPENDENT_REORG
31622 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
31624 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
31625 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
31627 #undef TARGET_BUILD_BUILTIN_VA_LIST
31628 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
31630 #undef TARGET_ENUM_VA_LIST_P
31631 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
31633 #undef TARGET_FN_ABI_VA_LIST
31634 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
31636 #undef TARGET_CANONICAL_VA_LIST_TYPE
31637 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
31639 #undef TARGET_EXPAND_BUILTIN_VA_START
31640 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
31642 #undef TARGET_MD_ASM_CLOBBERS
31643 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
31645 #undef TARGET_PROMOTE_PROTOTYPES
31646 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
31647 #undef TARGET_STRUCT_VALUE_RTX
31648 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
31649 #undef TARGET_SETUP_INCOMING_VARARGS
31650 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
31651 #undef TARGET_MUST_PASS_IN_STACK
31652 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
31653 #undef TARGET_FUNCTION_ARG_ADVANCE
31654 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
31655 #undef TARGET_FUNCTION_ARG
31656 #define TARGET_FUNCTION_ARG ix86_function_arg
31657 #undef TARGET_PASS_BY_REFERENCE
31658 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
31659 #undef TARGET_INTERNAL_ARG_POINTER
31660 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
31661 #undef TARGET_UPDATE_STACK_BOUNDARY
31662 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
31663 #undef TARGET_GET_DRAP_RTX
31664 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
31665 #undef TARGET_STRICT_ARGUMENT_NAMING
31666 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
31667 #undef TARGET_STATIC_CHAIN
31668 #define TARGET_STATIC_CHAIN ix86_static_chain
31669 #undef TARGET_TRAMPOLINE_INIT
31670 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
31671 #undef TARGET_RETURN_POPS_ARGS
31672 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
31674 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
31675 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
31677 #undef TARGET_SCALAR_MODE_SUPPORTED_P
31678 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
31680 #undef TARGET_VECTOR_MODE_SUPPORTED_P
31681 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
31683 #undef TARGET_C_MODE_FOR_SUFFIX
31684 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
31687 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
31688 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
31691 #ifdef SUBTARGET_INSERT_ATTRIBUTES
31692 #undef TARGET_INSERT_ATTRIBUTES
31693 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
31696 #undef TARGET_MANGLE_TYPE
31697 #define TARGET_MANGLE_TYPE ix86_mangle_type
31699 #undef TARGET_STACK_PROTECT_FAIL
31700 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
31702 #undef TARGET_FUNCTION_VALUE
31703 #define TARGET_FUNCTION_VALUE ix86_function_value
31705 #undef TARGET_FUNCTION_VALUE_REGNO_P
31706 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
31708 #undef TARGET_SECONDARY_RELOAD
31709 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
31711 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
31712 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
31713 ix86_builtin_vectorization_cost
31714 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
31715 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
31716 ix86_vectorize_builtin_vec_perm
31717 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
31718 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
31719 ix86_vectorize_builtin_vec_perm_ok
31721 #undef TARGET_SET_CURRENT_FUNCTION
31722 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
31724 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
31725 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
31727 #undef TARGET_OPTION_SAVE
31728 #define TARGET_OPTION_SAVE ix86_function_specific_save
31730 #undef TARGET_OPTION_RESTORE
31731 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
31733 #undef TARGET_OPTION_PRINT
31734 #define TARGET_OPTION_PRINT ix86_function_specific_print
31736 #undef TARGET_CAN_INLINE_P
31737 #define TARGET_CAN_INLINE_P ix86_can_inline_p
31739 #undef TARGET_EXPAND_TO_RTL_HOOK
31740 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
31742 #undef TARGET_LEGITIMATE_ADDRESS_P
31743 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
31745 #undef TARGET_IRA_COVER_CLASSES
31746 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
31748 #undef TARGET_FRAME_POINTER_REQUIRED
31749 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
31751 #undef TARGET_CAN_ELIMINATE
31752 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
31754 #undef TARGET_ASM_CODE_END
31755 #define TARGET_ASM_CODE_END ix86_code_end
31757 struct gcc_target targetm = TARGET_INITIALIZER;
31759 #include "gt-i386.h"