/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"

static rtx legitimize_dllimport_symbol (rtx, bool);
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
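
/* Annotation (illustrative sketch, not part of the original source): how
   MODE_INDEX pairs with the five-entry cost arrays in the tables below
   (QI, HI, SI, DI, other).  The field name mult_init is an assumption
   following struct processor_costs in i386.h.  */
#if 0
static int
example_mult_cost (const struct processor_costs *cost,
                   enum machine_mode mode)
{
  /* Index 0-3 for QImode/HImode/SImode/DImode, 4 for any other mode.  */
  return cost->mult_init[MODE_INDEX (mode)];
}
#endif
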
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
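
/* Annotation (illustrative sketch, not part of the original source): each
   stringop strategy below is a stringop_algs initializer: a fallback
   algorithm for unknown sizes plus a list of {max_size, algorithm} pairs
   terminated by max_size == -1, as DUMMY_STRINGOP_ALGS shows; the entries
   come in pairs, which I take to be the 32bit and 64bit variants.  The
   field names unknown_size, size, max and alg are assumptions following
   struct stringop_algs in i386.h.  */
#if 0
static enum stringop_alg
example_pick_stringop (const struct stringop_algs *algs, HOST_WIDE_INT n)
{
  int i;
  /* The {-1, alg} terminator acts as "everything larger".  */
  for (i = 0; ; i++)
    if (algs->size[i].max == -1 || n <= algs->size[i].max)
      return algs->size[i].alg;
}
#endif
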
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2), /* cost of an add instruction */
  COSTS_N_BYTES (3), /* cost of a lea instruction */
  COSTS_N_BYTES (2), /* variable shift costs */
  COSTS_N_BYTES (3), /* constant shift costs */
  {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3), /* HI */
   COSTS_N_BYTES (3), /* SI */
   COSTS_N_BYTES (3), /* DI */
   COSTS_N_BYTES (5)}, /* other */
  COSTS_N_BYTES (3), /* cost of movsx */
  COSTS_N_BYTES (3), /* cost of movzx */
  2, /* cost for loading QImode using movzbl */
  {2, 2, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 2}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {2, 2, 2}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  3, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {3, 3}, /* cost of storing MMX registers
             in SImode and DImode */
  3, /* cost of moving SSE register */
  {3, 3, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {3, 3, 3}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of l1 cache */
  0, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
  COSTS_N_BYTES (2), /* cost of FMUL instruction. */
  COSTS_N_BYTES (2), /* cost of FDIV instruction. */
  COSTS_N_BYTES (2), /* cost of FABS instruction. */
  COSTS_N_BYTES (2), /* cost of FCHS instruction. */
  COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  1, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  1, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

/* Processor costs (relative to an add) */
struct processor_costs i386_cost = { /* 386 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
   COSTS_N_INSNS (6), /* HI */
   COSTS_N_INSNS (6), /* SI */
   COSTS_N_INSNS (6), /* DI */
   COSTS_N_INSNS (6)}, /* other */
  COSTS_N_INSNS (1), /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (23), /* SI */
   COSTS_N_INSNS (23), /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  0, /* size of l1 cache */
  0, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (27), /* cost of FMUL instruction. */
  COSTS_N_INSNS (88), /* cost of FDIV instruction. */
  COSTS_N_INSNS (22), /* cost of FABS instruction. */
  COSTS_N_INSNS (24), /* cost of FCHS instruction. */
  COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs i486_cost = { /* 486 specific costs */
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (3), /* variable shift costs */
  COSTS_N_INSNS (2), /* constant shift costs */
  {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
   COSTS_N_INSNS (12), /* HI */
   COSTS_N_INSNS (12), /* SI */
   COSTS_N_INSNS (12), /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40), /* HI */
   COSTS_N_INSNS (40), /* SI */
   COSTS_N_INSNS (40), /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  15, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {8, 8, 8}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {8, 8, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  4, /* size of l1 cache.  486 has 8kB cache
        shared for code and data, so 4kB is
        not really precise. */
  4, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (16), /* cost of FMUL instruction. */
  COSTS_N_INSNS (73), /* cost of FDIV instruction. */
  COSTS_N_INSNS (3), /* cost of FABS instruction. */
  COSTS_N_INSNS (3), /* cost of FCHS instruction. */
  COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
   COSTS_N_INSNS (11), /* HI */
   COSTS_N_INSNS (11), /* SI */
   COSTS_N_INSNS (11), /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25), /* HI */
   COSTS_N_INSNS (25), /* SI */
   COSTS_N_INSNS (25), /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  6, /* cost for loading QImode using movzbl */
  {2, 4, 2}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {2, 4, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  8, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 8, 16}, /* cost of loading SSE registers
                 in SImode, DImode and TImode */
  {4, 8, 16}, /* cost of storing SSE registers
                 in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  8, /* size of l2 cache */
  0, /* size of prefetch block */
  0, /* number of parallel prefetches */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3), /* cost of FMUL instruction. */
  COSTS_N_INSNS (39), /* cost of FDIV instruction. */
  COSTS_N_INSNS (1), /* cost of FABS instruction. */
  COSTS_N_INSNS (1), /* cost of FCHS instruction. */
  COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (4), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (4)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17), /* HI */
   COSTS_N_INSNS (17), /* SI */
   COSTS_N_INSNS (17), /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  2, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {2, 2, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  256, /* size of l2 cache */
  32, /* size of prefetch block */
  6, /* number of parallel prefetches */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5), /* cost of FMUL instruction. */
  COSTS_N_INSNS (56), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
    {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
    {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs geode_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (2), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (7), /* SI */
   COSTS_N_INSNS (7), /* DI */
   COSTS_N_INSNS (7)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23), /* HI */
   COSTS_N_INSNS (39), /* SI */
   COSTS_N_INSNS (39), /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  1, /* cost for loading QImode using movzbl */
  {1, 1, 1}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {1, 1, 1}, /* cost of storing integer registers */
  1, /* cost of reg,reg fld/fst */
  {1, 1, 1}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 6, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  1, /* cost of moving MMX register */
  {1, 1}, /* cost of loading MMX registers
             in SImode and DImode */
  {1, 1}, /* cost of storing MMX registers
             in SImode and DImode */
  1, /* cost of moving SSE register */
  {1, 1, 1}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {1, 1, 1}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  1, /* MMX or SSE register to integer */
  64, /* size of l1 cache. */
  128, /* size of l2 cache. */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (11), /* cost of FMUL instruction. */
  COSTS_N_INSNS (47), /* cost of FDIV instruction. */
  COSTS_N_INSNS (1), /* cost of FABS instruction. */
  COSTS_N_INSNS (1), /* cost of FCHS instruction. */
  COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs k6_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (3), /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18), /* HI */
   COSTS_N_INSNS (18), /* SI */
   COSTS_N_INSNS (18), /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2), /* cost of movsx */
  COSTS_N_INSNS (2), /* cost of movzx */
  8, /* "large" insn */
  3, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {2, 3, 2}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {2, 2, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  6, /* MMX or SSE register to integer */
  32, /* size of l1 cache. */
  32, /* size of l2 cache.  Some models
         have integrated l2 cache, but
         optimizing for k6 is not important
         enough to worry about that. */
  32, /* size of prefetch block */
  1, /* number of parallel prefetches */
  COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (2), /* cost of FMUL instruction. */
  COSTS_N_INSNS (56), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
   COSTS_N_INSNS (5), /* HI */
   COSTS_N_INSNS (5), /* SI */
   COSTS_N_INSNS (5), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {4, 4}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache. */
  256, /* size of l2 cache. */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (24), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* For some reason Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs k8_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 3, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  64, /* size of l1 cache. */
  512, /* size of l2 cache. */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100, /* number of parallel prefetches */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (19), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost. */
  2, /* scalar load_cost. */
  2, /* scalar_store_cost. */
  5, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  2, /* vec_align_load_cost. */
  3, /* vec_unalign_load_cost. */
  3, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  2, /* cond_not_taken_branch_cost. */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer.
        On K8:
          MOVD reg64, xmmreg Double FSTORE 4
          MOVD reg32, xmmreg Double FSTORE 4
        On AMDFAM10:
          MOVD reg64, xmmreg Double FADD 3
          MOVD reg32, xmmreg Double FADD 3 */
  64, /* size of l1 cache. */
  512, /* size of l2 cache. */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100, /* number of parallel prefetches */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (19), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost. */
  2, /* scalar load_cost. */
  2, /* scalar_store_cost. */
  6, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  2, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  2, /* vec_store_cost. */
  2, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (2), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (5)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35), /* HI */
   COSTS_N_INSNS (51), /* SI */
   COSTS_N_INSNS (83), /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  4, /* cost for loading QImode using movzbl */
  {3, 4, 3}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {3, 4, 3}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {4, 4, 12}, /* cost of loading fp registers
                 in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {3, 3}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {4, 4, 3}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 5}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  3, /* MMX or SSE register to integer.
        On K8:
          MOVD reg64, xmmreg Double FSTORE 4
          MOVD reg32, xmmreg Double FSTORE 4
        On BDVER1:
          MOVD reg64, xmmreg Double FADD 3
          MOVD reg32, xmmreg Double FADD 3 */
  64, /* size of l1 cache. */
  1024, /* size of l2 cache. */
  64, /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100, /* number of parallel prefetches */
  COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4), /* cost of FMUL instruction. */
  COSTS_N_INSNS (19), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4, /* scalar_stmt_cost. */
  2, /* scalar load_cost. */
  2, /* scalar_store_cost. */
  6, /* vec_stmt_cost. */
  0, /* vec_to_scalar_cost. */
  2, /* scalar_to_vec_cost. */
  2, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  2, /* vec_store_cost. */
  2, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (3), /* cost of a lea instruction */
  COSTS_N_INSNS (4), /* variable shift costs */
  COSTS_N_INSNS (4), /* constant shift costs */
  {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
   COSTS_N_INSNS (15), /* HI */
   COSTS_N_INSNS (15), /* SI */
   COSTS_N_INSNS (15), /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56), /* HI */
   COSTS_N_INSNS (56), /* SI */
   COSTS_N_INSNS (56), /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  2, /* cost for loading QImode using movzbl */
  {4, 5, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {2, 3, 2}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {2, 2, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 6}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {2, 2}, /* cost of loading MMX registers
             in SImode and DImode */
  {2, 2}, /* cost of storing MMX registers
             in SImode and DImode */
  12, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {2, 2, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  10, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  256, /* size of l2 cache. */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (7), /* cost of FMUL instruction. */
  COSTS_N_INSNS (43), /* cost of FDIV instruction. */
  COSTS_N_INSNS (2), /* cost of FABS instruction. */
  COSTS_N_INSNS (2), /* cost of FCHS instruction. */
  COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
    {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1), /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
   COSTS_N_INSNS (10), /* HI */
   COSTS_N_INSNS (10), /* SI */
   COSTS_N_INSNS (10), /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66), /* HI */
   COSTS_N_INSNS (66), /* SI */
   COSTS_N_INSNS (66), /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  16, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {4, 4, 4}, /* cost of storing integer registers */
  3, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  6, /* cost of moving MMX register */
  {12, 12}, /* cost of loading MMX registers
               in SImode and DImode */
  {12, 12}, /* cost of storing MMX registers
               in SImode and DImode */
  6, /* cost of moving SSE register */
  {12, 12, 12}, /* cost of loading SSE registers
                   in SImode, DImode and TImode */
  {12, 12, 12}, /* cost of storing SSE registers
                   in SImode, DImode and TImode */
  8, /* MMX or SSE register to integer */
  8, /* size of l1 cache. */
  1024, /* size of l2 cache. */
  128, /* size of prefetch block */
  8, /* number of parallel prefetches */
  1, /* Branch cost */
  COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8), /* cost of FMUL instruction. */
  COSTS_N_INSNS (40), /* cost of FDIV instruction. */
  COSTS_N_INSNS (3), /* cost of FABS instruction. */
  COSTS_N_INSNS (3), /* cost of FCHS instruction. */
  COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
    {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
    {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs core2_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (3), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (3), /* DI */
   COSTS_N_INSNS (3)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22), /* HI */
   COSTS_N_INSNS (22), /* SI */
   COSTS_N_INSNS (22), /* DI */
   COSTS_N_INSNS (22)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  16, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {6, 6, 6}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {4, 4, 4}, /* cost of storing integer registers */
  2, /* cost of reg,reg fld/fst */
  {6, 6, 6}, /* cost of loading fp registers
                in SFmode, DFmode and XFmode */
  {4, 4, 4}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {6, 6}, /* cost of loading MMX registers
             in SImode and DImode */
  {4, 4}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {6, 6, 6}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {4, 4, 4}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  2, /* MMX or SSE register to integer */
  32, /* size of l1 cache. */
  2048, /* size of l2 cache. */
  128, /* size of prefetch block */
  8, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5), /* cost of FMUL instruction. */
  COSTS_N_INSNS (32), /* cost of FDIV instruction. */
  COSTS_N_INSNS (1), /* cost of FABS instruction. */
  COSTS_N_INSNS (1), /* cost of FCHS instruction. */
  COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

struct processor_costs atom_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  2, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache. */
  256, /* size of l2 cache. */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8), /* cost of FMUL instruction. */
  COSTS_N_INSNS (20), /* cost of FDIV instruction. */
  COSTS_N_INSNS (8), /* cost of FABS instruction. */
  COSTS_N_INSNS (8), /* cost of FCHS instruction. */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
    {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
    {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results
     in the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache. */
  512, /* size of l2 cache. */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when
     this value is increased to the perhaps more appropriate value of 5.  */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8), /* cost of FMUL instruction. */
  COSTS_N_INSNS (20), /* cost of FDIV instruction. */
  COSTS_N_INSNS (8), /* cost of FABS instruction. */
  COSTS_N_INSNS (8), /* cost of FCHS instruction. */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1), /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1), /* variable shift costs */
  COSTS_N_INSNS (1), /* constant shift costs */
  {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
   COSTS_N_INSNS (4), /* HI */
   COSTS_N_INSNS (3), /* SI */
   COSTS_N_INSNS (4), /* DI */
   COSTS_N_INSNS (2)}, /* other */
  0, /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26), /* HI */
   COSTS_N_INSNS (42), /* SI */
   COSTS_N_INSNS (74), /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1), /* cost of movsx */
  COSTS_N_INSNS (1), /* cost of movzx */
  8, /* "large" insn */
  17, /* MOVE_RATIO */
  4, /* cost for loading QImode using movzbl */
  {4, 4, 4}, /* cost of loading integer registers
                in QImode, HImode and SImode.
                Relative to reg-reg move (2). */
  {4, 4, 4}, /* cost of storing integer registers */
  4, /* cost of reg,reg fld/fst */
  {12, 12, 12}, /* cost of loading fp registers
                   in SFmode, DFmode and XFmode */
  {6, 6, 8}, /* cost of storing fp registers
                in SFmode, DFmode and XFmode */
  2, /* cost of moving MMX register */
  {8, 8}, /* cost of loading MMX registers
             in SImode and DImode */
  {8, 8}, /* cost of storing MMX registers
             in SImode and DImode */
  2, /* cost of moving SSE register */
  {8, 8, 8}, /* cost of loading SSE registers
                in SImode, DImode and TImode */
  {8, 8, 8}, /* cost of storing SSE registers
                in SImode, DImode and TImode */
  5, /* MMX or SSE register to integer */
  32, /* size of l1 cache. */
  256, /* size of l2 cache. */
  64, /* size of prefetch block */
  6, /* number of parallel prefetches */
  3, /* Branch cost */
  COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8), /* cost of FMUL instruction. */
  COSTS_N_INSNS (20), /* cost of FDIV instruction. */
  COSTS_N_INSNS (8), /* cost of FABS instruction. */
  COSTS_N_INSNS (8), /* cost of FCHS instruction. */
  COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1, /* scalar_stmt_cost. */
  1, /* scalar load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  2, /* vec_unalign_load_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

const struct processor_costs *ix86_cost = &pentium_cost;
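
/* Annotation (illustrative sketch, not part of the original source): the
   pentium_cost default above is replaced during option processing.  A
   sketch under the assumption of a processor_target_table mapping each
   PROCESSOR_* value to its cost table (hypothetical shape):  */
#if 0
if (optimize_size)
  ix86_cost = &ix86_size_cost;   /* tune purely for code size */
else
  ix86_cost = processor_target_table[ix86_tune].cost;
#endif
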
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
/* Generic instruction choice should be the common subset of supported
   CPUs (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
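
/* Annotation (illustrative sketch, not part of the original source): how
   ix86_tune_features is derived from the mask table below during option
   processing, assuming ix86_tune holds the active PROCESSOR_* value:  */
#if 0
unsigned int ix86_tune_mask = 1u << ix86_tune;
unsigned int i;
for (i = 0; i < X86_TUNE_LAST; i++)
  ix86_tune_features[i]
    = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
#endif
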
/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
  | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results.  But after P4 was made, no performance benefit
     was observed with branch hints.  They also increase code size.
     As a result, icc never generates branch hints.  */
  /* X86_TUNE_DOUBLE_WITH_ADD */

  /* X86_TUNE_USE_SAHF */
  m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not
     eliminated very well - they can be introduced via subregs synthesized
     by combine and can happen in caller/callee-saving sequences.  Because
     this option pays back little on PPro-based chips and conflicts with
     the partial-register dependencies used by Athlon/P4-based chips, it is
     better to leave it off for generic32 for now.  */
  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */

  /* X86_TUNE_SPLIT_LONG_MOVES */

  /* X86_TUNE_READ_MODIFY_WRITE */

  /* X86_TUNE_READ_MODIFY */

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
  | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1473 might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective. */
  ~m_PPRO,
  /* X86_TUNE_PROMOTE_QI_REGS */
  0,
  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,
1483 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1484 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_ADD_ESP_8 */
1488 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1489 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1491 /* X86_TUNE_SUB_ESP_4 */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
  | m_GENERIC,
1495 /* X86_TUNE_SUB_ESP_8 */
1496 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1497 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that treat 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers into two 64bit halves.  This knob promotes all store
     destinations to be 128bit so as to allow register renaming on 128bit
     SSE units, but usually results in one extra microop on 64bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     20% SPECfp regression, while enabling it on K8 brings roughly 2.4%
     regression that can be partly masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
  /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
  m_BDVER1,
  /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  m_BDVER1,
  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and
     dependencies are resolved on SSE register parts instead of whole
     registers, so we may maintain just the lower part of scalar values
     in proper format, leaving the upper part undefined. */
  m_ATHLON_K8,
  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SHIFT1 */
  ~m_486,
  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in the 16 byte window. */
  m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
  | m_GENERIC,
1566 /* X86_TUNE_SCHEDULE */
  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
  | m_GENERIC,
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,
  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_K8 | m_GENERIC64,
1589 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have same latency for HImode
     and SImode multiply, but 386 and 486 do HImode multiply faster. */
  ~(m_386 | m_486),
1593 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1594 vector path on AMD machines. */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
     machines. */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV. */
  m_PENT,
1605 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer. */
  m_PENT,
1609 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1610 operand that cannot be represented using a modRM byte. The XOR
     replacement is long decoded, so this split helps here as well. */
  m_K6,
  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP. */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP. */
  m_AMDFAM10,
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
     compare-and-branch uop. */
  m_CORE2,
1627 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
     will impact LEA instruction selection. */
  m_ATOM,
};
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486. */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
  ~m_386,
};
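/* Illustrative sketch (not part of the original file): the architecture
   feature bits are consumed through one-line macros in i386.h along the
   lines of

     #define TARGET_CMOVE	ix86_arch_features[X86_ARCH_CMOVE]

   so back-end code can simply test TARGET_CMOVE after override_options
   has filled the array from the -march mask. */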
1654 static const unsigned int x86_accumulate_outgoing_args
  = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
  | m_GENERIC;
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
1662 static enum stringop_alg stringop_alg = no_stringop;
1664 /* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code. */
#define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, fpcr, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  /* SSE registers */
  SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  /* MMX registers */
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  /* REX registers */
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
1704 /* The "default" register map used in 32bit mode. */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
1717 /* The "default" register map used in 64bit mode. */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, fpcr, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
1747 It is not at all clear how we should number the FP stack registers
1748 for the x86 architecture. If the version of SDB on x86/svr4 were
1749 a bit less brain dead with respect to floating-point then we would
1750 have a precedent to follow with respect to DWARF register numbers
1751 for x86 FP registers, but the SDB on x86/svr4 is so completely
1752 broken with respect to FP registers that it is hardly worth thinking
1753 of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
1758 particular, and even for DWARF regno 11, SDB only seems to under-
1759 stand that it should say that a variable lives in %st(0) (when
1760 asked via an `=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
   register numbers. Note that these are all stack-top-relative numbers:
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
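/* Illustrative sketch (not part of the original file): the debug back end
   selects between the two maps with a macro along the lines of

     #define DBX_REGISTER_NUMBER(n) \
       (TARGET_64BIT ? dbx64_register_map[(n)] : dbx_register_map[(n)])

   so, for instance, gcc regno 1 (%edx) is emitted as debug register 2 in
   32-bit code, matching the SVR4 numbering documented above. */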
1795 /* Define parameter passing and return registers. */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
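/* Illustrative note (not part of the original file): under the SysV x86-64
   ABI a __int128 return value occupies the first two entries of
   x86_64_int_return_registers -- the low half in AX_REG (%rax) and the
   high half in DX_REG (%rdx). */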
1812 /* Define the structure for the machine field in struct function. */
struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					<- ARG_POINTER
   saved pc

   saved static chain			if ix86_static_chain_on_stack

   saved frame pointer			if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [saved regs]

   [saved SSE regs]
					<- sse_regs_save_offset
   [va_arg registers]	|
   [frame]		|
   [padding2]		| = to_allocate
					<- sp + stack_pointer_offset
   [outgoing arguments]
					<- STACK_POINTER
*/
struct ix86_frame
{
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;
1856 /* The offsets relative to ARG_POINTER. */
1857 HOST_WIDE_INT frame_pointer_offset;
1858 HOST_WIDE_INT hard_frame_pointer_offset;
1859 HOST_WIDE_INT stack_pointer_offset;
1860 HOST_WIDE_INT reg_save_offset;
1861 HOST_WIDE_INT sse_reg_save_offset;
1863 /* When save_regs_using_mov is set, emit prologue using
1864 move instead of push instructions. */
  bool save_regs_using_mov;
};
1868 /* Code model option. */
1869 enum cmodel ix86_cmodel;
1871 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1873 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1875 /* Which unit we are generating floating point math for. */
1876 enum fpmath_unit ix86_fpmath;
1878 /* Which cpu are we scheduling for. */
1879 enum attr_cpu ix86_schedule;
1881 /* Which cpu are we optimizing for. */
1882 enum processor_type ix86_tune;
1884 /* Which instruction set architecture to use. */
1885 enum processor_type ix86_arch;
/* True if the SSE prefetch instruction is not a NOP. */
1888 int x86_prefetch_sse;
1890 /* ix86_regparm_string as a number */
1891 static int ix86_regparm;
1893 /* -mstackrealign option */
1894 extern int ix86_force_align_arg_pointer;
1895 static const char ix86_force_align_arg_pointer_string[]
1896 = "force_align_arg_pointer";
1898 static rtx (*ix86_gen_leave) (void);
1899 static rtx (*ix86_gen_pop1) (rtx);
1900 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1901 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1902 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1903 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1904 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1905 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1906 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1907 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1908 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1910 /* Preferred alignment for stack boundary in bits. */
1911 unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits specified at
   command line. */
1915 static unsigned int ix86_user_incoming_stack_boundary;
1917 /* Default alignment for incoming stack boundary in bits. */
1918 static unsigned int ix86_default_incoming_stack_boundary;
1920 /* Alignment for incoming stack boundary in bits. */
1921 unsigned int ix86_incoming_stack_boundary;
1923 /* The abi used by target. */
1924 enum calling_abi ix86_abi;
1926 /* Values 1-5: see jump.c */
1927 int ix86_branch_cost;
1929 /* Calling abi specific va_list type nodes. */
1930 static GTY(()) tree sysv_va_list_type_node;
1931 static GTY(()) tree ms_va_list_type_node;
1933 /* Variables which are this size or smaller are put in the data/bss
1934 or ldata/lbss sections. */
1936 int ix86_section_threshold = 65536;
1938 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1939 char internal_label_prefix[16];
1940 int internal_label_prefix_len;
/* Fence to use after loop using movnt. */
tree x86_mfence;
/* Register class used for passing a given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class: gcc
   just uses SFmode or DFmode moves instead of DImode moves to avoid
   reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half is then just padding). */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

#define MAX_CLASSES 4
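/* Illustrative example (not part of the original file): classification works
   one eightbyte at a time, so under the psABI a struct { double d; long l; }
   classifies as { X86_64_SSEDF_CLASS, X86_64_INTEGER_CLASS } -- d travels in
   an SSE register and l in a general-purpose register -- while MAX_CLASSES
   bounds a register-passed argument at four eightbytes. */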
1969 /* Table of constants used by fldpi, fldln2, etc.... */
1970 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1971 static bool ext_80387_constants_init = 0;
1974 static struct machine_function * ix86_init_machine_status (void);
1975 static rtx ix86_function_value (const_tree, const_tree, bool);
1976 static bool ix86_function_value_regno_p (const unsigned int);
1977 static rtx ix86_static_chain (const_tree, bool);
1978 static int ix86_function_regparm (const_tree, const_tree);
1979 static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx);
1982 static void ix86_add_new_builtins (int);
1983 static rtx ix86_expand_vec_perm_builtin (tree);
1984 static tree ix86_canonical_va_list_type (tree);
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_FPMATH,
  IX86_FUNCTION_SPECIFIC_MAX
};
1994 static char *ix86_target_string (int, int, const char *, const char *,
1995 const char *, bool);
1996 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1997 static void ix86_function_specific_save (struct cl_target_option *);
1998 static void ix86_function_specific_restore (struct cl_target_option *);
1999 static void ix86_function_specific_print (FILE *, int,
2000 struct cl_target_option *);
2001 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2002 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2003 static bool ix86_can_inline_p (tree, tree);
2004 static void ix86_set_current_function (tree);
2005 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2007 static enum calling_abi ix86_function_abi (const_tree);
2010 #ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory. */
2016 #ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
2020 /* Whether -mtune= or -march= were specified */
2021 static int ix86_tune_defaulted;
2022 static int ix86_arch_specified;
2024 /* Bit flags that specify the ISA we are compiling for. */
2025 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2027 /* A mask of ix86_isa_flags that includes bit X if X
2028 was set or cleared on the command line. */
2029 static int ix86_isa_flags_explicit;
2031 /* Define a set of ISAs which are available when a given ISA is
2032 enabled. MMX and SSE ISAs are handled separately. */
2034 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2035 #define OPTION_MASK_ISA_3DNOW_SET \
2036 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2038 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2039 #define OPTION_MASK_ISA_SSE2_SET \
2040 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2041 #define OPTION_MASK_ISA_SSE3_SET \
2042 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2043 #define OPTION_MASK_ISA_SSSE3_SET \
2044 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2045 #define OPTION_MASK_ISA_SSE4_1_SET \
2046 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2047 #define OPTION_MASK_ISA_SSE4_2_SET \
2048 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2049 #define OPTION_MASK_ISA_AVX_SET \
2050 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2051 #define OPTION_MASK_ISA_FMA_SET \
2052 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
   as -msse4.2. */
2056 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2058 #define OPTION_MASK_ISA_SSE4A_SET \
2059 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2060 #define OPTION_MASK_ISA_FMA4_SET \
2061 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2062 | OPTION_MASK_ISA_AVX_SET)
2063 #define OPTION_MASK_ISA_XOP_SET \
2064 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
#define OPTION_MASK_ISA_LWP_SET OPTION_MASK_ISA_LWP
2068 /* AES and PCLMUL need SSE2 because they use xmm registers */
2069 #define OPTION_MASK_ISA_AES_SET \
2070 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2071 #define OPTION_MASK_ISA_PCLMUL_SET \
2072 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2074 #define OPTION_MASK_ISA_ABM_SET \
2075 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2077 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2078 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2079 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2080 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2081 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2083 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2084 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2085 #define OPTION_MASK_ISA_F16C_SET \
2086 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
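/* Illustrative sketch (not part of the original file): the _SET macros chain
   transitively, so a single switch enables everything it implies.  E.g.
   OPTION_MASK_ISA_SSE4_2_SET expands to

     SSE4_2 | SSE4_1 | SSSE3 | SSE3 | SSE2 | SSE

   and handling -msse4.2 therefore reduces to one statement:

     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
*/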
2088 /* Define a set of ISAs which aren't available when a given ISA is
2089 disabled. MMX and SSE ISAs are handled separately. */
2091 #define OPTION_MASK_ISA_MMX_UNSET \
2092 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2093 #define OPTION_MASK_ISA_3DNOW_UNSET \
2094 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2095 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2097 #define OPTION_MASK_ISA_SSE_UNSET \
2098 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2099 #define OPTION_MASK_ISA_SSE2_UNSET \
2100 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2101 #define OPTION_MASK_ISA_SSE3_UNSET \
2102 (OPTION_MASK_ISA_SSE3 \
2103 | OPTION_MASK_ISA_SSSE3_UNSET \
2104 | OPTION_MASK_ISA_SSE4A_UNSET )
2105 #define OPTION_MASK_ISA_SSSE3_UNSET \
2106 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2107 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2108 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2109 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2110 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2111 #define OPTION_MASK_ISA_AVX_UNSET \
2112 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2113 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2114 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
   as -mno-sse4.1. */
2118 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2120 #define OPTION_MASK_ISA_SSE4A_UNSET \
2121 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2123 #define OPTION_MASK_ISA_FMA4_UNSET \
2124 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2125 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2126 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2128 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2129 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2130 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2131 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2132 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2133 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2134 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2135 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2137 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2138 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2139 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
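/* Illustrative sketch (not part of the original file): the _UNSET macros
   chain in the opposite direction -- disabling an ISA also disables whatever
   is built on top of it.  E.g. -mno-sse3 clears OPTION_MASK_ISA_SSE3_UNSET,
   which transitively covers SSE3, SSSE3, SSE4.1, SSE4.2, SSE4A, FMA4, XOP,
   AVX, FMA and F16C:

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
*/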
2141 /* Vectorization library interface and handlers. */
2142 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2144 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2145 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
2151 const int align_loop; /* Default alignments. */
2152 const int align_loop_max_skip;
2153 const int align_jump;
2154 const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
2160 {&i386_cost, 4, 3, 4, 3, 4},
2161 {&i486_cost, 16, 15, 16, 15, 16},
2162 {&pentium_cost, 16, 7, 16, 7, 16},
2163 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2164 {&geode_cost, 0, 0, 0, 0, 0},
2165 {&k6_cost, 32, 7, 32, 7, 32},
2166 {&athlon_cost, 16, 7, 16, 7, 16},
2167 {&pentium4_cost, 0, 0, 0, 0, 0},
2168 {&k8_cost, 16, 7, 16, 7, 16},
2169 {&nocona_cost, 0, 0, 0, 0, 0},
2170 {&core2_cost, 16, 10, 16, 10, 16},
2171 {&generic32_cost, 16, 7, 16, 7, 16},
2172 {&generic64_cost, 16, 10, 16, 10, 16},
2173 {&amdfam10_cost, 32, 24, 32, 7, 32},
2174 {&bdver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 7, 16, 7, 16}
};
2178 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use. */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Implement TARGET_HANDLE_OPTION. */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
2223 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2224 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2228 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2229 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2236 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2237 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2241 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2242 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2252 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2253 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2257 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2258 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2265 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2266 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2270 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2271 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2278 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2279 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2283 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2284 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2291 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2292 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2296 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2297 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2304 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2305 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2309 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2310 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2317 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2318 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2322 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2323 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2330 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2331 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2335 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2336 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2343 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2344 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2348 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2349 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2354 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2355 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2359 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2360 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2366 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2367 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2371 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2372 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2379 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2380 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2384 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2385 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2392 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2393 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2397 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2398 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2405 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2406 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2410 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2411 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2418 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2419 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2423 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2424 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2431 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2432 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2436 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2437 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2444 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2445 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2449 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2450 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2457 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2458 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2462 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2463 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2470 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2471 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2475 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2476 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2483 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2484 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2488 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2489 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2496 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2497 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2501 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2502 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2509 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2510 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2514 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2515 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2522 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2523 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2527 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2528 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2535 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2536 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2540 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2541 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2548 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2549 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2553 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2554 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
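/* Illustrative sketch (not part of the original file): every ISA option
   above is handled by the same case pattern -- enabling ORs in the _SET
   closure, disabling ANDs out the _UNSET closure, and either way the
   explicit mask records that the user made a choice:

     case OPT_mmmx:
       if (value)
	 {
	   ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
	 }
       else
	 {
	   ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
	 }
       return true;
*/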
2563 /* Return a string that documents the current -m options. The caller is
2564 responsible for freeing the string. */
2567 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2568 const char *fpmath, bool add_nl_p)
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    int mask;			/* isa mask options */
  };
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options will match those first. */
  static struct ix86_target_opts isa_opts[] =
  {
2580 { "-m64", OPTION_MASK_ISA_64BIT },
2581 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2582 { "-mfma", OPTION_MASK_ISA_FMA },
2583 { "-mxop", OPTION_MASK_ISA_XOP },
2584 { "-mlwp", OPTION_MASK_ISA_LWP },
2585 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2586 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2587 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2588 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2589 { "-msse3", OPTION_MASK_ISA_SSE3 },
2590 { "-msse2", OPTION_MASK_ISA_SSE2 },
2591 { "-msse", OPTION_MASK_ISA_SSE },
2592 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2593 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2594 { "-mmmx", OPTION_MASK_ISA_MMX },
2595 { "-mabm", OPTION_MASK_ISA_ABM },
2596 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2597 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2598 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2599 { "-maes", OPTION_MASK_ISA_AES },
2600 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2601 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2602 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2603 { "-mf16c", OPTION_MASK_ISA_F16C },
  static struct ix86_target_opts flag_opts[] =
  {
2609 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2610 { "-m80387", MASK_80387 },
2611 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2612 { "-malign-double", MASK_ALIGN_DOUBLE },
2613 { "-mcld", MASK_CLD },
2614 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2615 { "-mieee-fp", MASK_IEEE_FP },
2616 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2617 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2618 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2619 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2620 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2621 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2622 { "-mno-red-zone", MASK_NO_RED_ZONE },
2623 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2624 { "-mrecip", MASK_RECIP },
2625 { "-mrtd", MASK_RTD },
2626 { "-msseregparm", MASK_SSEREGPARM },
2627 { "-mstack-arg-probe", MASK_STACK_PROBE },
2628 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2631 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
2643 memset (opts, '\0', sizeof (opts));
2645 /* Add -march= option. */
2648 opts[num][0] = "-march=";
2649 opts[num++][1] = arch;
2652 /* Add -mtune= option. */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }
2659 /* Pick out the options in isa options. */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    if ((isa & isa_opts[i].mask) != 0)
      {
	opts[num++][0] = isa_opts[i].option;
	isa &= ~ isa_opts[i].mask;
      }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#x)", isa);
    }
2675 /* Add flag options. */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    if ((flags & flag_opts[i].mask) != 0)
      {
	opts[num++][0] = flag_opts[i].option;
	flags &= ~ flag_opts[i].mask;
      }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
2691 /* Add -fpmath= option. */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      opts[num++][1] = fpmath;
    }
2702 gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string. */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }

  /* Build the string. */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\\';
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
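/* Usage example (illustrative, not part of the original file): for a
   hypothetical "-march=core2 -mcrc32" compilation,

     char *s = ix86_target_string (ix86_isa_flags, target_flags,
				   "core2", NULL, NULL, false);

   might return "-march=core2 -mssse3 -msse3 -msse2 -msse -mmmx -mcrc32";
   the caller is responsible for free ()ing the string. */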
/* Return TRUE if software prefetching is beneficial for the
   specified CPU. */

static bool
software_prefetching_beneficial_p (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_GEODE:
    case PROCESSOR_K6:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
      return true;

    default:
      return false;
    }
}
/* Return true if profiling code should be emitted before the
   prologue; otherwise return false.
   Note: for x86 with "hotfix" patching this is unsupported (a sorry ()
   is issued). */

static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options. */

static void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath_string, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);
}
2803 /* Sometimes certain combinations of command options do not make
2804 sense on a particular target machine. You can define a macro
2805 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2806 defined, is executed once just after all the command options have
2809 Don't use this macro to turn on various extra optimizations for
2810 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
void
override_options (bool main_args_p)
{
  int i;
  const char *prefix;
  const char *suffix;
  const char *sw;
2816 unsigned int ix86_arch_mask, ix86_tune_mask;
2817 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2822 /* Comes from final.c -- no real reason to change it. */
2823 #define MAX_CODE_ALIGN 16
  enum pta_flags
    {
      PTA_SSE = 1 << 0,
      PTA_SSE2 = 1 << 1,
      PTA_SSE3 = 1 << 2,
      PTA_MMX = 1 << 3,
      PTA_PREFETCH_SSE = 1 << 4,
      PTA_3DNOW = 1 << 5,
      PTA_3DNOW_A = 1 << 6,
      PTA_64BIT = 1 << 7,
      PTA_SSSE3 = 1 << 8,
      PTA_CX16 = 1 << 9,
      PTA_POPCNT = 1 << 10,
      PTA_ABM = 1 << 11,
      PTA_SSE4A = 1 << 12,
      PTA_NO_SAHF = 1 << 13,
      PTA_SSE4_1 = 1 << 14,
      PTA_SSE4_2 = 1 << 15,
      PTA_AES = 1 << 16,
      PTA_PCLMUL = 1 << 17,
      PTA_AVX = 1 << 18,
      PTA_FMA = 1 << 19,
      PTA_MOVBE = 1 << 20,
      PTA_FMA4 = 1 << 21,
      PTA_XOP = 1 << 22,
      PTA_LWP = 1 << 23,
      PTA_FSGSBASE = 1 << 24,
      PTA_RDRND = 1 << 25,
      PTA_F16C = 1 << 26
    };
  static struct pta
    {
      const char *const name;		/* processor name or nickname. */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned /*enum pta_flags*/ flags;
    }
  const processor_alias_table[] =
    {
2865 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2866 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2867 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2868 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2869 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2870 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2871 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2872 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2873 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2874 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2875 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2876 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2877 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2879 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2882 PTA_MMX | PTA_SSE | PTA_SSE2},
2883 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
       PTA_MMX | PTA_SSE | PTA_SSE2},
2885 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2886 PTA_MMX | PTA_SSE | PTA_SSE2},
2887 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2888 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2889 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2890 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2891 | PTA_CX16 | PTA_NO_SAHF},
2892 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2893 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2894 | PTA_SSSE3 | PTA_CX16},
2895 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2896 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2897 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2898 {"geode", PROCESSOR_GEODE, CPU_GEODE,
       PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2900 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2901 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2902 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2903 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2904 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2905 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2906 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2907 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2908 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2909 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2910 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2911 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2912 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2913 {"x86-64", PROCESSOR_K8, CPU_K8,
2914 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2915 {"k8", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_NO_SAHF},
2918 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2921 {"opteron", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_NO_SAHF},
2924 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2927 {"athlon64", PROCESSOR_K8, CPU_K8,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_NO_SAHF},
2930 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2933 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2934 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2935 | PTA_SSE2 | PTA_NO_SAHF},
2936 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2937 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2938 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2939 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2940 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2941 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2942 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2943 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2944 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2945 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2946 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2947 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2948 0 /* flags are only used for -march switch. */ },
2949 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
       PTA_64BIT /* flags are only used for -march switch. */ },
    };
2953 int const pta_size = ARRAY_SIZE (processor_alias_table);
2955 /* Set up prefix/suffix so the error messages refer to either the command
2956 line argument, or the attribute(target). */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
2970 #ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
2974 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
2978 /* -fPIC is the default for x86_64. */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
2982 /* Need to check -mtune=generic first. */
  if (ix86_tune_string)
    {
2985 if (!strcmp (ix86_tune_string, "generic")
2986 || !strcmp (ix86_tune_string, "i686")
2987 /* As special support for cross compilers we read -mtune=native
2988 as -mtune=generic. With native compilers we won't see the
2989 -mtune=native, as it was changed by the driver. */
2990 || !strcmp (ix86_tune_string, "native"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
2997 /* If this call is for setting the option attribute, allow the
2998 generic32/generic64 that was previously set. */
2999 else if (!main_args_p
3000 && (!strcmp (ix86_tune_string, "generic32")
		   || !strcmp (ix86_tune_string, "generic64")))
	;
3003 else if (!strncmp (ix86_tune_string, "generic", 7))
3004 error ("bad value (%s) for %stune=%s %s",
3005 ix86_tune_string, prefix, suffix, sw);
3006 else if (!strcmp (ix86_tune_string, "x86-64"))
3007 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3008 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3009 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
3014 ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}
3021 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3022 need to use a sensible tune option. */
3023 if (!strcmp (ix86_tune_string, "generic")
3024 || !strcmp (ix86_tune_string, "x86-64")
3025 || !strcmp (ix86_tune_string, "i686"))
	{
	  if (TARGET_64BIT)
	    ix86_tune_string = "generic64";
	  else
	    ix86_tune_string = "generic32";
	}
    }
  if (ix86_stringop_string)
    {
3036 if (!strcmp (ix86_stringop_string, "rep_byte"))
3037 stringop_alg = rep_prefix_1_byte;
3038 else if (!strcmp (ix86_stringop_string, "libcall"))
3039 stringop_alg = libcall;
3040 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3041 stringop_alg = rep_prefix_4_byte;
      else if (!strcmp (ix86_stringop_string, "rep_8byte")
	       && TARGET_64BIT)
3044 /* rep; movq isn't available in 32-bit code. */
3045 stringop_alg = rep_prefix_8_byte;
3046 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3047 stringop_alg = loop_1_byte;
3048 else if (!strcmp (ix86_stringop_string, "loop"))
3049 stringop_alg = loop;
3050 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3051 stringop_alg = unrolled_loop;
3053 error ("bad value (%s) for %sstringop-strategy=%s %s",
	       ix86_stringop_string, prefix, suffix, sw);
    }
3057 if (!ix86_arch_string)
3058 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
3062 /* Validate -mabi= value. */
  if (ix86_abi_string)
    {
3065 if (strcmp (ix86_abi_string, "sysv") == 0)
3066 ix86_abi = SYSV_ABI;
      else if (strcmp (ix86_abi_string, "ms") == 0)
	ix86_abi = MS_ABI;
3070 error ("unknown ABI (%s) for %sabi=%s %s",
	       ix86_abi_string, prefix, suffix, sw);
    }
  else
    ix86_abi = DEFAULT_ABI;
  if (ix86_cmodel_string != 0)
    {
3078 if (!strcmp (ix86_cmodel_string, "small"))
3079 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3080 else if (!strcmp (ix86_cmodel_string, "medium"))
3081 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3082 else if (!strcmp (ix86_cmodel_string, "large"))
3083 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
      else if (flag_pic)
	error ("code model %s does not support PIC mode", ix86_cmodel_string);
3086 else if (!strcmp (ix86_cmodel_string, "32"))
3087 ix86_cmodel = CM_32;
3088 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3089 ix86_cmodel = CM_KERNEL;
3091 error ("bad value (%s) for %scmodel=%s %s",
	       ix86_cmodel_string, prefix, suffix, sw);
    }
  else
    {
      /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3097 use of rip-relative addressing. This eliminates fixups that
3098 would otherwise be needed if this object is to be placed in a
3099 DLL, and is essentially just as efficient as direct addressing. */
3100 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3101 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3102 else if (TARGET_64BIT)
3103 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (ix86_asm_string != 0)
    {
      if (! TARGET_MACHO
	  && !strcmp (ix86_asm_string, "intel"))
3111 ix86_asm_dialect = ASM_INTEL;
3112 else if (!strcmp (ix86_asm_string, "att"))
3113 ix86_asm_dialect = ASM_ATT;
3115 error ("bad value (%s) for %sasm=%s %s",
	       ix86_asm_string, prefix, suffix, sw);
    }
3118 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3119 error ("code model %qs not supported in the %s bit mode",
3120 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3121 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3122 sorry ("%i-bit mode not compiled in",
3123 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3125 for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
3128 ix86_schedule = processor_alias_table[i].schedule;
3129 ix86_arch = processor_alias_table[i].processor;
3130 /* Default cpu tuning to the architecture. */
3131 ix86_tune = ix86_arch;
3133 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  error ("CPU you selected does not support x86-64 "
		 "instruction set");
3137 if (processor_alias_table[i].flags & PTA_MMX
3138 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3139 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3140 if (processor_alias_table[i].flags & PTA_3DNOW
3141 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3142 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3143 if (processor_alias_table[i].flags & PTA_3DNOW_A
3144 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3145 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3146 if (processor_alias_table[i].flags & PTA_SSE
3147 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3148 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3149 if (processor_alias_table[i].flags & PTA_SSE2
3150 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3151 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3152 if (processor_alias_table[i].flags & PTA_SSE3
3153 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3154 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3155 if (processor_alias_table[i].flags & PTA_SSSE3
3156 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3157 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3158 if (processor_alias_table[i].flags & PTA_SSE4_1
3159 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3160 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3161 if (processor_alias_table[i].flags & PTA_SSE4_2
3162 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3163 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3164 if (processor_alias_table[i].flags & PTA_AVX
3165 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3166 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3167 if (processor_alias_table[i].flags & PTA_FMA
3168 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3169 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3170 if (processor_alias_table[i].flags & PTA_SSE4A
3171 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3172 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3173 if (processor_alias_table[i].flags & PTA_FMA4
3174 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3175 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3176 if (processor_alias_table[i].flags & PTA_XOP
3177 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3178 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3179 if (processor_alias_table[i].flags & PTA_LWP
3180 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3181 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3182 if (processor_alias_table[i].flags & PTA_ABM
3183 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3184 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3185 if (processor_alias_table[i].flags & PTA_CX16
3186 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3187 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3188 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3189 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3190 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3191 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3192 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3193 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3194 if (processor_alias_table[i].flags & PTA_MOVBE
3195 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3196 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3197 if (processor_alias_table[i].flags & PTA_AES
3198 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3199 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3200 if (processor_alias_table[i].flags & PTA_PCLMUL
3201 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3202 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3203 if (processor_alias_table[i].flags & PTA_FSGSBASE
3204 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3205 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3206 if (processor_alias_table[i].flags & PTA_RDRND
3207 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3208 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3209 if (processor_alias_table[i].flags & PTA_F16C
3210 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3211 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3212 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
3218 if (!strcmp (ix86_arch_string, "generic"))
3219 error ("generic CPU can be used only for %stune=%s %s",
3220 prefix, suffix, sw);
3221 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3222 error ("bad value (%s) for %sarch=%s %s",
3223 ix86_arch_string, prefix, suffix, sw);
3225 ix86_arch_mask = 1u << ix86_arch;
3226 for (i = 0; i < X86_ARCH_LAST; ++i)
3227 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3229 for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
3232 ix86_schedule = processor_alias_table[i].schedule;
3233 ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
		ix86_tune_string = "x86-64";
		for (i = 0; i < pta_size; i++)
		  if (! strcmp (ix86_tune_string,
				processor_alias_table[i].name))
		    break;
		ix86_schedule = processor_alias_table[i].schedule;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
3250 /* Intel CPUs have always interpreted SSE prefetch instructions as
3251 NOPs; so, we can enable SSE prefetch instructions even when
3252 -mtune (rather than -march) points us to a processor that has them.
3253 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3254 higher processors. */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;

	break;
      }
3261 if (ix86_tune_specified && i == pta_size)
3262 error ("bad value (%s) for %stune=%s %s",
3263 ix86_tune_string, prefix, suffix, sw);
3265 ix86_tune_mask = 1u << ix86_tune;
3266 for (i = 0; i < X86_TUNE_LAST; ++i)
3267 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3269 #ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif
3273 /* Set the default values for switches whose default depends on TARGET_64BIT
3274 in case they weren't overwritten by command line options. */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
3280 flag_omit_frame_pointer = 1;
3281 if (flag_asynchronous_unwind_tables == 2)
3282 flag_asynchronous_unwind_tables = 1;
3283 if (flag_pcc_struct_return == 2)
3284 flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
3291 flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
3292 if (flag_asynchronous_unwind_tables == 2)
3293 flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
3294 if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }

  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
3303 /* Arrange to set up i386_stack_locals for all functions. */
3304 init_machine_status = ix86_init_machine_status;
3306 /* Validate -mregparm= value. */
  if (ix86_regparm_string)
    {
      if (TARGET_64BIT)
	warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3311 i = atoi (ix86_regparm_string);
3312 if (i < 0 || i > REGPARM_MAX)
3313 error ("%sregparm=%d%s is not between 0 and %d",
3314 prefix, i, suffix, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
3321 /* If the user has provided any of the -malign-* options,
3322 warn and use that value only if -falign-* is not set.
3323 Remove this code in GCC 3.2 or later. */
  if (ix86_align_loops_string)
    {
3326 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3327 prefix, suffix, suffix);
      if (align_loops == 0)
	{
3330 i = atoi (ix86_align_loops_string);
3331 if (i < 0 || i > MAX_CODE_ALIGN)
3332 error ("%salign-loops=%d%s is not between 0 and %d",
3333 prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }
  if (ix86_align_jumps_string)
    {
3341 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3342 prefix, suffix, suffix);
      if (align_jumps == 0)
	{
3345 i = atoi (ix86_align_jumps_string);
3346 if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-jumps=%d%s is not between 0 and %d",
3348 prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }
  if (ix86_align_funcs_string)
    {
3356 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3357 prefix, suffix, suffix);
      if (align_functions == 0)
	{
3360 i = atoi (ix86_align_funcs_string);
3361 if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-functions=%d%s is not between 0 and %d",
		   prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
3369 /* Default align_* from the processor table. */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
3385 /* Validate -mbranch-cost= value, or provide default. */
3386 ix86_branch_cost = ix86_cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
      else
	ix86_branch_cost = i;
    }
  if (ix86_section_threshold_string)
    {
      i = atoi (ix86_section_threshold_string);
      if (i < 0)
	error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
      else
	ix86_section_threshold = i;
    }
  if (ix86_tls_dialect_string)
    {
3406 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3407 ix86_tls_dialect = TLS_DIALECT_GNU;
3408 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3409 ix86_tls_dialect = TLS_DIALECT_GNU2;
3411 error ("bad value (%s) for %stls-dialect=%s %s",
	       ix86_tls_dialect_string, prefix, suffix, sw);
    }
  if (ix87_precision_string)
    {
3417 i = atoi (ix87_precision_string);
3418 if (i != 32 && i != 64 && i != 80)
	error ("pc%d is not valid precision setting (32, 64 or 80)", i);
    }
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3426 /* Enable by default the SSE and MMX builtins. Do allow the user to
3427 explicitly disable any of these. In particular, disabling SSE and
3428 MMX for kernel code is extremely useful. */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
      if (TARGET_RTD)
	warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
    }
  else
    {
      target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3441 if (!ix86_arch_specified)
3443 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3445 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3446 when the programmer takes care to keep the stack from being destroyed. */
3447 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3448 target_flags |= MASK_NO_RED_ZONE;
3451 /* Keep nonleaf frame pointers. */
3452 if (flag_omit_frame_pointer)
3453 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3454 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3455 flag_omit_frame_pointer = 1;
3457 /* If we're doing fast math, we don't care about comparison order
3458 wrt NaNs. This lets us use a shorter comparison sequence. */
3459 if (flag_finite_math_only)
3460 target_flags &= ~MASK_IEEE_FP;
3462 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3463 since the insns won't need emulation. */
3464 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3465 target_flags &= ~MASK_NO_FANCY_MATH_387;
3467 /* Likewise, if the target doesn't have a 387, or we've specified
3468 software floating point, don't use 387 inline intrinsics. */
3470 target_flags |= MASK_NO_FANCY_MATH_387;
3472 /* Turn on MMX builtins for -msse. */
3475 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3476 x86_prefetch_sse = true;
3479 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3480 if (TARGET_SSE4_2 || TARGET_ABM)
3481 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3483 /* Validate -mpreferred-stack-boundary= value or default it to
3484 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3485 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3486 if (ix86_preferred_stack_boundary_string)
3488 i = atoi (ix86_preferred_stack_boundary_string);
3489 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3490 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3491 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3493 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
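/* Worked example (illustrative): the option value is again a log2 byte
   count, converted here to bits.  -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the 16-byte stack alignment
   required by the 64-bit psABI, which is why the check above enforces a
   minimum of 4 in 64-bit mode.  */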
3496 /* Set the default value for -mstackrealign. */
3497 if (ix86_force_align_arg_pointer == -1)
3498 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3500 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3502 /* Validate -mincoming-stack-boundary= value or default it to
3503 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3504 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3505 if (ix86_incoming_stack_boundary_string)
3507 i = atoi (ix86_incoming_stack_boundary_string);
3508 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3509 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3510 i, TARGET_64BIT ? 4 : 2);
3513 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3514 ix86_incoming_stack_boundary
3515 = ix86_user_incoming_stack_boundary;
3519 /* Accept -msseregparm only if at least SSE support is enabled. */
3520 if (TARGET_SSEREGPARM
3522 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3524 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3525 if (ix86_fpmath_string != 0)
3527 if (! strcmp (ix86_fpmath_string, "387"))
3528 ix86_fpmath = FPMATH_387;
3529 else if (! strcmp (ix86_fpmath_string, "sse"))
3533 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3534 ix86_fpmath = FPMATH_387;
3537 ix86_fpmath = FPMATH_SSE;
3539 else if (! strcmp (ix86_fpmath_string, "387,sse")
3540 || ! strcmp (ix86_fpmath_string, "387+sse")
3541 || ! strcmp (ix86_fpmath_string, "sse,387")
3542 || ! strcmp (ix86_fpmath_string, "sse+387")
3543 || ! strcmp (ix86_fpmath_string, "both"))
3547 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3548 ix86_fpmath = FPMATH_387;
3550 else if (!TARGET_80387)
3552 warning (0, "387 instruction set disabled, using SSE arithmetic");
3553 ix86_fpmath = FPMATH_SSE;
3556 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3559 error ("bad value (%s) for %sfpmath=%s %s",
3560 ix86_fpmath_string, prefix, suffix, sw);
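/* Summary of the accepted -mfpmath= strings (derived from the code
   above, illustrative): "387" selects FPMATH_387; "sse" selects
   FPMATH_SSE when SSE is enabled, otherwise it falls back to FPMATH_387
   with a warning; "387,sse", "387+sse", "sse,387", "sse+387" and "both"
   select FPMATH_SSE | FPMATH_387 when both units are available.  */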
3563 /* If the i387 is disabled, then do not return values in it. */
3565 target_flags &= ~MASK_FLOAT_RETURNS;
3567 /* Use an external vectorized library to vectorize intrinsics. */
3568 if (ix86_veclibabi_string)
3570 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3571 ix86_veclib_handler = ix86_veclibabi_svml;
3572 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3573 ix86_veclib_handler = ix86_veclibabi_acml;
3575 error ("unknown vectorization library ABI type (%s) for "
3576 "%sveclibabi=%s %s", ix86_veclibabi_string,
3577 prefix, suffix, sw);
3580 if ((!USE_IX86_FRAME_POINTER
3581 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3582 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3584 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3586 /* ??? Unwind info is not correct around the CFG unless either a frame
3587 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3588 unwind info generation to be aware of the CFG and propagating states around edges. */
3590 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3591 || flag_exceptions || flag_non_call_exceptions)
3592 && flag_omit_frame_pointer
3593 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3595 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3596 warning (0, "unwind tables currently require either a frame pointer "
3597 "or %saccumulate-outgoing-args%s for correctness",
3599 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3602 /* If stack probes are required, the space used for large function
3603 arguments on the stack must also be probed, so enable
3604 -maccumulate-outgoing-args so this happens in the prologue. */
3605 if (TARGET_STACK_PROBE
3606 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3608 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3609 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3610 "for correctness", prefix, suffix);
3611 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3614 /* For sane SSE instruction set generation we need the fcomi instruction.
3615 It is safe to enable all CMOVE instructions. */
3619 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3622 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3623 p = strchr (internal_label_prefix, 'X');
3624 internal_label_prefix_len = p - internal_label_prefix;
3628 /* When the scheduling description is not available, disable the scheduler pass
3629 so it won't slow down the compilation and make x87 code slower. */
3630 if (!TARGET_SCHEDULE)
3631 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3633 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3634 set_param_value ("simultaneous-prefetches",
3635 ix86_cost->simultaneous_prefetches);
3636 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3637 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3638 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3639 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3640 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3641 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3643 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
3644 if (flag_prefetch_loop_arrays < 0
3647 && software_prefetching_beneficial_p ())
3648 flag_prefetch_loop_arrays = 1;
3650 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3651 can be optimized to ap = __builtin_next_arg (0). */
3653 targetm.expand_builtin_va_start = NULL;
3657 ix86_gen_leave = gen_leave_rex64;
3658 ix86_gen_pop1 = gen_popdi1;
3659 ix86_gen_add3 = gen_adddi3;
3660 ix86_gen_sub3 = gen_subdi3;
3661 ix86_gen_sub3_carry = gen_subdi3_carry;
3662 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3663 ix86_gen_monitor = gen_sse3_monitor64;
3664 ix86_gen_andsp = gen_anddi3;
3665 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3666 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3667 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3671 ix86_gen_leave = gen_leave;
3672 ix86_gen_pop1 = gen_popsi1;
3673 ix86_gen_add3 = gen_addsi3;
3674 ix86_gen_sub3 = gen_subsi3;
3675 ix86_gen_sub3_carry = gen_subsi3_carry;
3676 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3677 ix86_gen_monitor = gen_sse3_monitor;
3678 ix86_gen_andsp = gen_andsi3;
3679 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3680 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3681 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
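/* The assignments above select word-size-specific RTL generators once,
   so later code can emit pointer-width operations without testing
   TARGET_64BIT at each site.  An illustrative (hypothetical) use:

     emit_insn (ix86_gen_add3 (stack_pointer_rtx, stack_pointer_rtx,
                               GEN_INT (-16)));

   expands via adddi3 on 64-bit targets and addsi3 on 32-bit ones.  */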
3685 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3687 target_flags |= MASK_CLD & ~target_flags_explicit;
3690 if (!TARGET_64BIT && flag_pic)
3692 if (flag_fentry > 0)
3693 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3696 if (flag_fentry < 0)
3698 #if defined(PROFILE_BEFORE_PROLOGUE)
3705 /* Save the initial options in case the user provides function-specific options. */
3707 target_option_default_node = target_option_current_node
3708 = build_target_option_node ();
3711 /* Update register usage after having seen the compiler flags. */
3714 ix86_conditional_register_usage (void)
3719 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3721 if (fixed_regs[i] > 1)
3722 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3723 if (call_used_regs[i] > 1)
3724 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
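/* Encoding note (derived from the tests above): a value of 1 in
   fixed_regs/call_used_regs means unconditionally set, 2 means "only in
   32-bit mode" and 3 means "only in 64-bit mode"; the comparison
   collapses 2/3 to 0 or 1 for the current target.  */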
3727 /* The PIC register, if it exists, is fixed. */
3728 j = PIC_OFFSET_TABLE_REGNUM;
3729 if (j != INVALID_REGNUM)
3730 fixed_regs[j] = call_used_regs[j] = 1;
3732 /* The MS_ABI changes the set of call-used registers. */
3733 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3735 call_used_regs[SI_REG] = 0;
3736 call_used_regs[DI_REG] = 0;
3737 call_used_regs[XMM6_REG] = 0;
3738 call_used_regs[XMM7_REG] = 0;
3739 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3740 call_used_regs[i] = 0;
3743 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3744 other call-clobbered regs for 64-bit. */
3747 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3749 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3750 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3751 && call_used_regs[i])
3752 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3755 /* If MMX is disabled, squash the registers. */
3757 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3758 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3759 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3761 /* If SSE is disabled, squash the registers. */
3763 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3764 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3765 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3767 /* If the FPU is disabled, squash the registers. */
3768 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3769 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3770 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3771 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3773 /* If 32-bit, squash the 64-bit registers. */
3776 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3778 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3784 /* Save the current options */
3787 ix86_function_specific_save (struct cl_target_option *ptr)
3789 ptr->arch = ix86_arch;
3790 ptr->schedule = ix86_schedule;
3791 ptr->tune = ix86_tune;
3792 ptr->fpmath = ix86_fpmath;
3793 ptr->branch_cost = ix86_branch_cost;
3794 ptr->tune_defaulted = ix86_tune_defaulted;
3795 ptr->arch_specified = ix86_arch_specified;
3796 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3797 ptr->target_flags_explicit = target_flags_explicit;
3799 /* The fields are char but the variables are not; make sure the
3800 values fit in the fields. */
3801 gcc_assert (ptr->arch == ix86_arch);
3802 gcc_assert (ptr->schedule == ix86_schedule);
3803 gcc_assert (ptr->tune == ix86_tune);
3804 gcc_assert (ptr->fpmath == ix86_fpmath);
3805 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3808 /* Restore the current options */
3811 ix86_function_specific_restore (struct cl_target_option *ptr)
3813 enum processor_type old_tune = ix86_tune;
3814 enum processor_type old_arch = ix86_arch;
3815 unsigned int ix86_arch_mask, ix86_tune_mask;
3818 ix86_arch = (enum processor_type) ptr->arch;
3819 ix86_schedule = (enum attr_cpu) ptr->schedule;
3820 ix86_tune = (enum processor_type) ptr->tune;
3821 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3822 ix86_branch_cost = ptr->branch_cost;
3823 ix86_tune_defaulted = ptr->tune_defaulted;
3824 ix86_arch_specified = ptr->arch_specified;
3825 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3826 target_flags_explicit = ptr->target_flags_explicit;
3828 /* Recreate the arch feature tests if the arch changed */
3829 if (old_arch != ix86_arch)
3831 ix86_arch_mask = 1u << ix86_arch;
3832 for (i = 0; i < X86_ARCH_LAST; ++i)
3833 ix86_arch_features[i]
3834 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3837 /* Recreate the tune optimization tests */
3838 if (old_tune != ix86_tune)
3840 ix86_tune_mask = 1u << ix86_tune;
3841 for (i = 0; i < X86_TUNE_LAST; ++i)
3842 ix86_tune_features[i]
3843 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3847 /* Print the current options */
3850 ix86_function_specific_print (FILE *file, int indent,
3851 struct cl_target_option *ptr)
3854 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3855 NULL, NULL, NULL, false);
3857 fprintf (file, "%*sarch = %d (%s)\n",
3860 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3861 ? cpu_names[ptr->arch]
3864 fprintf (file, "%*stune = %d (%s)\n",
3867 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3868 ? cpu_names[ptr->tune]
3871 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3872 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3873 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3874 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3878 fprintf (file, "%*s%s\n", indent, "", target_string);
3879 free (target_string);
3884 /* Inner function to process the attribute((target(...))): take an argument and
3885 set the current options from the argument. If we have a list, recursively go over the list. */
3889 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3894 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3895 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3896 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3897 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3912 enum ix86_opt_type type;
3917 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3918 IX86_ATTR_ISA ("abm", OPT_mabm),
3919 IX86_ATTR_ISA ("aes", OPT_maes),
3920 IX86_ATTR_ISA ("avx", OPT_mavx),
3921 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3922 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3923 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3924 IX86_ATTR_ISA ("sse", OPT_msse),
3925 IX86_ATTR_ISA ("sse2", OPT_msse2),
3926 IX86_ATTR_ISA ("sse3", OPT_msse3),
3927 IX86_ATTR_ISA ("sse4", OPT_msse4),
3928 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3929 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3930 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3931 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3932 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3933 IX86_ATTR_ISA ("xop", OPT_mxop),
3934 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3935 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3936 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3937 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3939 /* string options */
3940 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3941 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3942 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3945 IX86_ATTR_YES ("cld",
3949 IX86_ATTR_NO ("fancy-math-387",
3950 OPT_mfancy_math_387,
3951 MASK_NO_FANCY_MATH_387),
3953 IX86_ATTR_YES ("ieee-fp",
3957 IX86_ATTR_YES ("inline-all-stringops",
3958 OPT_minline_all_stringops,
3959 MASK_INLINE_ALL_STRINGOPS),
3961 IX86_ATTR_YES ("inline-stringops-dynamically",
3962 OPT_minline_stringops_dynamically,
3963 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3965 IX86_ATTR_NO ("align-stringops",
3966 OPT_mno_align_stringops,
3967 MASK_NO_ALIGN_STRINGOPS),
3969 IX86_ATTR_YES ("recip",
3975 /* If this is a list, recurse to get the options. */
3976 if (TREE_CODE (args) == TREE_LIST)
3980 for (; args; args = TREE_CHAIN (args))
3981 if (TREE_VALUE (args)
3982 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3988 else if (TREE_CODE (args) != STRING_CST)
3991 /* Handle multiple arguments separated by commas. */
3992 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3994 while (next_optstr && *next_optstr != '\0')
3996 char *p = next_optstr;
3998 char *comma = strchr (next_optstr, ',');
3999 const char *opt_string;
4000 size_t len, opt_len;
4005 enum ix86_opt_type type = ix86_opt_unknown;
4011 len = comma - next_optstr;
4012 next_optstr = comma + 1;
4020 /* Recognize no-xxx. */
4021 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4030 /* Find the option. */
4033 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4035 type = attrs[i].type;
4036 opt_len = attrs[i].len;
4037 if (ch == attrs[i].string[0]
4038 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4039 && memcmp (p, attrs[i].string, opt_len) == 0)
4042 mask = attrs[i].mask;
4043 opt_string = attrs[i].string;
4048 /* Process the option. */
4051 error ("attribute(target(\"%s\")) is unknown", orig_p);
4055 else if (type == ix86_opt_isa)
4056 ix86_handle_option (opt, p, opt_set_p);
4058 else if (type == ix86_opt_yes || type == ix86_opt_no)
4060 if (type == ix86_opt_no)
4061 opt_set_p = !opt_set_p;
4064 target_flags |= mask;
4066 target_flags &= ~mask;
4069 else if (type == ix86_opt_str)
4073 error ("option(\"%s\") was already specified", opt_string);
4077 p_strings[opt] = xstrdup (p + opt_len);
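/* Example (illustrative): attribute((target("no-sse2,arch=core2"))) is
   split at the comma; "no-sse2" matches the "sse2" ISA entry with
   opt_set_p cleared by the "no-" prefix, while "arch=core2" matches the
   ix86_opt_str entry "arch=" and stores "core2" in
   p_strings[IX86_FUNCTION_SPECIFIC_ARCH].  */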
4087 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4090 ix86_valid_target_attribute_tree (tree args)
4092 const char *orig_arch_string = ix86_arch_string;
4093 const char *orig_tune_string = ix86_tune_string;
4094 const char *orig_fpmath_string = ix86_fpmath_string;
4095 int orig_tune_defaulted = ix86_tune_defaulted;
4096 int orig_arch_specified = ix86_arch_specified;
4097 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4100 struct cl_target_option *def
4101 = TREE_TARGET_OPTION (target_option_default_node);
4103 /* Process each of the options on the chain. */
4104 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4107 /* If the changed options are different from the default, rerun override_options,
4108 and then save the options away. The string options are attribute options,
4109 and will be undone when we copy the save structure. */
4110 if (ix86_isa_flags != def->ix86_isa_flags
4111 || target_flags != def->target_flags
4112 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4113 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4114 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4116 /* If we are using the default tune= or arch=, undo the string assigned,
4117 and use the default. */
4118 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4119 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4120 else if (!orig_arch_specified)
4121 ix86_arch_string = NULL;
4123 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4124 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4125 else if (orig_tune_defaulted)
4126 ix86_tune_string = NULL;
4128 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4129 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4130 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4131 else if (!TARGET_64BIT && TARGET_SSE)
4132 ix86_fpmath_string = "sse,387";
4134 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4135 override_options (false);
4137 /* Add any builtin functions with the new isa if any. */
4138 ix86_add_new_builtins (ix86_isa_flags);
4140 /* Save the current options unless we are validating options for
4142 t = build_target_option_node ();
4144 ix86_arch_string = orig_arch_string;
4145 ix86_tune_string = orig_tune_string;
4146 ix86_fpmath_string = orig_fpmath_string;
4148 /* Free up memory allocated to hold the strings */
4149 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4150 if (option_strings[i])
4151 free (option_strings[i]);
4157 /* Hook to validate attribute((target("string"))). */
4160 ix86_valid_target_attribute_p (tree fndecl,
4161 tree ARG_UNUSED (name),
4163 int ARG_UNUSED (flags))
4165 struct cl_target_option cur_target;
4167 tree old_optimize = build_optimization_node ();
4168 tree new_target, new_optimize;
4169 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4171 /* If the function changed the optimization levels as well as setting target
4172 options, start with the optimizations specified. */
4173 if (func_optimize && func_optimize != old_optimize)
4174 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4176 /* The target attributes may also change some optimization flags, so update
4177 the optimization options if necessary. */
4178 cl_target_option_save (&cur_target);
4179 new_target = ix86_valid_target_attribute_tree (args);
4180 new_optimize = build_optimization_node ();
4187 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4189 if (old_optimize != new_optimize)
4190 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4193 cl_target_option_restore (&cur_target);
4195 if (old_optimize != new_optimize)
4196 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4202 /* Hook to determine if one function can safely inline another. */
4205 ix86_can_inline_p (tree caller, tree callee)
4208 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4209 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4211 /* If the callee has no option attributes, then it is ok to inline. */
4215 /* If the caller has no option attributes but the callee does, then it is not ok to
4217 else if (!caller_tree)
4222 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4223 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4225 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4226 can inline an SSE2 function, but an SSE2 function can't inline an SSE4 function. */
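/* Worked example (illustrative): with caller ISA flags {SSE, SSE2,
   SSE4.2} and callee flags {SSE, SSE2}, (caller_isa & callee_isa)
   == callee_isa holds, so inlining is allowed; with the sets swapped
   the subset test below fails and inlining is refused.  */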
4228 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4229 != callee_opts->ix86_isa_flags)
4232 /* See if we have the same non-isa options. */
4233 else if (caller_opts->target_flags != callee_opts->target_flags)
4236 /* See if arch, tune, etc. are the same. */
4237 else if (caller_opts->arch != callee_opts->arch)
4240 else if (caller_opts->tune != callee_opts->tune)
4243 else if (caller_opts->fpmath != callee_opts->fpmath)
4246 else if (caller_opts->branch_cost != callee_opts->branch_cost)
4257 /* Remember the last target of ix86_set_current_function. */
4258 static GTY(()) tree ix86_previous_fndecl;
4260 /* Establish appropriate back-end context for processing the function
4261 FNDECL. The argument might be NULL to indicate processing at top
4262 level, outside of any function scope. */
4264 ix86_set_current_function (tree fndecl)
4266 /* Only change the context if the function changes. This hook is called
4267 several times in the course of compiling a function, and we don't want to
4268 slow things down too much or call target_reinit when it isn't safe. */
4269 if (fndecl && fndecl != ix86_previous_fndecl)
4271 tree old_tree = (ix86_previous_fndecl
4272 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4275 tree new_tree = (fndecl
4276 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4279 ix86_previous_fndecl = fndecl;
4280 if (old_tree == new_tree)
4285 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4291 struct cl_target_option *def
4292 = TREE_TARGET_OPTION (target_option_current_node);
4294 cl_target_option_restore (def);
4301 /* Return true if this goes in large data/bss. */
4304 ix86_in_large_data_p (tree exp)
4306 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4309 /* Functions are never large data. */
4310 if (TREE_CODE (exp) == FUNCTION_DECL)
4313 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4315 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4316 if (strcmp (section, ".ldata") == 0
4317 || strcmp (section, ".lbss") == 0)
4323 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4325 /* If this is an incomplete type with size 0, then we can't put it
4326 in data because it might be too big when completed. */
4327 if (!size || size > ix86_section_threshold)
4334 /* Switch to the appropriate section for output of DECL.
4335 DECL is either a `VAR_DECL' node or a constant of some sort.
4336 RELOC indicates whether forming the initial value of DECL requires
4337 link-time relocations. */
4339 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4343 x86_64_elf_select_section (tree decl, int reloc,
4344 unsigned HOST_WIDE_INT align)
4346 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4347 && ix86_in_large_data_p (decl))
4349 const char *sname = NULL;
4350 unsigned int flags = SECTION_WRITE;
4351 switch (categorize_decl_for_section (decl, reloc))
4356 case SECCAT_DATA_REL:
4357 sname = ".ldata.rel";
4359 case SECCAT_DATA_REL_LOCAL:
4360 sname = ".ldata.rel.local";
4362 case SECCAT_DATA_REL_RO:
4363 sname = ".ldata.rel.ro";
4365 case SECCAT_DATA_REL_RO_LOCAL:
4366 sname = ".ldata.rel.ro.local";
4370 flags |= SECTION_BSS;
4373 case SECCAT_RODATA_MERGE_STR:
4374 case SECCAT_RODATA_MERGE_STR_INIT:
4375 case SECCAT_RODATA_MERGE_CONST:
4379 case SECCAT_SRODATA:
4386 /* We don't split these for the medium model. Place them into
4387 default sections and hope for the best. */
4392 /* We might get called with string constants, but get_named_section
4393 doesn't like them as they are not DECLs. Also, we need to set
4394 flags in that case. */
4396 return get_section (sname, flags, NULL);
4397 return get_named_section (decl, sname, reloc);
4400 return default_elf_select_section (decl, reloc, align);
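/* Example (illustrative): under -mcmodel=medium, an initialized global
   whose size exceeds -mlarge-data-threshold and which needs dynamic
   relocations is placed in ".ldata.rel" by the switch above; anything
   at or below the threshold falls through to the default ELF sections
   (.data, .bss, .rodata, ...).  */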
4403 /* Build up a unique section name, expressed as a
4404 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4405 RELOC indicates whether the initial value of EXP requires
4406 link-time relocations. */
4408 static void ATTRIBUTE_UNUSED
4409 x86_64_elf_unique_section (tree decl, int reloc)
4411 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4412 && ix86_in_large_data_p (decl))
4414 const char *prefix = NULL;
4415 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4416 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4418 switch (categorize_decl_for_section (decl, reloc))
4421 case SECCAT_DATA_REL:
4422 case SECCAT_DATA_REL_LOCAL:
4423 case SECCAT_DATA_REL_RO:
4424 case SECCAT_DATA_REL_RO_LOCAL:
4425 prefix = one_only ? ".ld" : ".ldata";
4428 prefix = one_only ? ".lb" : ".lbss";
4431 case SECCAT_RODATA_MERGE_STR:
4432 case SECCAT_RODATA_MERGE_STR_INIT:
4433 case SECCAT_RODATA_MERGE_CONST:
4434 prefix = one_only ? ".lr" : ".lrodata";
4436 case SECCAT_SRODATA:
4443 /* We don't split these for the medium model. Place them into
4444 default sections and hope for the best. */
4449 const char *name, *linkonce;
4452 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4453 name = targetm.strip_name_encoding (name);
4455 /* If we're using one_only, then there needs to be a .gnu.linkonce
4456 prefix to the section name. */
4457 linkonce = one_only ? ".gnu.linkonce" : "";
4459 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4461 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4465 default_unique_section (decl, reloc);
4468 #ifdef COMMON_ASM_OP
4469 /* This says how to output assembler code to declare an
4470 uninitialized external linkage data object.
4472 For medium-model x86-64 we need to use the .largecomm directive for large objects. */
4475 x86_elf_aligned_common (FILE *file,
4476 const char *name, unsigned HOST_WIDE_INT size,
4479 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4480 && size > (unsigned int)ix86_section_threshold)
4481 fputs (".largecomm\t", file);
4483 fputs (COMMON_ASM_OP, file);
4484 assemble_name (file, name);
4485 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4486 size, align / BITS_PER_UNIT);
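/* Example of the emitted assembly (illustrative): for a 4 MB object
   "big" with 32-byte alignment under -mcmodel=medium this prints

	.largecomm	big,4194304,32

   while objects at or below the threshold keep the ordinary ".comm"
   directive.  */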
4490 /* Utility function for targets to use in implementing
4491 ASM_OUTPUT_ALIGNED_BSS. */
4494 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4495 const char *name, unsigned HOST_WIDE_INT size,
4498 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4499 && size > (unsigned int)ix86_section_threshold)
4500 switch_to_section (get_named_section (decl, ".lbss", 0));
4502 switch_to_section (bss_section);
4503 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4504 #ifdef ASM_DECLARE_OBJECT_NAME
4505 last_assemble_variable_decl = decl;
4506 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4508 /* The standard thing is to just output a label for the object. */
4509 ASM_OUTPUT_LABEL (file, name);
4510 #endif /* ASM_DECLARE_OBJECT_NAME */
4511 ASM_OUTPUT_SKIP (file, size ? size : 1);
4515 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4517 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4518 make the problem with not enough registers even worse. */
4519 #ifdef INSN_SCHEDULING
4521 flag_schedule_insns = 0;
4525 /* The Darwin libraries never set errno, so we might as well
4526 avoid calling them when that's the only reason we would. */
4527 flag_errno_math = 0;
4529 /* The default values of these switches depend on TARGET_64BIT,
4530 which is not known at this moment. Mark these values with 2 and
4531 let the user override them. In case there is no command-line option
4532 specifying them, we will set the defaults in override_options. */
4534 flag_omit_frame_pointer = 2;
4536 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4540 flag_pcc_struct_return = 2;
4541 flag_asynchronous_unwind_tables = 2;
4542 flag_vect_cost_model = 1;
4543 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4544 SUBTARGET_OPTIMIZATION_OPTIONS;
4548 /* Decide whether we must probe the stack before any space allocation
4549 on this target. It's essentially TARGET_STACK_PROBE except when
4550 -fstack-check causes the stack to be already probed differently. */
4553 ix86_target_stack_probe (void)
4555 /* Do not probe the stack twice if static stack checking is enabled. */
4556 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4559 return TARGET_STACK_PROBE;
4562 /* Decide whether we can make a sibling call to a function. DECL is the
4563 declaration of the function being targeted by the call and EXP is the
4564 CALL_EXPR representing the call. */
4567 ix86_function_ok_for_sibcall (tree decl, tree exp)
4569 tree type, decl_or_type;
4572 /* If we are generating position-independent code, we cannot sibcall
4573 optimize any indirect call, or a direct call to a global function,
4574 as the PLT requires %ebx be live. */
4575 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4578 /* If we need to align the outgoing stack, then sibcalling would
4579 unalign the stack, which may break the called function. */
4580 if (ix86_minimum_incoming_stack_boundary (true)
4581 < PREFERRED_STACK_BOUNDARY)
4586 decl_or_type = decl;
4587 type = TREE_TYPE (decl);
4591 /* We're looking at the CALL_EXPR, we need the type of the function. */
4592 type = CALL_EXPR_FN (exp); /* pointer expression */
4593 type = TREE_TYPE (type); /* pointer type */
4594 type = TREE_TYPE (type); /* function type */
4595 decl_or_type = type;
4598 /* Check that the return value locations are the same. For example,
4599 if we are returning floats on the 80387 register stack, we cannot
4600 make a sibcall from a function that doesn't return a float to a
4601 function that does or, conversely, from a function that does return
4602 a float to a function that doesn't; the necessary stack adjustment
4603 would not be executed. This is also the place we notice
4604 differences in the return value ABI. Note that it is ok for one
4605 of the functions to have void return type as long as the return
4606 value of the other is passed in a register. */
4607 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4608 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4610 if (STACK_REG_P (a) || STACK_REG_P (b))
4612 if (!rtx_equal_p (a, b))
4615 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4617 else if (!rtx_equal_p (a, b))
4622 /* The SYSV ABI has more call-clobbered registers;
4623 disallow sibcalls from MS to SYSV. */
4624 if (cfun->machine->call_abi == MS_ABI
4625 && ix86_function_type_abi (type) == SYSV_ABI)
4630 /* If this call is indirect, we'll need to be able to use a
4631 call-clobbered register for the address of the target function.
4632 Make sure that all such registers are not used for passing
4633 parameters. Note that DLLIMPORT functions are indirect. */
4635 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4637 if (ix86_function_regparm (type, NULL) >= 3)
4639 /* ??? Need to count the actual number of registers to be used,
4640 not the possible number of registers. Fix later. */
4646 /* Otherwise okay. That also includes certain types of indirect calls. */
4650 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4651 and "sseregparm" calling convention attributes;
4652 arguments as in struct attribute_spec.handler. */
4655 ix86_handle_cconv_attribute (tree *node, tree name,
4657 int flags ATTRIBUTE_UNUSED,
4660 if (TREE_CODE (*node) != FUNCTION_TYPE
4661 && TREE_CODE (*node) != METHOD_TYPE
4662 && TREE_CODE (*node) != FIELD_DECL
4663 && TREE_CODE (*node) != TYPE_DECL)
4665 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4667 *no_add_attrs = true;
4671 /* Can combine regparm with all attributes but fastcall. */
4672 if (is_attribute_p ("regparm", name))
4676 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4678 error ("fastcall and regparm attributes are not compatible");
4681 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4683 error ("regparm and thiscall attributes are not compatible");
4686 cst = TREE_VALUE (args);
4687 if (TREE_CODE (cst) != INTEGER_CST)
4689 warning (OPT_Wattributes,
4690 "%qE attribute requires an integer constant argument",
4692 *no_add_attrs = true;
4694 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4696 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4698 *no_add_attrs = true;
4706 /* Do not warn when emulating the MS ABI. */
4707 if ((TREE_CODE (*node) != FUNCTION_TYPE
4708 && TREE_CODE (*node) != METHOD_TYPE)
4709 || ix86_function_type_abi (*node) != MS_ABI)
4710 warning (OPT_Wattributes, "%qE attribute ignored",
4712 *no_add_attrs = true;
4716 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4717 if (is_attribute_p ("fastcall", name))
4719 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4721 error ("fastcall and cdecl attributes are not compatible");
4723 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4725 error ("fastcall and stdcall attributes are not compatible");
4727 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4729 error ("fastcall and regparm attributes are not compatible");
4731 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4733 error ("fastcall and thiscall attributes are not compatible");
4737 /* Can combine stdcall with fastcall (redundant), regparm and
4739 else if (is_attribute_p ("stdcall", name))
4741 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4743 error ("stdcall and cdecl attributes are not compatible");
4745 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4747 error ("stdcall and fastcall attributes are not compatible");
4749 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4751 error ("stdcall and thiscall attributes are not compatible");
4755 /* Can combine cdecl with regparm and sseregparm. */
4756 else if (is_attribute_p ("cdecl", name))
4758 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4760 error ("stdcall and cdecl attributes are not compatible");
4762 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4764 error ("fastcall and cdecl attributes are not compatible");
4766 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4768 error ("cdecl and thiscall attributes are not compatible");
4771 else if (is_attribute_p ("thiscall", name))
4773 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4774 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4776 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4778 error ("stdcall and thiscall attributes are not compatible");
4780 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4782 error ("fastcall and thiscall attributes are not compatible");
4784 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4786 error ("cdecl and thiscall attributes are not compatible");
4790 /* Can combine sseregparm with all attributes. */
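/* Summary of the pairwise checks above (derived from the code,
   illustrative): the calling-convention attributes cdecl, stdcall,
   fastcall and thiscall are mutually exclusive; regparm combines with
   cdecl and stdcall but not with fastcall or thiscall; sseregparm
   combines with all of them.  */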
4795 /* Return 0 if the attributes for two types are incompatible, 1 if they
4796 are compatible, and 2 if they are nearly compatible (which causes a
4797 warning to be generated). */
4800 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4802 /* Check for mismatch of non-default calling convention. */
4803 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4805 if (TREE_CODE (type1) != FUNCTION_TYPE
4806 && TREE_CODE (type1) != METHOD_TYPE)
4809 /* Check for mismatched fastcall/regparm types. */
4810 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4811 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4812 || (ix86_function_regparm (type1, NULL)
4813 != ix86_function_regparm (type2, NULL)))
4816 /* Check for mismatched sseregparm types. */
4817 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4818 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4821 /* Check for mismatched thiscall types. */
4822 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4823 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4826 /* Check for mismatched return types (cdecl vs stdcall). */
4827 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4828 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4834 /* Return the regparm value for a function with the indicated TYPE and DECL.
4835 DECL may be NULL when calling function indirectly
4836 or considering a libcall. */
4839 ix86_function_regparm (const_tree type, const_tree decl)
4845 return (ix86_function_type_abi (type) == SYSV_ABI
4846 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4848 regparm = ix86_regparm;
4849 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4852 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4856 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4859 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4862 /* Use register calling convention for local functions when possible. */
4864 && TREE_CODE (decl) == FUNCTION_DECL
4866 && !(profile_flag && !flag_fentry))
4868 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4869 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4872 int local_regparm, globals = 0, regno;
4874 /* Make sure no regparm register is taken by a
4875 fixed register variable. */
4876 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4877 if (fixed_regs[local_regparm])
4880 /* We don't want to use regparm(3) for nested functions as
4881 these use a static chain pointer in the third argument. */
4882 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4885 /* Each fixed register usage increases register pressure,
4886 so fewer registers should be used for argument passing.
4887 This functionality can be overridden by an explicit regparm value. */
4889 for (regno = 0; regno <= DI_REG; regno++)
4890 if (fixed_regs[regno])
4894 = globals < local_regparm ? local_regparm - globals : 0;
4896 if (local_regparm > regparm)
4897 regparm = local_regparm;
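/* Worked example (illustrative): for a local (static, non-nested)
   function compiled with no fixed register variables, globals is 0 and
   local_regparm is REGPARM_MAX (3), so up to three integer arguments
   travel in %eax, %edx and %ecx; declaring e.g. a global register
   variable in %edi raises globals to 1 and drops the count to 2.  */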
4904 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4905 DFmode (2) arguments in SSE registers for a function with the
4906 indicated TYPE and DECL. DECL may be NULL when calling function
4907 indirectly or considering a libcall. Otherwise return 0. */
4910 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4912 gcc_assert (!TARGET_64BIT);
4914 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4915 by the sseregparm attribute. */
4916 if (TARGET_SSEREGPARM
4917 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4924 error ("calling %qD with attribute sseregparm without "
4925 "SSE/SSE2 enabled", decl);
4927 error ("calling %qT with attribute sseregparm without "
4928 "SSE/SSE2 enabled", type);
4936 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4937 (and DFmode for SSE2) arguments in SSE registers. */
4938 if (decl && TARGET_SSE_MATH && optimize
4939 && !(profile_flag && !flag_fentry))
4941 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4942 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4944 return TARGET_SSE2 ? 2 : 1;
4950 /* Return true if EAX is live at the start of the function. Used by
4951 ix86_expand_prologue to determine if we need special help before
4952 calling allocate_stack_worker. */
4955 ix86_eax_live_at_start_p (void)
4957 /* Cheat. Don't bother working forward from ix86_function_regparm
4958 to the function type to whether an actual argument is located in
4959 eax. Instead just look at cfg info, which is still close enough
4960 to correct at this point. This gives false positives for broken
4961 functions that might use uninitialized data that happens to be
4962 allocated in eax, but who cares? */
4963 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4966 /* Value is the number of bytes of arguments automatically
4967 popped when returning from a subroutine call.
4968 FUNDECL is the declaration node of the function (as a tree),
4969 FUNTYPE is the data type of the function (as a tree),
4970 or for a library call it is an identifier node for the subroutine name.
4971 SIZE is the number of bytes of arguments passed on the stack.
4973 On the 80386, the RTD insn may be used to pop them if the number
4974 of args is fixed, but if the number is variable then the caller
4975 must pop them all. RTD can't be used for library calls now
4976 because the library is compiled with the Unix compiler.
4977 Use of RTD is a selectable option, since it is incompatible with
4978 standard Unix calling sequences. If the option is not selected,
4979 the caller must always pop the args.
4981 The attribute stdcall is equivalent to RTD on a per-module basis. */
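/* Worked example (illustrative): for
     int __attribute__((stdcall)) f (int a, int b);
   SIZE is 8 (two ints on the stack) and the callee pops them with
   "ret $8", so 8 is returned below; a plain cdecl function returns 0
   and the caller adjusts %esp itself.  */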
4984 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4988 /* None of the 64-bit ABIs pop arguments. */
4992 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4994 /* Cdecl functions override -mrtd, and never pop the stack. */
4995 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4997 /* Stdcall and fastcall functions will pop the stack if not
4999 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
5000 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
5001 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5004 if (rtd && ! stdarg_p (funtype))
5008 /* Lose any fake structure return argument if it is passed on the stack. */
5009 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5010 && !KEEP_AGGREGATE_RETURN_POINTER)
5012 int nregs = ix86_function_regparm (funtype, fundecl);
5014 return GET_MODE_SIZE (Pmode);
5020 /* Argument support functions. */
5022 /* Return true when register may be used to pass function parameters. */
5024 ix86_function_arg_regno_p (int regno)
5027 const int *parm_regs;
5032 return (regno < REGPARM_MAX
5033 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5035 return (regno < REGPARM_MAX
5036 || (TARGET_MMX && MMX_REGNO_P (regno)
5037 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5038 || (TARGET_SSE && SSE_REGNO_P (regno)
5039 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5044 if (SSE_REGNO_P (regno) && TARGET_SSE)
5049 if (TARGET_SSE && SSE_REGNO_P (regno)
5050 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5054 /* TODO: The function should depend on the current function ABI, but
5055 builtins.c would need updating then. Therefore we use the default ABI. */
5058 /* RAX is used as hidden argument to va_arg functions. */
5059 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5062 if (ix86_abi == MS_ABI)
5063 parm_regs = x86_64_ms_abi_int_parameter_registers;
5065 parm_regs = x86_64_int_parameter_registers;
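/* For reference (from the ABIs): the SysV x86-64 integer argument
   registers are %rdi, %rsi, %rdx, %rcx, %r8 and %r9, while the MS ABI
   uses only %rcx, %rdx, %r8 and %r9, which is why the loop bound below
   differs between the two ABIs.  */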
5066 for (i = 0; i < (ix86_abi == MS_ABI
5067 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5068 if (regno == parm_regs[i])
5073 /* Return true if we do not know how to pass TYPE solely in registers. */
5076 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5078 if (must_pass_in_stack_var_size_or_pad (mode, type))
5081 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5082 The layout_type routine is crafty and tries to trick us into passing
5083 currently unsupported vector types on the stack by using TImode. */
5084 return (!TARGET_64BIT && mode == TImode
5085 && type && TREE_CODE (type) != VECTOR_TYPE);
5088 /* Return the size, in bytes, of the area reserved for arguments passed
5089 in registers for the function represented by FNDECL, depending on the ABI used. */
5092 ix86_reg_parm_stack_space (const_tree fndecl)
5094 enum calling_abi call_abi = SYSV_ABI;
5095 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5096 call_abi = ix86_function_abi (fndecl);
5098 call_abi = ix86_function_type_abi (fndecl);
5099 if (call_abi == MS_ABI)
5104 /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the calling ABI used. */
5107 ix86_function_type_abi (const_tree fntype)
5109 if (TARGET_64BIT && fntype != NULL)
5111 enum calling_abi abi = ix86_abi;
5112 if (abi == SYSV_ABI)
5114 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5117 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5125 ix86_function_ms_hook_prologue (const_tree fn)
5127 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5129 if (decl_function_context (fn) != NULL_TREE)
5130 error_at (DECL_SOURCE_LOCATION (fn),
5131 "ms_hook_prologue is not compatible with nested function");
5138 static enum calling_abi
5139 ix86_function_abi (const_tree fndecl)
5143 return ix86_function_type_abi (TREE_TYPE (fndecl));
5146 /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the calling ABI used. */
5149 ix86_cfun_abi (void)
5151 if (! cfun || ! TARGET_64BIT)
5153 return cfun->machine->call_abi;
5156 /* Write the extra assembler code needed to declare a function properly. */
5159 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5162 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5166 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5167 unsigned int filler_cc = 0xcccccccc;
5169 for (i = 0; i < filler_count; i += 4)
5170 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5173 ASM_OUTPUT_LABEL (asm_out_file, fname);
5175 /* Output magic byte marker, if hot-patch attribute is set. */
5180 /* leaq [%rsp + 0], %rsp */
5181 asm_fprintf (asm_out_file, ASM_BYTE
5182 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5186 /* movl.s %edi, %edi
5188 movl.s %esp, %ebp */
5189 asm_fprintf (asm_out_file, ASM_BYTE
5190 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5196 extern void init_regs (void);
5198 /* Implementation of the call-ABI-switching target hook. The call register
5199 sets specific to FNDECL are selected. See also CONDITIONAL_REGISTER_USAGE
5200 for more details. */
5202 ix86_call_abi_override (const_tree fndecl)
5204 if (fndecl == NULL_TREE)
5205 cfun->machine->call_abi = ix86_abi;
5207 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5210 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5211 re-initialization of init_regs each time we switch function context since
5212 this is needed only during RTL expansion. */
5214 ix86_maybe_switch_abi (void)
5217 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5221 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5222 for a call to a function whose data type is FNTYPE.
5223 For a library call, FNTYPE is 0. */
5226 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5227 tree fntype, /* tree ptr for function decl */
5228 rtx libname, /* SYMBOL_REF of library name or 0 */
5231 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5232 memset (cum, 0, sizeof (*cum));
5235 cum->call_abi = ix86_function_abi (fndecl);
5237 cum->call_abi = ix86_function_type_abi (fntype);
5238 /* Set up the number of registers to use for passing arguments. */
5240 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5241 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5242 "or subtarget optimization implying it");
5243 cum->nregs = ix86_regparm;
5246 cum->nregs = (cum->call_abi == SYSV_ABI
5247 ? X86_64_REGPARM_MAX
5248 : X86_64_MS_REGPARM_MAX);
5252 cum->sse_nregs = SSE_REGPARM_MAX;
5255 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5256 ? X86_64_SSE_REGPARM_MAX
5257 : X86_64_MS_SSE_REGPARM_MAX);
5261 cum->mmx_nregs = MMX_REGPARM_MAX;
5262 cum->warn_avx = true;
5263 cum->warn_sse = true;
5264 cum->warn_mmx = true;
5266 /* Because the type might mismatch between caller and callee, we need to
5267 use the actual type of the function for local calls.
5268 FIXME: cgraph_analyze can be told to actually record if the function uses
5269 va_start, so for local functions maybe_vaarg can be made more aggressive.
5271 FIXME: once the type system is fixed, we won't need this code anymore. */
5273 fntype = TREE_TYPE (fndecl);
5274 cum->maybe_vaarg = (fntype
5275 ? (!prototype_p (fntype) || stdarg_p (fntype))
5280 /* If there are variable arguments, then we won't pass anything
5281 in registers in 32-bit mode. */
5282 if (stdarg_p (fntype))
5293 /* Use the ecx and edx registers if the function has the fastcall attribute;
5294 else look for regparm information. */
5297 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5300 cum->fastcall = 1; /* Same first register as in fastcall. */
5302 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5308 cum->nregs = ix86_function_regparm (fntype, fndecl);
5311 /* Set up the number of SSE registers used for passing SFmode
5312 and DFmode arguments. Warn for mismatching ABI. */
5313 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5317 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5318 But in the case of vector types, it is some vector mode.
5320 When we have only some of our vector isa extensions enabled, then there
5321 are some modes for which vector_mode_supported_p is false. For these
5322 modes, the generic vector support in gcc will choose some non-vector mode
5323 in order to implement the type. By computing the natural mode, we'll
5324 select the proper ABI location for the operand and not depend on whatever
5325 the middle-end decides to do with these vector types.
5327 The middle-end can't deal with vector types larger than 16 bytes. In this
5328 case, we return the original mode and warn of an ABI change if CUM isn't NULL. */
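/* Example (illustrative): given
     typedef float v4f __attribute__ ((vector_size (16)));
   an argument of type v4f is a 16-byte vector; if SSE is enabled the
   loop below finds V4SFmode, which then drives the ABI classification
   regardless of the mode the middle-end chose for the type
   internally.  */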
5331 static enum machine_mode
5332 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5334 enum machine_mode mode = TYPE_MODE (type);
5336 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5338 HOST_WIDE_INT size = int_size_in_bytes (type);
5339 if ((size == 8 || size == 16 || size == 32)
5340 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5341 && TYPE_VECTOR_SUBPARTS (type) > 1)
5343 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5345 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5346 mode = MIN_MODE_VECTOR_FLOAT;
5348 mode = MIN_MODE_VECTOR_INT;
5350 /* Get the mode which has this inner mode and number of units. */
5351 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5352 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5353 && GET_MODE_INNER (mode) == innermode)
5355 if (size == 32 && !TARGET_AVX)
5357 static bool warnedavx;
5364 warning (0, "AVX vector argument without AVX "
5365 "enabled changes the ABI");
5367 return TYPE_MODE (type);
5380 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5381 this may not agree with the mode that the type system has chosen for the
5382 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5383 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5386 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5391 if (orig_mode != BLKmode)
5392 tmp = gen_rtx_REG (orig_mode, regno);
5395 tmp = gen_rtx_REG (mode, regno);
5396 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5397 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5403 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5404 of this code is to classify each 8-byte chunk of an incoming argument by the register
5405 class and assign registers accordingly. */
5407 /* Return the union class of CLASS1 and CLASS2.
5408 See the x86-64 PS ABI for details. */
5410 static enum x86_64_reg_class
5411 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5413 /* Rule #1: If both classes are equal, this is the resulting class. */
5414 if (class1 == class2)
5417 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is the other class. */
5419 if (class1 == X86_64_NO_CLASS)
5421 if (class2 == X86_64_NO_CLASS)
5424 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5425 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5426 return X86_64_MEMORY_CLASS;
5428 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5429 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5430 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5431 return X86_64_INTEGERSI_CLASS;
5432 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5433 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5434 return X86_64_INTEGER_CLASS;
5436 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, MEMORY is used.
5438 if (class1 == X86_64_X87_CLASS
5439 || class1 == X86_64_X87UP_CLASS
5440 || class1 == X86_64_COMPLEX_X87_CLASS
5441 || class2 == X86_64_X87_CLASS
5442 || class2 == X86_64_X87UP_CLASS
5443 || class2 == X86_64_COMPLEX_X87_CLASS)
5444 return X86_64_MEMORY_CLASS;
5446 /* Rule #6: Otherwise class SSE is used. */
5447 return X86_64_SSE_CLASS;
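/* Worked examples (from the rules above, illustrative): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4); merging X86_64_SSE_CLASS with
   X86_64_X87_CLASS yields X86_64_MEMORY_CLASS (rule #5); merging
   X86_64_SSE_CLASS with X86_64_NO_CLASS yields X86_64_SSE_CLASS
   (rule #2).  */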
5450 /* Classify the argument of type TYPE and mode MODE.
5451 CLASSES will be filled by the register class used to pass each word
5452 of the operand. The number of words is returned. In case the parameter
5453 should be passed in memory, 0 is returned. As a special case for zero
5454 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5456 BIT_OFFSET is used internally for handling records; it specifies the offset
5457 into the record, in bits, taken modulo 256 to avoid overflow cases.
5459 See the x86-64 PS ABI for details.
5463 classify_argument (enum machine_mode mode, const_tree type,
5464 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5466 HOST_WIDE_INT bytes =
5467 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5468 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
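/* Worked example (illustrative): for struct { double d; int i; },
   bytes is 12 and, with UNITS_PER_WORD == 8 and bit_offset 0, words is
   (12 + 7) / 8 == 2, so the struct is classified as two 8-byte chunks
   (roughly: an SSE class for the double, an integer class for the
   int).  */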
5470 /* Variable sized entities are always passed/returned in memory. */
5474 if (mode != VOIDmode
5475 && targetm.calls.must_pass_in_stack (mode, type))
5478 if (type && AGGREGATE_TYPE_P (type))
5482 enum x86_64_reg_class subclasses[MAX_CLASSES];
5484 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5488 for (i = 0; i < words; i++)
5489 classes[i] = X86_64_NO_CLASS;
5491 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5492 signal the memory class, so handle this as a special case. */
5495 classes[0] = X86_64_NO_CLASS;
5499 /* Classify each field of record and merge classes. */
5500 switch (TREE_CODE (type))
5503 /* And now merge the fields of structure. */
5504 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5506 if (TREE_CODE (field) == FIELD_DECL)
5510 if (TREE_TYPE (field) == error_mark_node)
5513 /* Bitfields are always classified as integer. Handle them
5514 early, since later code would consider them to be
5515 misaligned integers. */
5516 if (DECL_BIT_FIELD (field))
5518 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5519 i < ((int_bit_position (field) + (bit_offset % 64))
5520 + tree_low_cst (DECL_SIZE (field), 0)
5523 merge_classes (X86_64_INTEGER_CLASS,
5530 type = TREE_TYPE (field);
5532 /* Flexible array member is ignored. */
5533 if (TYPE_MODE (type) == BLKmode
5534 && TREE_CODE (type) == ARRAY_TYPE
5535 && TYPE_SIZE (type) == NULL_TREE
5536 && TYPE_DOMAIN (type) != NULL_TREE
5537 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5542 if (!warned && warn_psabi)
5545 inform (input_location,
5546 "The ABI of passing struct with"
5547 " a flexible array member has"
5548 " changed in GCC 4.4");
5552 num = classify_argument (TYPE_MODE (type), type,
5554 (int_bit_position (field)
5555 + bit_offset) % 256);
5558 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5559 for (i = 0; i < num && (i + pos) < words; i++)
5561 merge_classes (subclasses[i], classes[i + pos]);
5568 /* Arrays are handled as small records. */
5571 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5572 TREE_TYPE (type), subclasses, bit_offset);
5576 /* The partial classes are now full classes. */
5577 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5578 subclasses[0] = X86_64_SSE_CLASS;
5579 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5580 && !((bit_offset % 64) == 0 && bytes == 4))
5581 subclasses[0] = X86_64_INTEGER_CLASS;
5583 for (i = 0; i < words; i++)
5584 classes[i] = subclasses[i % num];
5589 case QUAL_UNION_TYPE:
5590 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
5592 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5594 if (TREE_CODE (field) == FIELD_DECL)
5598 if (TREE_TYPE (field) == error_mark_node)
5601 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5602 TREE_TYPE (field), subclasses,
5606 for (i = 0; i < num; i++)
5607 classes[i] = merge_classes (subclasses[i], classes[i]);
5618 /* When size > 16 bytes, if the first word isn't
5619 X86_64_SSE_CLASS or any later word isn't
5620 X86_64_SSEUP_CLASS, everything should be passed in
5621 memory. */
5622 if (classes[0] != X86_64_SSE_CLASS)
5625 for (i = 1; i < words; i++)
5626 if (classes[i] != X86_64_SSEUP_CLASS)
5630 /* Final merger cleanup. */
5631 for (i = 0; i < words; i++)
5633 /* If one class is MEMORY, everything should be passed in
5635 if (classes[i] == X86_64_MEMORY_CLASS)
5638 /* The X86_64_SSEUP_CLASS should be always preceded by
5639 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5640 if (classes[i] == X86_64_SSEUP_CLASS
5641 && classes[i - 1] != X86_64_SSE_CLASS
5642 && classes[i - 1] != X86_64_SSEUP_CLASS)
5644 /* The first one should never be X86_64_SSEUP_CLASS. */
5645 gcc_assert (i != 0);
5646 classes[i] = X86_64_SSE_CLASS;
5649 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5650 everything should be passed in memory. */
5651 if (classes[i] == X86_64_X87UP_CLASS
5652 && (classes[i - 1] != X86_64_X87_CLASS))
5656 /* The first one should never be X86_64_X87UP_CLASS. */
5657 gcc_assert (i != 0);
5658 if (!warned && warn_psabi)
5661 inform (input_location,
5662 "The ABI of passing union with long double"
5663 " has changed in GCC 4.4");
5671 /* Compute alignment needed. We align all types to natural boundaries with
5672 exception of XFmode that is aligned to 64bits. */
5673 if (mode != VOIDmode && mode != BLKmode)
5675 int mode_alignment = GET_MODE_BITSIZE (mode);
5678 mode_alignment = 128;
5679 else if (mode == XCmode)
5680 mode_alignment = 256;
5681 if (COMPLEX_MODE_P (mode))
5682 mode_alignment /= 2;
5683 /* Misaligned fields are always returned in memory. */
5684 if (bit_offset % mode_alignment)
5688 /* for V1xx modes, just use the base mode */
5689 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5690 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5691 mode = GET_MODE_INNER (mode);
5693 /* Classification of atomic types. */
5698 classes[0] = X86_64_SSE_CLASS;
5701 classes[0] = X86_64_SSE_CLASS;
5702 classes[1] = X86_64_SSEUP_CLASS;
5712 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5716 classes[0] = X86_64_INTEGERSI_CLASS;
5719 else if (size <= 64)
5721 classes[0] = X86_64_INTEGER_CLASS;
5724 else if (size <= 64+32)
5726 classes[0] = X86_64_INTEGER_CLASS;
5727 classes[1] = X86_64_INTEGERSI_CLASS;
5730 else if (size <= 64+64)
5732 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5740 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5744 /* OImode shouldn't be used directly. */
5749 if (!(bit_offset % 64))
5750 classes[0] = X86_64_SSESF_CLASS;
5752 classes[0] = X86_64_SSE_CLASS;
5755 classes[0] = X86_64_SSEDF_CLASS;
5758 classes[0] = X86_64_X87_CLASS;
5759 classes[1] = X86_64_X87UP_CLASS;
5762 classes[0] = X86_64_SSE_CLASS;
5763 classes[1] = X86_64_SSEUP_CLASS;
5766 classes[0] = X86_64_SSE_CLASS;
5767 if (!(bit_offset % 64))
5773 if (!warned && warn_psabi)
5776 inform (input_location,
5777 "The ABI of passing structure with complex float"
5778 " member has changed in GCC 4.4");
5780 classes[1] = X86_64_SSESF_CLASS;
5784 classes[0] = X86_64_SSEDF_CLASS;
5785 classes[1] = X86_64_SSEDF_CLASS;
5788 classes[0] = X86_64_COMPLEX_X87_CLASS;
5791 /* This mode is larger than 16 bytes. */
5799 classes[0] = X86_64_SSE_CLASS;
5800 classes[1] = X86_64_SSEUP_CLASS;
5801 classes[2] = X86_64_SSEUP_CLASS;
5802 classes[3] = X86_64_SSEUP_CLASS;
5810 classes[0] = X86_64_SSE_CLASS;
5811 classes[1] = X86_64_SSEUP_CLASS;
5819 classes[0] = X86_64_SSE_CLASS;
5825 gcc_assert (VECTOR_MODE_P (mode));
5830 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5832 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5833 classes[0] = X86_64_INTEGERSI_CLASS;
5835 classes[0] = X86_64_INTEGER_CLASS;
5836 classes[1] = X86_64_INTEGER_CLASS;
5837 return 1 + (bytes > 8);
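/* Illustrative sketch (not from the original source): classifications the
   routine above produces for a few C types under the SysV x86-64 ABI.
   The class assignments follow the psABI; the comments are a reading aid. */
#if 0
struct s1 { long a; double b; };  /* 2 words: INTEGER, SSE                */
struct s2 { int a; int b; };      /* 1 word:  INTEGER (ints share a word) */
struct s3 { long double x; };     /* 2 words: X87, X87UP (memory as arg)  */
struct s4 { char big[24]; };      /* over 16 bytes, no SSE: MEMORY (0)    */
#endif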
5841 /* Examine the argument and compute the number of registers required in each
5842 class. Return 0 iff the parameter should be passed in memory. */
5844 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5845 int *int_nregs, int *sse_nregs)
5847 enum x86_64_reg_class regclass[MAX_CLASSES];
5848 int n = classify_argument (mode, type, regclass, 0);
5854 for (n--; n >= 0; n--)
5855 switch (regclass[n])
5857 case X86_64_INTEGER_CLASS:
5858 case X86_64_INTEGERSI_CLASS:
5861 case X86_64_SSE_CLASS:
5862 case X86_64_SSESF_CLASS:
5863 case X86_64_SSEDF_CLASS:
5866 case X86_64_NO_CLASS:
5867 case X86_64_SSEUP_CLASS:
5869 case X86_64_X87_CLASS:
5870 case X86_64_X87UP_CLASS:
5874 case X86_64_COMPLEX_X87_CLASS:
5875 return in_return ? 2 : 0;
5876 case X86_64_MEMORY_CLASS:
5882 /* Construct container for the argument used by GCC interface. See
5883 FUNCTION_ARG for the detailed description. */
5886 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5887 const_tree type, int in_return, int nintregs, int nsseregs,
5888 const int *intreg, int sse_regno)
5890 /* The following variables hold the static issued_error state. */
5891 static bool issued_sse_arg_error;
5892 static bool issued_sse_ret_error;
5893 static bool issued_x87_ret_error;
5895 enum machine_mode tmpmode;
5897 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5898 enum x86_64_reg_class regclass[MAX_CLASSES];
5902 int needed_sseregs, needed_intregs;
5903 rtx exp[MAX_CLASSES];
5906 n = classify_argument (mode, type, regclass, 0);
5909 if (!examine_argument (mode, type, in_return, &needed_intregs,
5912 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5915 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5916 some less clueful developer tries to use floating-point anyway. */
5917 if (needed_sseregs && !TARGET_SSE)
5921 if (!issued_sse_ret_error)
5923 error ("SSE register return with SSE disabled");
5924 issued_sse_ret_error = true;
5927 else if (!issued_sse_arg_error)
5929 error ("SSE register argument with SSE disabled");
5930 issued_sse_arg_error = true;
5935 /* Likewise, error if the ABI requires us to return values in the
5936 x87 registers and the user specified -mno-80387. */
5937 if (!TARGET_80387 && in_return)
5938 for (i = 0; i < n; i++)
5939 if (regclass[i] == X86_64_X87_CLASS
5940 || regclass[i] == X86_64_X87UP_CLASS
5941 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5943 if (!issued_x87_ret_error)
5945 error ("x87 register return with x87 disabled");
5946 issued_x87_ret_error = true;
5951 /* First construct simple cases. Avoid SCmode, since we want to use
5952 single register to pass this type. */
5953 if (n == 1 && mode != SCmode)
5954 switch (regclass[0])
5956 case X86_64_INTEGER_CLASS:
5957 case X86_64_INTEGERSI_CLASS:
5958 return gen_rtx_REG (mode, intreg[0]);
5959 case X86_64_SSE_CLASS:
5960 case X86_64_SSESF_CLASS:
5961 case X86_64_SSEDF_CLASS:
5962 if (mode != BLKmode)
5963 return gen_reg_or_parallel (mode, orig_mode,
5964 SSE_REGNO (sse_regno));
5966 case X86_64_X87_CLASS:
5967 case X86_64_COMPLEX_X87_CLASS:
5968 return gen_rtx_REG (mode, FIRST_STACK_REG);
5969 case X86_64_NO_CLASS:
5970 /* Zero sized array, struct or class. */
5975 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5976 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5977 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5979 && regclass[0] == X86_64_SSE_CLASS
5980 && regclass[1] == X86_64_SSEUP_CLASS
5981 && regclass[2] == X86_64_SSEUP_CLASS
5982 && regclass[3] == X86_64_SSEUP_CLASS
5984 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5987 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5988 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5989 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5990 && regclass[1] == X86_64_INTEGER_CLASS
5991 && (mode == CDImode || mode == TImode || mode == TFmode)
5992 && intreg[0] + 1 == intreg[1])
5993 return gen_rtx_REG (mode, intreg[0]);
5995 /* Otherwise figure out the entries of the PARALLEL. */
5996 for (i = 0; i < n; i++)
6000 switch (regclass[i])
6002 case X86_64_NO_CLASS:
6004 case X86_64_INTEGER_CLASS:
6005 case X86_64_INTEGERSI_CLASS:
6006 /* Merge TImodes on aligned occasions here too. */
6007 if (i * 8 + 8 > bytes)
6008 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6009 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6013 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
6014 if (tmpmode == BLKmode)
6016 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6017 gen_rtx_REG (tmpmode, *intreg),
6021 case X86_64_SSESF_CLASS:
6022 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6023 gen_rtx_REG (SFmode,
6024 SSE_REGNO (sse_regno)),
6028 case X86_64_SSEDF_CLASS:
6029 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6030 gen_rtx_REG (DFmode,
6031 SSE_REGNO (sse_regno)),
6035 case X86_64_SSE_CLASS:
6043 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6053 && regclass[1] == X86_64_SSEUP_CLASS
6054 && regclass[2] == X86_64_SSEUP_CLASS
6055 && regclass[3] == X86_64_SSEUP_CLASS);
6062 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6063 gen_rtx_REG (tmpmode,
6064 SSE_REGNO (sse_regno)),
6073 /* Empty aligned struct, union or class. */
6077 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6078 for (i = 0; i < nexps; i++)
6079 XVECEXP (ret, 0, i) = exp [i];
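/* Illustrative sketch (not from the original source): for a by-value
   struct { double d; long l; } the code above builds a PARALLEL pairing
   each eightbyte with a register and its byte offset, roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   assuming xmm0 and rdi are the next free SSE and integer slots. */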
6083 /* Update the data in CUM to advance over an argument of mode MODE
6084 and data type TYPE. (TYPE is null for libcalls where that information
6085 may not be available.) */
6088 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6089 const_tree type, HOST_WIDE_INT bytes,
6090 HOST_WIDE_INT words)
6106 cum->words += words;
6107 cum->nregs -= words;
6108 cum->regno += words;
6110 if (cum->nregs <= 0)
6118 /* OImode shouldn't be used directly. */
6122 if (cum->float_in_sse < 2)
6125 if (cum->float_in_sse < 1)
6142 if (!type || !AGGREGATE_TYPE_P (type))
6144 cum->sse_words += words;
6145 cum->sse_nregs -= 1;
6146 cum->sse_regno += 1;
6147 if (cum->sse_nregs <= 0)
6161 if (!type || !AGGREGATE_TYPE_P (type))
6163 cum->mmx_words += words;
6164 cum->mmx_nregs -= 1;
6165 cum->mmx_regno += 1;
6166 if (cum->mmx_nregs <= 0)
6177 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6178 const_tree type, HOST_WIDE_INT words, bool named)
6180 int int_nregs, sse_nregs;
6182 /* Unnamed 256bit vector mode parameters are passed on stack. */
6183 if (!named && VALID_AVX256_REG_MODE (mode))
6186 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6187 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6189 cum->nregs -= int_nregs;
6190 cum->sse_nregs -= sse_nregs;
6191 cum->regno += int_nregs;
6192 cum->sse_regno += sse_nregs;
6196 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6197 cum->words = (cum->words + align - 1) & ~(align - 1);
6198 cum->words += words;
6203 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6204 HOST_WIDE_INT words)
6206 /* Otherwise, this should be passed indirectly. */
6207 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6209 cum->words += words;
6217 /* Update the data in CUM to advance over an argument of mode MODE and
6218 data type TYPE. (TYPE is null for libcalls where that information
6219 may not be available.) */
6222 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6223 const_tree type, bool named)
6225 HOST_WIDE_INT bytes, words;
6227 if (mode == BLKmode)
6228 bytes = int_size_in_bytes (type);
6230 bytes = GET_MODE_SIZE (mode);
6231 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6234 mode = type_natural_mode (type, NULL);
6236 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6237 function_arg_advance_ms_64 (cum, bytes, words);
6238 else if (TARGET_64BIT)
6239 function_arg_advance_64 (cum, mode, type, words, named);
6241 function_arg_advance_32 (cum, mode, type, bytes, words);
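/* Illustrative sketch (not from the original source): how the cumulative
   state advances across a SysV prototype f (int, double, int), using the
   psABI register order rdi, rsi, ... and xmm0, xmm1, ...

     after the int:    regno = 1, sse_regno = 0
     after the double: regno = 1, sse_regno = 1
     after the int:    regno = 2, sse_regno = 1  */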
6244 /* Define where to put the arguments to a function.
6245 Value is zero to push the argument on the stack,
6246 or a hard register in which to store the argument.
6248 MODE is the argument's machine mode.
6249 TYPE is the data type of the argument (as a tree).
6250 This is null for libcalls where that information may
6252 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6253 the preceding args and about the function being called.
6254 NAMED is nonzero if this argument is a named parameter
6255 (otherwise it is an extra parameter matching an ellipsis). */
6258 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6259 enum machine_mode orig_mode, const_tree type,
6260 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6262 static bool warnedsse, warnedmmx;
6264 /* Avoid the AL settings for the Unix64 ABI. */
6265 if (mode == VOIDmode)
6281 if (words <= cum->nregs)
6283 int regno = cum->regno;
6285 /* Fastcall allocates the first two DWORD (SImode) or
6286 smaller arguments to ECX and EDX if it isn't an aggregate type.
6292 || (type && AGGREGATE_TYPE_P (type)))
6295 /* ECX not EAX is the first allocated register. */
6296 if (regno == AX_REG)
6299 return gen_rtx_REG (mode, regno);
6304 if (cum->float_in_sse < 2)
6307 if (cum->float_in_sse < 1)
6311 /* In 32bit, we pass TImode in xmm registers. */
6318 if (!type || !AGGREGATE_TYPE_P (type))
6320 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6323 warning (0, "SSE vector argument without SSE enabled "
6327 return gen_reg_or_parallel (mode, orig_mode,
6328 cum->sse_regno + FIRST_SSE_REG);
6333 /* OImode shouldn't be used directly. */
6342 if (!type || !AGGREGATE_TYPE_P (type))
6345 return gen_reg_or_parallel (mode, orig_mode,
6346 cum->sse_regno + FIRST_SSE_REG);
6356 if (!type || !AGGREGATE_TYPE_P (type))
6358 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6361 warning (0, "MMX vector argument without MMX enabled "
6365 return gen_reg_or_parallel (mode, orig_mode,
6366 cum->mmx_regno + FIRST_MMX_REG);
6375 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6376 enum machine_mode orig_mode, const_tree type, bool named)
6378 /* Handle a hidden AL argument containing number of registers
6379 for varargs x86-64 functions. */
6380 if (mode == VOIDmode)
6381 return GEN_INT (cum->maybe_vaarg
6382 ? (cum->sse_nregs < 0
6383 ? X86_64_SSE_REGPARM_MAX
6398 /* Unnamed 256bit vector mode parameters are passed on stack. */
6404 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6406 &x86_64_int_parameter_registers [cum->regno],
6411 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6412 enum machine_mode orig_mode, bool named,
6413 HOST_WIDE_INT bytes)
6417 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6418 We use value of -2 to specify that current function call is MSABI. */
6419 if (mode == VOIDmode)
6420 return GEN_INT (-2);
6422 /* If we've run out of registers, it goes on the stack. */
6423 if (cum->nregs == 0)
6426 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6428 /* Only floating point modes are passed in anything but integer regs. */
6429 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6432 regno = cum->regno + FIRST_SSE_REG;
6437 /* Unnamed floating parameters are passed in both the
6438 SSE and integer registers. */
6439 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6440 t2 = gen_rtx_REG (mode, regno);
6441 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6442 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6443 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6446 /* Handle aggregated types passed in register. */
6447 if (orig_mode == BLKmode)
6449 if (bytes > 0 && bytes <= 8)
6450 mode = (bytes > 4 ? DImode : SImode);
6451 if (mode == BLKmode)
6455 return gen_reg_or_parallel (mode, orig_mode, regno);
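/* Illustrative sketch (not from the original source): under the MS ABI
   handled above, the first four arguments occupy the fixed slots
   rcx/rdx/r8/r9, with xmm0-xmm3 standing in for SFmode/DFmode floats in
   the same positions; later arguments go on the stack. An unnamed float
   passed to a varargs function is duplicated into both the SSE and the
   integer register, which is what the two-element PARALLEL built above
   expresses. */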
6458 /* Return where to put the arguments to a function.
6459 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6461 MODE is the argument's machine mode. TYPE is the data type of the
6462 argument. It is null for libcalls where that information may not be
6463 available. CUM gives information about the preceding args and about
6464 the function being called. NAMED is nonzero if this argument is a
6465 named parameter (otherwise it is an extra parameter matching an
6469 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6470 const_tree type, bool named)
6472 enum machine_mode mode = omode;
6473 HOST_WIDE_INT bytes, words;
6475 if (mode == BLKmode)
6476 bytes = int_size_in_bytes (type);
6478 bytes = GET_MODE_SIZE (mode);
6479 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6481 /* To simplify the code below, represent vector types with a vector mode
6482 even if MMX/SSE are not active. */
6483 if (type && TREE_CODE (type) == VECTOR_TYPE)
6484 mode = type_natural_mode (type, cum);
6486 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6487 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6488 else if (TARGET_64BIT)
6489 return function_arg_64 (cum, mode, omode, type, named);
6491 return function_arg_32 (cum, mode, omode, type, bytes, words);
6494 /* A C expression that indicates when an argument must be passed by
6495 reference. If nonzero for an argument, a copy of that argument is
6496 made in memory and a pointer to the argument is passed instead of
6497 the argument itself. The pointer is passed in whatever way is
6498 appropriate for passing a pointer to that type. */
6501 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6502 enum machine_mode mode ATTRIBUTE_UNUSED,
6503 const_tree type, bool named ATTRIBUTE_UNUSED)
6505 /* See Windows x64 Software Convention. */
6506 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6508 int msize = (int) GET_MODE_SIZE (mode);
6511 /* Arrays are passed by reference. */
6512 if (TREE_CODE (type) == ARRAY_TYPE)
6515 if (AGGREGATE_TYPE_P (type))
6517 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6518 are passed by reference. */
6519 msize = int_size_in_bytes (type);
6523 /* __m128 is passed by reference. */
6525 case 1: case 2: case 4: case 8:
6531 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6537 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6540 contains_aligned_value_p (const_tree type)
6542 enum machine_mode mode = TYPE_MODE (type);
6543 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6547 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6549 if (TYPE_ALIGN (type) < 128)
6552 if (AGGREGATE_TYPE_P (type))
6554 /* Walk the aggregates recursively. */
6555 switch (TREE_CODE (type))
6559 case QUAL_UNION_TYPE:
6563 /* Walk all the structure fields. */
6564 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6566 if (TREE_CODE (field) == FIELD_DECL
6567 && contains_aligned_value_p (TREE_TYPE (field)))
6574 /* Just for use if some language passes arrays by value. */
6575 if (contains_aligned_value_p (TREE_TYPE (type)))
6586 /* Gives the alignment boundary, in bits, of an argument with the
6587 specified mode and type. */
6590 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6595 /* The main variant type is used for the call, so convert TYPE to
6596 its main variant. */
6597 type = TYPE_MAIN_VARIANT (type);
6598 align = TYPE_ALIGN (type);
6601 align = GET_MODE_ALIGNMENT (mode);
6602 if (align < PARM_BOUNDARY)
6603 align = PARM_BOUNDARY;
6604 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6605 natural boundaries. */
6606 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6608 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6609 make an exception for SSE modes since these require 128bit
6610 alignment.
6612 The handling here differs from field_alignment. ICC aligns MMX
6613 arguments to 4 byte boundaries, while structure fields are aligned
6614 to 8 byte boundaries. */
6617 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6618 align = PARM_BOUNDARY;
6622 if (!contains_aligned_value_p (type))
6623 align = PARM_BOUNDARY;
6626 if (align > BIGGEST_ALIGNMENT)
6627 align = BIGGEST_ALIGNMENT;
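/* Illustrative sketch (not from the original source): boundaries the
   routine above yields for 32-bit argument passing. An int or a double
   stays at the 32-bit PARM_BOUNDARY; __m128 (an SSE mode) and
   _Decimal128 keep their natural 128-bit alignment; an aggregate is
   bumped to 128 bits only when contains_aligned_value_p finds a
   128-bit aligned member inside it. */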
6631 /* Return true if N is a possible register number of function value. */
6634 ix86_function_value_regno_p (const unsigned int regno)
6641 case FIRST_FLOAT_REG:
6642 /* TODO: The function should depend on current function ABI but
6643 builtins.c would need updating then. Therefore we use the
6645 if (TARGET_64BIT && ix86_abi == MS_ABI)
6647 return TARGET_FLOAT_RETURNS_IN_80387;
6653 if (TARGET_MACHO || TARGET_64BIT)
6661 /* Define how to find the value returned by a function.
6662 VALTYPE is the data type of the value (as a tree).
6663 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6664 otherwise, FUNC is 0. */
6667 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6668 const_tree fntype, const_tree fn)
6672 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6673 we normally prevent this case when mmx is not available. However
6674 some ABIs may require the result to be returned like DImode. */
6675 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6676 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6678 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6679 we prevent this case when sse is not available. However some ABIs
6680 may require the result to be returned like integer TImode. */
6681 else if (mode == TImode
6682 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6683 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6685 /* 32-byte vector modes in %ymm0. */
6686 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6687 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6689 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6690 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6691 regno = FIRST_FLOAT_REG;
6693 /* Most things go in %eax. */
6696 /* Override FP return register with %xmm0 for local functions when
6697 SSE math is enabled or for functions with sseregparm attribute. */
6698 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6700 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6701 if ((sse_level >= 1 && mode == SFmode)
6702 || (sse_level == 2 && mode == DFmode))
6703 regno = FIRST_SSE_REG;
6706 /* OImode shouldn't be used directly. */
6707 gcc_assert (mode != OImode);
6709 return gen_rtx_REG (orig_mode, regno);
6713 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6718 /* Handle libcalls, which don't provide a type node. */
6719 if (valtype == NULL)
6731 return gen_rtx_REG (mode, FIRST_SSE_REG);
6734 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6738 return gen_rtx_REG (mode, AX_REG);
6742 ret = construct_container (mode, orig_mode, valtype, 1,
6743 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6744 x86_64_int_return_registers, 0);
6746 /* For zero sized structures, construct_container returns NULL, but we
6747 need to keep rest of compiler happy by returning meaningful value. */
6749 ret = gen_rtx_REG (orig_mode, AX_REG);
6755 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6757 unsigned int regno = AX_REG;
6761 switch (GET_MODE_SIZE (mode))
6764 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6765 && !COMPLEX_MODE_P (mode))
6766 regno = FIRST_SSE_REG;
6770 if (mode == SFmode || mode == DFmode)
6771 regno = FIRST_SSE_REG;
6777 return gen_rtx_REG (orig_mode, regno);
6781 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6782 enum machine_mode orig_mode, enum machine_mode mode)
6784 const_tree fn, fntype;
6787 if (fntype_or_decl && DECL_P (fntype_or_decl))
6788 fn = fntype_or_decl;
6789 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6791 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6792 return function_value_ms_64 (orig_mode, mode);
6793 else if (TARGET_64BIT)
6794 return function_value_64 (orig_mode, mode, valtype);
6796 return function_value_32 (orig_mode, mode, fntype, fn);
6800 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6801 bool outgoing ATTRIBUTE_UNUSED)
6803 enum machine_mode mode, orig_mode;
6805 orig_mode = TYPE_MODE (valtype);
6806 mode = type_natural_mode (valtype, NULL);
6807 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6811 ix86_libcall_value (enum machine_mode mode)
6813 return ix86_function_value_1 (NULL, NULL, mode, mode);
6816 /* Return true iff type is returned in memory. */
6818 static int ATTRIBUTE_UNUSED
6819 return_in_memory_32 (const_tree type, enum machine_mode mode)
6823 if (mode == BLKmode)
6826 size = int_size_in_bytes (type);
6828 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6831 if (VECTOR_MODE_P (mode) || mode == TImode)
6833 /* User-created vectors small enough to fit in EAX. */
6837 /* MMX/3dNow values are returned in MM0,
6838 except when it doesn't exist. */
6840 return (TARGET_MMX ? 0 : 1);
6842 /* SSE values are returned in XMM0, except when it doesn't exist. */
6844 return (TARGET_SSE ? 0 : 1);
6846 /* AVX values are returned in YMM0, except when it doesn't exist. */
6848 return TARGET_AVX ? 0 : 1;
6857 /* OImode shouldn't be used directly. */
6858 gcc_assert (mode != OImode);
6863 static int ATTRIBUTE_UNUSED
6864 return_in_memory_64 (const_tree type, enum machine_mode mode)
6866 int needed_intregs, needed_sseregs;
6867 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6870 static int ATTRIBUTE_UNUSED
6871 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6873 HOST_WIDE_INT size = int_size_in_bytes (type);
6875 /* __m128 is returned in xmm0. */
6876 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6877 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6880 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6881 return (size != 1 && size != 2 && size != 4 && size != 8);
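/* Illustrative sketch (not from the original source): MS-ABI return
   classification as computed above. */
#if 0
struct r8  { int a, b; };     /* 8 bytes: returned in RAX         */
struct r12 { int a, b, c; };  /* 12 bytes: returned in memory     */
typedef float v4sf __attribute__ ((vector_size (16)));
                              /* 16-byte vector: returned in XMM0 */
#endif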
6885 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6887 #ifdef SUBTARGET_RETURN_IN_MEMORY
6888 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6890 const enum machine_mode mode = type_natural_mode (type, NULL);
6894 if (ix86_function_type_abi (fntype) == MS_ABI)
6895 return return_in_memory_ms_64 (type, mode);
6897 return return_in_memory_64 (type, mode);
6900 return return_in_memory_32 (type, mode);
6904 /* Return false iff TYPE is returned in memory. This version is used
6905 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6906 but differs notably in that when MMX is available, 8-byte vectors
6907 are returned in memory, rather than in MMX registers. */
6910 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6913 enum machine_mode mode = type_natural_mode (type, NULL);
6916 return return_in_memory_64 (type, mode);
6918 if (mode == BLKmode)
6921 size = int_size_in_bytes (type);
6923 if (VECTOR_MODE_P (mode))
6925 /* Return in memory only if MMX registers *are* available. This
6926 seems backwards, but it is consistent with the existing
6927 Solaris x86 ABI. */
6933 else if (mode == TImode)
6935 else if (mode == XFmode)
6941 /* When returning SSE vector types, we have a choice of either
6942 (1) being abi incompatible with a -march switch, or
6943 (2) generating an error.
6944 Given no good solution, I think the safest thing is one warning.
6945 The user won't be able to use -Werror, but....
6947 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6948 called in response to actually generating a caller or callee that
6949 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6950 via aggregate_value_p for general type probing from tree-ssa. */
6953 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6955 static bool warnedsse, warnedmmx;
6957 if (!TARGET_64BIT && type)
6959 /* Look at the return type of the function, not the function type. */
6960 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6962 if (!TARGET_SSE && !warnedsse)
6965 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6968 warning (0, "SSE vector return without SSE enabled "
6973 if (!TARGET_MMX && !warnedmmx)
6975 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6978 warning (0, "MMX vector return without MMX enabled "
6988 /* Create the va_list data type. */
6990 /* Returns the calling convention specific va_list data type.
6991 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6994 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6996 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6998 /* For i386 we use plain pointer to argument area. */
6999 if (!TARGET_64BIT || abi == MS_ABI)
7000 return build_pointer_type (char_type_node);
7002 record = lang_hooks.types.make_type (RECORD_TYPE);
7003 type_decl = build_decl (BUILTINS_LOCATION,
7004 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7006 f_gpr = build_decl (BUILTINS_LOCATION,
7007 FIELD_DECL, get_identifier ("gp_offset"),
7008 unsigned_type_node);
7009 f_fpr = build_decl (BUILTINS_LOCATION,
7010 FIELD_DECL, get_identifier ("fp_offset"),
7011 unsigned_type_node);
7012 f_ovf = build_decl (BUILTINS_LOCATION,
7013 FIELD_DECL, get_identifier ("overflow_arg_area"),
7015 f_sav = build_decl (BUILTINS_LOCATION,
7016 FIELD_DECL, get_identifier ("reg_save_area"),
7019 va_list_gpr_counter_field = f_gpr;
7020 va_list_fpr_counter_field = f_fpr;
7022 DECL_FIELD_CONTEXT (f_gpr) = record;
7023 DECL_FIELD_CONTEXT (f_fpr) = record;
7024 DECL_FIELD_CONTEXT (f_ovf) = record;
7025 DECL_FIELD_CONTEXT (f_sav) = record;
7027 TREE_CHAIN (record) = type_decl;
7028 TYPE_NAME (record) = type_decl;
7029 TYPE_FIELDS (record) = f_gpr;
7030 DECL_CHAIN (f_gpr) = f_fpr;
7031 DECL_CHAIN (f_fpr) = f_ovf;
7032 DECL_CHAIN (f_ovf) = f_sav;
7034 layout_type (record);
7036 /* The correct type is an array type of one element. */
7037 return build_array_type (record, build_index_type (size_zero_node));
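/* Illustrative sketch (not from the original source): the C-level shape
   of the record built above, i.e. the familiar SysV x86-64 va_list. */
#if 0
typedef struct __va_list_tag
{
  unsigned int gp_offset;    /* byte offset into reg_save_area for GPRs */
  unsigned int fp_offset;    /* byte offset into reg_save_area for XMMs */
  void *overflow_arg_area;   /* next stack-passed argument              */
  void *reg_save_area;       /* register save area in the frame         */
} __builtin_va_list[1];      /* an array of one element, as returned    */
#endif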
7040 /* Setup the builtin va_list data type and for 64-bit the additional
7041 calling convention specific va_list data types. */
7044 ix86_build_builtin_va_list (void)
7046 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7048 /* Initialize abi specific va_list builtin types. */
7052 if (ix86_abi == MS_ABI)
7054 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7055 if (TREE_CODE (t) != RECORD_TYPE)
7056 t = build_variant_type_copy (t);
7057 sysv_va_list_type_node = t;
7062 if (TREE_CODE (t) != RECORD_TYPE)
7063 t = build_variant_type_copy (t);
7064 sysv_va_list_type_node = t;
7066 if (ix86_abi != MS_ABI)
7068 t = ix86_build_builtin_va_list_abi (MS_ABI);
7069 if (TREE_CODE (t) != RECORD_TYPE)
7070 t = build_variant_type_copy (t);
7071 ms_va_list_type_node = t;
7076 if (TREE_CODE (t) != RECORD_TYPE)
7077 t = build_variant_type_copy (t);
7078 ms_va_list_type_node = t;
7085 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7088 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7094 /* GPR size of varargs save area. */
7095 if (cfun->va_list_gpr_size)
7096 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7098 ix86_varargs_gpr_size = 0;
7100 /* FPR size of varargs save area. We don't need it if we don't pass
7101 anything in SSE registers. */
7102 if (TARGET_SSE && cfun->va_list_fpr_size)
7103 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7105 ix86_varargs_fpr_size = 0;
7107 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7110 save_area = frame_pointer_rtx;
7111 set = get_varargs_alias_set ();
7113 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7114 if (max > X86_64_REGPARM_MAX)
7115 max = X86_64_REGPARM_MAX;
7117 for (i = cum->regno; i < max; i++)
7119 mem = gen_rtx_MEM (Pmode,
7120 plus_constant (save_area, i * UNITS_PER_WORD));
7121 MEM_NOTRAP_P (mem) = 1;
7122 set_mem_alias_set (mem, set);
7123 emit_move_insn (mem, gen_rtx_REG (Pmode,
7124 x86_64_int_parameter_registers[i]));
7127 if (ix86_varargs_fpr_size)
7129 enum machine_mode smode;
7132 /* Now emit code to save SSE registers. The AX parameter contains number
7133 of SSE parameter registers used to call this function, though all we
7134 actually check here is the zero/non-zero status. */
7136 label = gen_label_rtx ();
7137 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7138 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7141 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7142 we used movdqa (i.e. TImode) instead? Perhaps even better would
7143 be if we could determine the real mode of the data, via a hook
7144 into pass_stdarg. Ignore all that for now. */
7146 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7147 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7149 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7150 if (max > X86_64_SSE_REGPARM_MAX)
7151 max = X86_64_SSE_REGPARM_MAX;
7153 for (i = cum->sse_regno; i < max; ++i)
7155 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7156 mem = gen_rtx_MEM (smode, mem);
7157 MEM_NOTRAP_P (mem) = 1;
7158 set_mem_alias_set (mem, set);
7159 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7161 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
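/* Illustrative sketch (not from the original source): layout of the save
   area emitted above when every register is live. Bytes 0..47 hold the
   six GPR words (rdi, rsi, rdx, rcx, r8, r9) and bytes 48..175 the eight
   16-byte XMM slots, matching the gp_offset/fp_offset arithmetic used by
   va_start and va_arg below. */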
7169 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7171 alias_set_type set = get_varargs_alias_set ();
7174 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7178 mem = gen_rtx_MEM (Pmode,
7179 plus_constant (virtual_incoming_args_rtx,
7180 i * UNITS_PER_WORD));
7181 MEM_NOTRAP_P (mem) = 1;
7182 set_mem_alias_set (mem, set);
7184 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7185 emit_move_insn (mem, reg);
7190 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7191 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7194 CUMULATIVE_ARGS next_cum;
7197 /* This argument doesn't appear to be used anymore. Which is good,
7198 because the old code here didn't suppress rtl generation. */
7199 gcc_assert (!no_rtl);
7204 fntype = TREE_TYPE (current_function_decl);
7206 /* For varargs, we do not want to skip the dummy va_dcl argument.
7207 For stdargs, we do want to skip the last named argument. */
7209 if (stdarg_p (fntype))
7210 ix86_function_arg_advance (&next_cum, mode, type, true);
7212 if (cum->call_abi == MS_ABI)
7213 setup_incoming_varargs_ms_64 (&next_cum);
7215 setup_incoming_varargs_64 (&next_cum);
7218 /* Checks if TYPE is of kind va_list char *. */
7221 is_va_list_char_pointer (tree type)
7225 /* For 32-bit it is always true. */
7228 canonic = ix86_canonical_va_list_type (type);
7229 return (canonic == ms_va_list_type_node
7230 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7233 /* Implement va_start. */
7236 ix86_va_start (tree valist, rtx nextarg)
7238 HOST_WIDE_INT words, n_gpr, n_fpr;
7239 tree f_gpr, f_fpr, f_ovf, f_sav;
7240 tree gpr, fpr, ovf, sav, t;
7243 /* Only 64bit target needs something special. */
7244 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7246 std_expand_builtin_va_start (valist, nextarg);
7250 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7251 f_fpr = DECL_CHAIN (f_gpr);
7252 f_ovf = DECL_CHAIN (f_fpr);
7253 f_sav = DECL_CHAIN (f_ovf);
7255 valist = build_simple_mem_ref (valist);
7256 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7257 /* The following should be folded into the MEM_REF offset. */
7258 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7260 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7262 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7264 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7267 /* Count number of gp and fp argument registers used. */
7268 words = crtl->args.info.words;
7269 n_gpr = crtl->args.info.regno;
7270 n_fpr = crtl->args.info.sse_regno;
7272 if (cfun->va_list_gpr_size)
7274 type = TREE_TYPE (gpr);
7275 t = build2 (MODIFY_EXPR, type,
7276 gpr, build_int_cst (type, n_gpr * 8));
7277 TREE_SIDE_EFFECTS (t) = 1;
7278 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7281 if (TARGET_SSE && cfun->va_list_fpr_size)
7283 type = TREE_TYPE (fpr);
7284 t = build2 (MODIFY_EXPR, type, fpr,
7285 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7286 TREE_SIDE_EFFECTS (t) = 1;
7287 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7290 /* Find the overflow area. */
7291 type = TREE_TYPE (ovf);
7292 t = make_tree (type, crtl->args.internal_arg_pointer);
7294 t = build2 (POINTER_PLUS_EXPR, type, t,
7295 size_int (words * UNITS_PER_WORD));
7296 t = build2 (MODIFY_EXPR, type, ovf, t);
7297 TREE_SIDE_EFFECTS (t) = 1;
7298 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7300 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7302 /* Find the register save area.
7303 The function prologue saves it right above the stack frame. */
7304 type = TREE_TYPE (sav);
7305 t = make_tree (type, frame_pointer_rtx);
7306 if (!ix86_varargs_gpr_size)
7307 t = build2 (POINTER_PLUS_EXPR, type, t,
7308 size_int (-8 * X86_64_REGPARM_MAX));
7309 t = build2 (MODIFY_EXPR, type, sav, t);
7310 TREE_SIDE_EFFECTS (t) = 1;
7311 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
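/* Illustrative sketch (not from the original source): for a function
   such as int f (int a, ...) with one named integer argument, the
   expansion above initializes the va_list as if by

     gp_offset = 1 * 8;              one GPR consumed by the named arg
     fp_offset = 0 * 16 + 8 * 6;     no XMM used; skip the 48-byte GPR area
     overflow_arg_area = incoming stack arguments;
     reg_save_area = the area filled by setup_incoming_varargs_64.  */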
7315 /* Implement va_arg. */
7318 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7321 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7322 tree f_gpr, f_fpr, f_ovf, f_sav;
7323 tree gpr, fpr, ovf, sav, t;
7325 tree lab_false, lab_over = NULL_TREE;
7330 enum machine_mode nat_mode;
7331 unsigned int arg_boundary;
7333 /* Only 64bit target needs something special. */
7334 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7335 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7337 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7338 f_fpr = DECL_CHAIN (f_gpr);
7339 f_ovf = DECL_CHAIN (f_fpr);
7340 f_sav = DECL_CHAIN (f_ovf);
7342 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7343 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7344 valist = build_va_arg_indirect_ref (valist);
7345 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7346 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7347 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7349 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7351 type = build_pointer_type (type);
7352 size = int_size_in_bytes (type);
7353 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7355 nat_mode = type_natural_mode (type, NULL);
7364 /* Unnamed 256bit vector mode parameters are passed on stack. */
7365 if (ix86_cfun_abi () == SYSV_ABI)
7372 container = construct_container (nat_mode, TYPE_MODE (type),
7373 type, 0, X86_64_REGPARM_MAX,
7374 X86_64_SSE_REGPARM_MAX, intreg,
7379 /* Pull the value out of the saved registers. */
7381 addr = create_tmp_var (ptr_type_node, "addr");
7385 int needed_intregs, needed_sseregs;
7387 tree int_addr, sse_addr;
7389 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7390 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7392 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7394 need_temp = (!REG_P (container)
7395 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7396 || TYPE_ALIGN (type) > 128));
7398 /* If we are passing a structure, verify that it occupies a consecutive
7399 block of the register save area; if not, we need to do moves. */
7400 if (!need_temp && !REG_P (container))
7402 /* Verify that all registers are strictly consecutive */
7403 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7407 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7409 rtx slot = XVECEXP (container, 0, i);
7410 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7411 || INTVAL (XEXP (slot, 1)) != i * 16)
7419 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7421 rtx slot = XVECEXP (container, 0, i);
7422 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7423 || INTVAL (XEXP (slot, 1)) != i * 8)
7435 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7436 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7439 /* First ensure that we fit completely in registers. */
7442 t = build_int_cst (TREE_TYPE (gpr),
7443 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7444 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7445 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7446 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7447 gimplify_and_add (t, pre_p);
7451 t = build_int_cst (TREE_TYPE (fpr),
7452 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7453 + X86_64_REGPARM_MAX * 8);
7454 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7455 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7456 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7457 gimplify_and_add (t, pre_p);
7460 /* Compute index to start of area used for integer regs. */
7463 /* int_addr = gpr + sav; */
7464 t = fold_convert (sizetype, gpr);
7465 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7466 gimplify_assign (int_addr, t, pre_p);
7470 /* sse_addr = fpr + sav; */
7471 t = fold_convert (sizetype, fpr);
7472 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7473 gimplify_assign (sse_addr, t, pre_p);
7477 int i, prev_size = 0;
7478 tree temp = create_tmp_var (type, "va_arg_tmp");
7481 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7482 gimplify_assign (addr, t, pre_p);
7484 for (i = 0; i < XVECLEN (container, 0); i++)
7486 rtx slot = XVECEXP (container, 0, i);
7487 rtx reg = XEXP (slot, 0);
7488 enum machine_mode mode = GET_MODE (reg);
7494 tree dest_addr, dest;
7495 int cur_size = GET_MODE_SIZE (mode);
7497 if (prev_size + cur_size > size)
7499 cur_size = size - prev_size;
7500 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7501 if (mode == BLKmode)
7504 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7505 if (mode == GET_MODE (reg))
7506 addr_type = build_pointer_type (piece_type);
7508 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7510 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7513 if (SSE_REGNO_P (REGNO (reg)))
7515 src_addr = sse_addr;
7516 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7520 src_addr = int_addr;
7521 src_offset = REGNO (reg) * 8;
7523 src_addr = fold_convert (addr_type, src_addr);
7524 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7525 size_int (src_offset));
7527 dest_addr = fold_convert (daddr_type, addr);
7528 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7529 size_int (INTVAL (XEXP (slot, 1))));
7530 if (cur_size == GET_MODE_SIZE (mode))
7532 src = build_va_arg_indirect_ref (src_addr);
7533 dest = build_va_arg_indirect_ref (dest_addr);
7535 gimplify_assign (dest, src, pre_p);
7540 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7541 3, dest_addr, src_addr,
7542 size_int (cur_size));
7543 gimplify_and_add (copy, pre_p);
7545 prev_size += cur_size;
7551 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7552 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7553 gimplify_assign (gpr, t, pre_p);
7558 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7559 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7560 gimplify_assign (fpr, t, pre_p);
7563 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7565 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7568 /* ... otherwise out of the overflow area. */
7570 /* When the caller aligns a parameter on the stack, alignment
7571 beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7572 MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee with the
7573 caller here. */
7574 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7575 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7576 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7578 /* Care for on-stack alignment if needed. */
7579 if (arg_boundary <= 64 || size == 0)
7583 HOST_WIDE_INT align = arg_boundary / 8;
7584 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7585 size_int (align - 1));
7586 t = fold_convert (sizetype, t);
7587 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7589 t = fold_convert (TREE_TYPE (ovf), t);
7592 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7593 gimplify_assign (addr, t, pre_p);
7595 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7596 size_int (rsize * UNITS_PER_WORD));
7597 gimplify_assign (unshare_expr (ovf), t, pre_p);
7600 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7602 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7603 addr = fold_convert (ptrtype, addr);
7606 addr = build_va_arg_indirect_ref (addr);
7607 return build_va_arg_indirect_ref (addr);
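/* Illustrative sketch (not from the original source): the gimple emitted
   above for va_arg (ap, int) behaves like this C fragment. */
#if 0
static int
va_arg_int_sketch (struct __va_list_tag *ap)
{
  int *p;
  if (ap->gp_offset >= 6 * 8)   /* all six GPR slots already consumed  */
    {
      p = (int *) ap->overflow_arg_area;
      ap->overflow_arg_area = (char *) ap->overflow_arg_area + 8;
    }
  else
    {
      p = (int *) ((char *) ap->reg_save_area + ap->gp_offset);
      ap->gp_offset += 8;       /* one eightbyte of the GPR area used  */
    }
  return *p;
}
#endif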
7610 /* Return nonzero if OPNUM's MEM should be matched
7611 in movabs* patterns. */
7614 ix86_check_movabs (rtx insn, int opnum)
7618 set = PATTERN (insn);
7619 if (GET_CODE (set) == PARALLEL)
7620 set = XVECEXP (set, 0, 0);
7621 gcc_assert (GET_CODE (set) == SET);
7622 mem = XEXP (set, opnum);
7623 while (GET_CODE (mem) == SUBREG)
7624 mem = SUBREG_REG (mem);
7625 gcc_assert (MEM_P (mem));
7626 return (volatile_ok || !MEM_VOLATILE_P (mem));
7629 /* Initialize the table of extra 80387 mathematical constants. */
7632 init_ext_80387_constants (void)
7634 static const char * cst[5] =
7636 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7637 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7638 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7639 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7640 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7644 for (i = 0; i < 5; i++)
7646 real_from_string (&ext_80387_constants_table[i], cst[i]);
7647 /* Ensure each constant is rounded to XFmode precision. */
7648 real_convert (&ext_80387_constants_table[i],
7649 XFmode, &ext_80387_constants_table[i]);
7652 ext_80387_constants_init = 1;
7655 /* Return true if the constant is something that can be loaded with
7656 a special instruction. */
7659 standard_80387_constant_p (rtx x)
7661 enum machine_mode mode = GET_MODE (x);
7665 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7668 if (x == CONST0_RTX (mode))
7670 if (x == CONST1_RTX (mode))
7673 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7675 /* For XFmode constants, try to find a special 80387 instruction when
7676 optimizing for size or on those CPUs that benefit from them. */
7678 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7682 if (! ext_80387_constants_init)
7683 init_ext_80387_constants ();
7685 for (i = 0; i < 5; i++)
7686 if (real_identical (&r, &ext_80387_constants_table[i]))
7690 /* Load of the constant -0.0 or -1.0 will be split as
7691 fldz;fchs or fld1;fchs sequence. */
7692 if (real_isnegzero (&r))
7694 if (real_identical (&r, &dconstm1))
7700 /* Return the opcode of the special instruction to be used to load
7704 standard_80387_constant_opcode (rtx x)
7706 switch (standard_80387_constant_p (x))
7730 /* Return the CONST_DOUBLE representing the 80387 constant that is
7731 loaded by the specified special instruction. The argument IDX
7732 matches the return value from standard_80387_constant_p. */
7735 standard_80387_constant_rtx (int idx)
7739 if (! ext_80387_constants_init)
7740 init_ext_80387_constants ();
7756 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
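/* Illustrative sketch (not from the original source): the special x87
   loads selected by the routines above. fldz and fld1 cover +0.0 and
   +1.0; the five table entries correspond to fldlg2 (log10 2), fldln2
   (ln 2), fldl2e (log2 e), fldl2t (log2 10) and fldpi (pi); -0.0 and
   -1.0 are split into fldz;fchs and fld1;fchs as noted earlier. */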
7760 /* Return 1 if X is all 0s and 2 if X is all 1s
7761 in a supported SSE vector mode. */
7764 standard_sse_constant_p (rtx x)
7766 enum machine_mode mode = GET_MODE (x);
7768 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7770 if (vector_all_ones_operand (x, mode))
7786 /* Return the opcode of the special instruction to be used to load
7790 standard_sse_constant_opcode (rtx insn, rtx x)
7792 switch (standard_sse_constant_p (x))
7795 switch (get_attr_mode (insn))
7798 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7800 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7801 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7803 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7805 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7806 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7808 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7810 return "vxorps\t%x0, %x0, %x0";
7812 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7813 return "vxorps\t%x0, %x0, %x0";
7815 return "vxorpd\t%x0, %x0, %x0";
7817 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7818 return "vxorps\t%x0, %x0, %x0";
7820 return "vpxor\t%x0, %x0, %x0";
7825 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
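/* Illustrative sketch (not from the original source): the two vector
   constants recognized above and their single-instruction loads. */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));
static v4si all_zeros (void) { return (v4si) { 0, 0, 0, 0 }; }
                                            /* xorps   %xmm0, %xmm0 */
static v4si all_ones (void) { return (v4si) { -1, -1, -1, -1 }; }
                                            /* pcmpeqd %xmm0, %xmm0 */
#endif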
7832 /* Returns 1 if OP contains a symbol reference */
7835 symbolic_reference_mentioned_p (rtx op)
7840 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7843 fmt = GET_RTX_FORMAT (GET_CODE (op));
7844 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7850 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7851 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7855 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7862 /* Return 1 if it is appropriate to emit `ret' instructions in the
7863 body of a function. Do this only if the epilogue is simple, needing a
7864 couple of insns. Prior to reloading, we can't tell how many registers
7865 must be saved, so return 0 then. Return 0 if there is no frame
7866 marker to de-allocate. */
7869 ix86_can_use_return_insn_p (void)
7871 struct ix86_frame frame;
7873 if (! reload_completed || frame_pointer_needed)
7876 /* Don't allow more than 32k pop, since that's all we can do
7877 with one instruction. */
7878 if (crtl->args.pops_args && crtl->args.size >= 32768)
7881 ix86_compute_frame_layout (&frame);
7882 return (frame.stack_pointer_offset == UNITS_PER_WORD
7883 && (frame.nregs + frame.nsseregs) == 0);
7886 /* Value should be nonzero if functions must have frame pointers.
7887 Zero means the frame pointer need not be set up (and parms may
7888 be accessed via the stack pointer) in functions that seem suitable. */
7891 ix86_frame_pointer_required (void)
7893 /* If we accessed previous frames, then the generated code expects
7894 to be able to access the saved ebp value in our frame. */
7895 if (cfun->machine->accesses_prev_frame)
7898 /* Several x86 OSes need a frame pointer for other reasons,
7899 usually pertaining to setjmp. */
7900 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7903 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7904 the frame pointer by default. Turn it back on now if we've not
7905 got a leaf function. */
7906 if (TARGET_OMIT_LEAF_FRAME_POINTER
7907 && (!current_function_is_leaf
7908 || ix86_current_function_calls_tls_descriptor))
7911 if (crtl->profile && !flag_fentry)
7917 /* Record that the current function accesses previous call frames. */
7920 ix86_setup_frame_addresses (void)
7922 cfun->machine->accesses_prev_frame = 1;
7925 #ifndef USE_HIDDEN_LINKONCE
7926 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7927 # define USE_HIDDEN_LINKONCE 1
7929 # define USE_HIDDEN_LINKONCE 0
7933 static int pic_labels_used;
7935 /* Fills in the label name that should be used for a pc thunk for
7936 the given register. */
7939 get_pc_thunk_name (char name[32], unsigned int regno)
7941 gcc_assert (!TARGET_64BIT);
7943 if (USE_HIDDEN_LINKONCE)
7944 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7946 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7950 /* This function generates code for -fpic that loads %ebx with
7951 the return address of the caller and then returns. */
7954 ix86_code_end (void)
7959 for (regno = 0; regno < 8; ++regno)
7964 if (! ((pic_labels_used >> regno) & 1))
7967 get_pc_thunk_name (name, regno);
7969 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7970 get_identifier (name),
7971 build_function_type (void_type_node, void_list_node));
7972 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7973 NULL_TREE, void_type_node);
7974 TREE_PUBLIC (decl) = 1;
7975 TREE_STATIC (decl) = 1;
7980 switch_to_section (darwin_sections[text_coal_section]);
7981 fputs ("\t.weak_definition\t", asm_out_file);
7982 assemble_name (asm_out_file, name);
7983 fputs ("\n\t.private_extern\t", asm_out_file);
7984 assemble_name (asm_out_file, name);
7985 putc ('\n', asm_out_file);
7986 ASM_OUTPUT_LABEL (asm_out_file, name);
7987 DECL_WEAK (decl) = 1;
7991 if (USE_HIDDEN_LINKONCE)
7993 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7995 targetm.asm_out.unique_section (decl, 0);
7996 switch_to_section (get_named_section (decl, NULL, 0));
7998 targetm.asm_out.globalize_label (asm_out_file, name);
7999 fputs ("\t.hidden\t", asm_out_file);
8000 assemble_name (asm_out_file, name);
8001 putc ('\n', asm_out_file);
8002 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8006 switch_to_section (text_section);
8007 ASM_OUTPUT_LABEL (asm_out_file, name);
8010 DECL_INITIAL (decl) = make_node (BLOCK);
8011 current_function_decl = decl;
8012 init_function_start (decl);
8013 first_function_block_is_cold = false;
8014 /* Make sure unwind info is emitted for the thunk if needed. */
8015 final_start_function (emit_barrier (), asm_out_file, 1);
8017 xops[0] = gen_rtx_REG (Pmode, regno);
8018 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8019 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8020 output_asm_insn ("ret", xops);
8021 final_end_function ();
8022 init_insn_lengths ();
8023 free_after_compilation (cfun);
8025 current_function_decl = NULL;
8029 /* Emit code for the SET_GOT patterns. */
8032 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8038 if (TARGET_VXWORKS_RTP && flag_pic)
8040 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8041 xops[2] = gen_rtx_MEM (Pmode,
8042 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8043 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8045 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8046 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8047 an unadorned address. */
8048 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8049 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8050 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8054 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8056 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8058 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8061 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8064 output_asm_insn ("call\t%a2", xops);
8065 #ifdef DWARF2_UNWIND_INFO
8066 /* The call to next label acts as a push. */
8067 if (dwarf2out_do_frame ())
8071 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8072 gen_rtx_PLUS (Pmode,
8075 RTX_FRAME_RELATED_P (insn) = 1;
8076 dwarf2out_frame_debug (insn, true);
8083 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8084 is what will be referenced by the Mach-O PIC subsystem. */
8086 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8089 targetm.asm_out.internal_label (asm_out_file, "L",
8090 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8094 output_asm_insn ("pop%z0\t%0", xops);
8095 #ifdef DWARF2_UNWIND_INFO
8096 /* The pop is a pop and clobbers dest, but doesn't restore it
8097 for unwind info purposes. */
8098 if (dwarf2out_do_frame ())
8102 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8103 dwarf2out_frame_debug (insn, true);
8104 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8105 gen_rtx_PLUS (Pmode,
8108 RTX_FRAME_RELATED_P (insn) = 1;
8109 dwarf2out_frame_debug (insn, true);
8118 get_pc_thunk_name (name, REGNO (dest));
8119 pic_labels_used |= 1 << REGNO (dest);
8121 #ifdef DWARF2_UNWIND_INFO
8122 /* Ensure all queued register saves are flushed before the call. */
8124 if (dwarf2out_do_frame ())
8128 insn = emit_barrier ();
8130 dwarf2out_frame_debug (insn, false);
8133 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8134 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8135 output_asm_insn ("call\t%X2", xops);
8136 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8137 is what will be referenced by the Mach-O PIC subsystem. */
8140 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8142 targetm.asm_out.internal_label (asm_out_file, "L",
8143 CODE_LABEL_NUMBER (label));
8150 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8151 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8153 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
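/* As a hedged illustration (AT&T syntax, %ebx as the PIC register):
   with deep branch prediction the sequence built above is

	call	get_pc_thunk.bx
	add	$_GLOBAL_OFFSET_TABLE_, %ebx

   and without it the classic form is

	call	.L2
   .L2:	pop	%ebx
	add	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   both leaving the GOT address in the destination register.  */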
8158 /* Generate a "push" pattern for input ARG. */
8163 struct machine_function *m = cfun->machine;
8165 if (m->fs.cfa_reg == stack_pointer_rtx)
8166 m->fs.cfa_offset += UNITS_PER_WORD;
8167 m->fs.sp_offset += UNITS_PER_WORD;
8169 return gen_rtx_SET (VOIDmode,
8171 gen_rtx_PRE_DEC (Pmode,
8172 stack_pointer_rtx)),
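/* For example (a sketch, assuming ia32 where Pmode is SImode),
   gen_push (gen_rtx_REG (SImode, AX_REG)) returns RTL equivalent to

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   which matches a "push %eax" pattern; the bookkeeping above keeps the
   CFA and SP offsets in M->FS consistent with that side effect.  */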
8176 /* Return >= 0 if there is an unused call-clobbered register available
8177 for the entire function. */
8180 ix86_select_alt_pic_regnum (void)
8182 if (current_function_is_leaf
8184 && !ix86_current_function_calls_tls_descriptor)
8187 /* Can't use the same register for both PIC and DRAP. */
8189 drap = REGNO (crtl->drap_reg);
8192 for (i = 2; i >= 0; --i)
8193 if (i != drap && !df_regs_ever_live_p (i))
8197 return INVALID_REGNUM;
8200 /* Return 1 if we need to save REGNO. */
8202 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8204 if (pic_offset_table_rtx
8205 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8206 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8208 || crtl->calls_eh_return
8209 || crtl->uses_const_pool))
8211 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8216 if (crtl->calls_eh_return && maybe_eh_return)
8221 unsigned test = EH_RETURN_DATA_REGNO (i);
8222 if (test == INVALID_REGNUM)
8229 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8232 return (df_regs_ever_live_p (regno)
8233 && !call_used_regs[regno]
8234 && !fixed_regs[regno]
8235 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8238 /* Return the number of saved general purpose registers. */
8241 ix86_nsaved_regs (void)
8246 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8247 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8252 /* Return the number of saved SSE registers. */
8255 ix86_nsaved_sseregs (void)
8260 if (ix86_cfun_abi () != MS_ABI)
8262 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8263 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8268 /* Given FROM and TO register numbers, say whether this elimination is
8269 allowed. If stack alignment is needed, we can only replace argument
8270 pointer with hard frame pointer, or replace frame pointer with stack
8271 pointer. Otherwise, frame pointer elimination is automatically
8272 handled and all other eliminations are valid. */
8275 ix86_can_eliminate (const int from, const int to)
8277 if (stack_realign_fp)
8278 return ((from == ARG_POINTER_REGNUM
8279 && to == HARD_FRAME_POINTER_REGNUM)
8280 || (from == FRAME_POINTER_REGNUM
8281 && to == STACK_POINTER_REGNUM));
8283 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8286 /* Return the offset between two registers, one to be eliminated, and the other
8287 its replacement, at the start of a routine. */
8290 ix86_initial_elimination_offset (int from, int to)
8292 struct ix86_frame frame;
8293 ix86_compute_frame_layout (&frame);
8295 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8296 return frame.hard_frame_pointer_offset;
8297 else if (from == FRAME_POINTER_REGNUM
8298 && to == HARD_FRAME_POINTER_REGNUM)
8299 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8302 gcc_assert (to == STACK_POINTER_REGNUM);
8304 if (from == ARG_POINTER_REGNUM)
8305 return frame.stack_pointer_offset;
8307 gcc_assert (from == FRAME_POINTER_REGNUM);
8308 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8312 /* In a dynamically-aligned function, we can't know the offset from
8313 stack pointer to frame pointer, so we must ensure that setjmp
8314 eliminates fp against the hard fp (%ebp) rather than trying to
8315 index from %esp up to the top of the frame across a gap that is
8316 of unknown (at compile-time) size. */
8318 ix86_builtin_setjmp_frame_value (void)
8320 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8323 /* Fill the ix86_frame structure with information about the frame of the current function. */
8326 ix86_compute_frame_layout (struct ix86_frame *frame)
8328 unsigned int stack_alignment_needed;
8329 HOST_WIDE_INT offset;
8330 unsigned int preferred_alignment;
8331 HOST_WIDE_INT size = get_frame_size ();
8332 HOST_WIDE_INT to_allocate;
8334 frame->nregs = ix86_nsaved_regs ();
8335 frame->nsseregs = ix86_nsaved_sseregs ();
8337 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8338 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8340 /* The MS ABI seems to require stack alignment to always be 16, except for
8341 function prologues and leaf functions. */
8342 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8343 && (!current_function_is_leaf || cfun->calls_alloca != 0
8344 || ix86_current_function_calls_tls_descriptor))
8346 preferred_alignment = 16;
8347 stack_alignment_needed = 16;
8348 crtl->preferred_stack_boundary = 128;
8349 crtl->stack_alignment_needed = 128;
8352 gcc_assert (!size || stack_alignment_needed);
8353 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8354 gcc_assert (preferred_alignment <= stack_alignment_needed);
8356 /* During a reload iteration the number of registers saved can change.
8357 Recompute the value as needed. Do not recompute when the number of
8358 registers didn't change, as reload does multiple calls to this function
8359 and does not expect the decision to change within a single iteration. */
8360 if (!optimize_function_for_size_p (cfun)
8361 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8363 int count = frame->nregs;
8364 struct cgraph_node *node = cgraph_node (current_function_decl);
8366 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8367 /* The fast prologue uses move instead of push to save registers. This
8368 is significantly longer, but also executes faster, as modern hardware
8369 can execute the moves in parallel, but can't do that for push/pop.
8371 Be careful about choosing which prologue to emit: when the function takes
8372 many instructions to execute, we may use the slow version; likewise when
8373 the function is known to be outside a hot spot (this is known with
8374 feedback only). Weight the size of the function by the number of registers
8375 to save, as it is cheap to use one or two push instructions but very
8376 slow to use many of them. */
8378 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8379 if (node->frequency < NODE_FREQUENCY_NORMAL
8380 || (flag_branch_probabilities
8381 && node->frequency < NODE_FREQUENCY_HOT))
8382 cfun->machine->use_fast_prologue_epilogue = false;
8384 cfun->machine->use_fast_prologue_epilogue
8385 = !expensive_function_p (count);
8387 if (TARGET_PROLOGUE_USING_MOVE
8388 && cfun->machine->use_fast_prologue_epilogue)
8389 frame->save_regs_using_mov = true;
8391 frame->save_regs_using_mov = false;
8393 /* If static stack checking is enabled and done with probes, the registers
8394 need to be saved before allocating the frame. */
8395 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8396 frame->save_regs_using_mov = false;
8398 /* Skip return address. */
8399 offset = UNITS_PER_WORD;
8401 /* Skip pushed static chain. */
8402 if (ix86_static_chain_on_stack)
8403 offset += UNITS_PER_WORD;
8405 /* Skip saved base pointer. */
8406 if (frame_pointer_needed)
8407 offset += UNITS_PER_WORD;
8409 frame->hard_frame_pointer_offset = offset;
8411 /* Register save area */
8412 offset += frame->nregs * UNITS_PER_WORD;
8413 frame->reg_save_offset = offset;
8415 /* Align and set SSE register save area. */
8416 if (frame->nsseregs)
8418 /* The only ABI that has saved SSE registers (Win64) also has a
8419 16-byte aligned default stack, and thus we don't need to be
8420 within the re-aligned local stack frame to save them. */
8421 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8422 offset = (offset + 16 - 1) & -16;
8423 offset += frame->nsseregs * 16;
8425 frame->sse_reg_save_offset = offset;
8427 /* The re-aligned stack starts here. Values before this point are not
8428 directly comparable with values below this point. In order to make
8429 sure that no value happens to be the same before and after, force
8430 the alignment computation below to add a non-zero value. */
8431 if (stack_realign_fp)
8432 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8435 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8436 offset += frame->va_arg_size;
8438 /* Align start of frame for local function. */
8439 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
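/* E.g. with offset == 44 and stack_alignment_needed == 16:
   (44 + 16 - 1) & -16 == 59 & ~15 == 48, the next multiple of 16
   at or above 44.  */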
8441 /* Frame pointer points here. */
8442 frame->frame_pointer_offset = offset;
8446 /* Add the outgoing arguments area. It can be skipped if we eliminated
8447 all the function calls as dead code.
8448 Skipping is however impossible when the function calls alloca, since the
8449 alloca expander assumes that the last crtl->outgoing_args_size
8450 bytes of the stack frame are unused. */
8451 if (ACCUMULATE_OUTGOING_ARGS
8452 && (!current_function_is_leaf || cfun->calls_alloca
8453 || ix86_current_function_calls_tls_descriptor))
8455 offset += crtl->outgoing_args_size;
8456 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8459 frame->outgoing_arguments_size = 0;
8461 /* Align the stack boundary. Only needed if we're calling another function
8462 or using alloca. */
8463 if (!current_function_is_leaf || cfun->calls_alloca
8464 || ix86_current_function_calls_tls_descriptor)
8465 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
8467 /* We've reached the end of the stack frame. */
8468 frame->stack_pointer_offset = offset;
8470 /* The size the prologue needs to allocate. */
8471 to_allocate = offset - frame->sse_reg_save_offset;
8473 if ((!to_allocate && frame->nregs <= 1)
8474 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8475 frame->save_regs_using_mov = false;
8477 if (ix86_using_red_zone ()
8478 && current_function_sp_is_unchanging
8479 && current_function_is_leaf
8480 && !ix86_current_function_calls_tls_descriptor)
8482 frame->red_zone_size = to_allocate;
8483 if (frame->save_regs_using_mov)
8484 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8485 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8486 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8489 frame->red_zone_size = 0;
8490 frame->stack_pointer_offset -= frame->red_zone_size;
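/* An illustrative (not normative) summary of the layout computed above,
   with offsets growing away from the CFA:

	return address			<- UNITS_PER_WORD
	static chain, saved %ebp	<- hard_frame_pointer_offset
	integer register save area	<- reg_save_offset
	SSE register save area		<- sse_reg_save_offset
	va_arg register save area
	local variables			<- frame_pointer_offset
	outgoing arguments
					<- stack_pointer_offset

   less red_zone_size when the red zone is usable.  */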
8493 /* This is semi-inlined memory_address_length, but simplified
8494 since we know that we're always dealing with reg+offset, and
8495 to avoid having to create and discard all that rtl. */
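/* For instance, from the cases visible below: a zero offset from %ebp
   still needs a disp8, so its length is 1; an offset of 8 from %esp
   fits in a disp8 but also needs a SIB byte, giving a length of 2.
   Larger offsets take a 4-byte displacement instead.  */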
8498 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8504 /* EBP and R13 cannot be encoded without an offset. */
8505 len = (regno == BP_REG || regno == R13_REG);
8507 else if (IN_RANGE (offset, -128, 127))
8510 /* ESP and R12 must be encoded with a SIB byte. */
8511 if (regno == SP_REG || regno == R12_REG)
8517 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8518 The valid base registers are taken from CFUN->MACHINE->FS. */
8521 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8523 const struct machine_function *m = cfun->machine;
8524 rtx base_reg = NULL;
8525 HOST_WIDE_INT base_offset = 0;
8527 if (m->use_fast_prologue_epilogue)
8529 /* Choose the base register most likely to allow the most scheduling
8530 opportunities. Generally FP is valid throughout the function,
8531 while DRAP must be reloaded within the epilogue. But choose either
8532 over the SP due to increased encoding size. */
8536 base_reg = hard_frame_pointer_rtx;
8537 base_offset = m->fs.fp_offset - cfa_offset;
8539 else if (m->fs.drap_valid)
8541 base_reg = crtl->drap_reg;
8542 base_offset = 0 - cfa_offset;
8544 else if (m->fs.sp_valid)
8546 base_reg = stack_pointer_rtx;
8547 base_offset = m->fs.sp_offset - cfa_offset;
8552 HOST_WIDE_INT toffset;
8555 /* Choose the base register with the smallest address encoding.
8556 With a tie, choose FP > DRAP > SP. */
8559 base_reg = stack_pointer_rtx;
8560 base_offset = m->fs.sp_offset - cfa_offset;
8561 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8563 if (m->fs.drap_valid)
8565 toffset = 0 - cfa_offset;
8566 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8569 base_reg = crtl->drap_reg;
8570 base_offset = toffset;
8576 toffset = m->fs.fp_offset - cfa_offset;
8577 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8580 base_reg = hard_frame_pointer_rtx;
8581 base_offset = toffset;
8586 gcc_assert (base_reg != NULL);
8588 return plus_constant (base_reg, base_offset);
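/* Example: with a valid frame pointer and fs.fp_offset == 16, a request
   for cfa_offset == 8 yields plus_constant (hard_frame_pointer_rtx, 8),
   i.e. an 8(%ebp)-style address (base_offset == 16 - 8).  */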
8591 /* Emit code to save registers in the prologue. */
8594 ix86_emit_save_regs (void)
8599 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8600 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8602 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8603 RTX_FRAME_RELATED_P (insn) = 1;
8607 /* Emit a single register save at CFA - CFA_OFFSET. */
8610 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8611 HOST_WIDE_INT cfa_offset)
8613 struct machine_function *m = cfun->machine;
8614 rtx reg = gen_rtx_REG (mode, regno);
8615 rtx mem, addr, base, insn;
8617 addr = choose_baseaddr (cfa_offset);
8618 mem = gen_frame_mem (mode, addr);
8620 /* For SSE saves, we need to indicate the 128-bit alignment. */
8621 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8623 insn = emit_move_insn (mem, reg);
8624 RTX_FRAME_RELATED_P (insn) = 1;
8627 if (GET_CODE (base) == PLUS)
8628 base = XEXP (base, 0);
8629 gcc_checking_assert (REG_P (base));
8631 /* When saving registers into a re-aligned local stack frame, avoid
8632 any tricky guessing by dwarf2out. */
8633 if (m->fs.realigned)
8635 gcc_checking_assert (stack_realign_drap);
8637 if (regno == REGNO (crtl->drap_reg))
8639 /* A bit of a hack. We force the DRAP register to be saved in
8640 the re-aligned stack frame, which provides us with a copy
8641 of the CFA that will last past the prologue. Install it. */
8642 gcc_checking_assert (cfun->machine->fs.fp_valid);
8643 addr = plus_constant (hard_frame_pointer_rtx,
8644 cfun->machine->fs.fp_offset - cfa_offset);
8645 mem = gen_rtx_MEM (mode, addr);
8646 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8650 /* The frame pointer is a stable reference within the
8651 aligned frame. Use it. */
8652 gcc_checking_assert (cfun->machine->fs.fp_valid);
8653 addr = plus_constant (hard_frame_pointer_rtx,
8654 cfun->machine->fs.fp_offset - cfa_offset);
8655 mem = gen_rtx_MEM (mode, addr);
8656 add_reg_note (insn, REG_CFA_EXPRESSION,
8657 gen_rtx_SET (VOIDmode, mem, reg));
8661 /* The memory may not be relative to the current CFA register,
8662 which means that we may need to generate a new pattern for
8663 use by the unwind info. */
8664 else if (base != m->fs.cfa_reg)
8666 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8667 mem = gen_rtx_MEM (mode, addr);
8668 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8672 /* Emit code to save registers using MOV insns.
8673 First register is stored at CFA - CFA_OFFSET. */
8675 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8679 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8680 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8682 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8683 cfa_offset -= UNITS_PER_WORD;
8687 /* Emit code to save SSE registers using MOV insns.
8688 First register is stored at CFA - CFA_OFFSET. */
8690 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8694 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8695 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8697 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8702 static GTY(()) rtx queued_cfa_restores;
8704 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next
8705 stack manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
8706 Don't add the note if the previously saved value will be left untouched
8707 within the stack red-zone until return, as unwinders can find the same
8708 value in the register and on the stack. */
8711 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8713 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8718 add_reg_note (insn, REG_CFA_RESTORE, reg);
8719 RTX_FRAME_RELATED_P (insn) = 1;
8723 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8726 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8729 ix86_add_queued_cfa_restore_notes (rtx insn)
8732 if (!queued_cfa_restores)
8734 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8736 XEXP (last, 1) = REG_NOTES (insn);
8737 REG_NOTES (insn) = queued_cfa_restores;
8738 queued_cfa_restores = NULL_RTX;
8739 RTX_FRAME_RELATED_P (insn) = 1;
8742 /* Expand prologue or epilogue stack adjustment.
8743 The pattern exists to put a dependency on all ebp-based memory accesses.
8744 STYLE should be negative if instructions should be marked as frame related,
8745 zero if the %r11 register is live and cannot be freely used, and positive
8746 otherwise. */
8749 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8750 int style, bool set_cfa)
8752 struct machine_function *m = cfun->machine;
8756 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8757 else if (x86_64_immediate_operand (offset, DImode))
8758 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8762 /* r11 is used by indirect sibcall return as well, set before the
8763 epilogue and used after the epilogue. */
8765 tmp = gen_rtx_REG (DImode, R11_REG);
8768 gcc_assert (src != hard_frame_pointer_rtx
8769 && dest != hard_frame_pointer_rtx);
8770 tmp = hard_frame_pointer_rtx;
8772 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8774 RTX_FRAME_RELATED_P (insn) = 1;
8775 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8780 ix86_add_queued_cfa_restore_notes (insn);
8786 gcc_assert (m->fs.cfa_reg == src);
8787 m->fs.cfa_offset += INTVAL (offset);
8788 m->fs.cfa_reg = dest;
8790 r = gen_rtx_PLUS (Pmode, src, offset);
8791 r = gen_rtx_SET (VOIDmode, dest, r);
8792 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8793 RTX_FRAME_RELATED_P (insn) = 1;
8796 RTX_FRAME_RELATED_P (insn) = 1;
8798 if (dest == stack_pointer_rtx)
8800 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8801 bool valid = m->fs.sp_valid;
8803 if (src == hard_frame_pointer_rtx)
8805 valid = m->fs.fp_valid;
8806 ooffset = m->fs.fp_offset;
8808 else if (src == crtl->drap_reg)
8810 valid = m->fs.drap_valid;
8815 /* Else there are two possibilities: SP itself, which we set
8816 up as the default above, or EH_RETURN_STACKADJ_RTX, which is
8817 taken care of by hand along the eh_return path. */
8818 gcc_checking_assert (src == stack_pointer_rtx
8819 || offset == const0_rtx);
8822 m->fs.sp_offset = ooffset - INTVAL (offset);
8823 m->fs.sp_valid = valid;
8827 /* Find an available register to be used as the dynamic realign argument
8828 pointer register. Such a register will be written in the prologue and
8829 used at the beginning of the body, so it must not be
8830 1. a parameter passing register.
8832 We reuse the static-chain register if it is available. Otherwise, we
8833 use DI for i386 and R13 for x86-64; we chose R13 since it has a longer encoding.
8836 Return: the regno of the chosen register. */
8839 find_drap_reg (void)
8841 tree decl = cfun->decl;
8845 /* Use R13 for a nested function or a function that needs a static chain.
8846 Since a function with a tail call may use any caller-saved
8847 register in the epilogue, DRAP must not use a caller-saved
8848 register in such a case. */
8849 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8856 /* Use DI for a nested function or a function that needs a static chain.
8857 Since a function with a tail call may use any caller-saved
8858 register in the epilogue, DRAP must not use a caller-saved
8859 register in such a case. */
8860 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8863 /* Reuse the static chain register if it isn't used for parameter passing. */
8865 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8866 && !lookup_attribute ("fastcall",
8867 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8868 && !lookup_attribute ("thiscall",
8869 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8876 /* Return minimum incoming stack alignment. */
8879 ix86_minimum_incoming_stack_boundary (bool sibcall)
8881 unsigned int incoming_stack_boundary;
8883 /* Prefer the one specified at command line. */
8884 if (ix86_user_incoming_stack_boundary)
8885 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8886 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8887 when -mstackrealign is used, this isn't a sibcall check, and the
8888 estimated stack alignment is 128 bits. */
8891 && ix86_force_align_arg_pointer
8892 && crtl->stack_alignment_estimated == 128)
8893 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8895 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8897 /* Incoming stack alignment can be changed on individual functions
8898 via force_align_arg_pointer attribute. We use the smallest
8899 incoming stack boundary. */
8900 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8901 && lookup_attribute (ix86_force_align_arg_pointer_string,
8902 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8903 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8905 /* The incoming stack frame has to be aligned at least at
8906 parm_stack_boundary. */
8907 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8908 incoming_stack_boundary = crtl->parm_stack_boundary;
8910 /* The stack at the entry of main is aligned by the runtime. We use the
8911 smallest incoming stack boundary. */
8912 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8913 && DECL_NAME (current_function_decl)
8914 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8915 && DECL_FILE_SCOPE_P (current_function_decl))
8916 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8918 return incoming_stack_boundary;
8921 /* Update incoming stack boundary and estimated stack alignment. */
8924 ix86_update_stack_boundary (void)
8926 ix86_incoming_stack_boundary
8927 = ix86_minimum_incoming_stack_boundary (false);
8929 /* x86_64 varargs need 16-byte stack alignment for the register save area. */
8933 && crtl->stack_alignment_estimated < 128)
8934 crtl->stack_alignment_estimated = 128;
8937 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8938 needed or an rtx for DRAP otherwise. */
8941 ix86_get_drap_rtx (void)
8943 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8944 crtl->need_drap = true;
8946 if (stack_realign_drap)
8948 /* Assign DRAP to vDRAP and return vDRAP. */
8949 unsigned int regno = find_drap_reg ();
8954 arg_ptr = gen_rtx_REG (Pmode, regno);
8955 crtl->drap_reg = arg_ptr;
8958 drap_vreg = copy_to_reg (arg_ptr);
8962 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8965 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8966 RTX_FRAME_RELATED_P (insn) = 1;
8974 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8977 ix86_internal_arg_pointer (void)
8979 return virtual_incoming_args_rtx;
8982 struct scratch_reg {
8987 /* Return a short-lived scratch register for use on function entry.
8988 In 32-bit mode, it is valid only after the registers are saved
8989 in the prologue. This register must be released by means of
8990 release_scratch_register_on_entry once it is dead. */
8993 get_scratch_register_on_entry (struct scratch_reg *sr)
9001 /* We always use R11 in 64-bit mode. */
9006 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9008 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9009 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9010 int regparm = ix86_function_regparm (fntype, decl);
9012 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9014 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9015 for the static chain register. */
9016 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9017 && drap_regno != AX_REG)
9019 else if (regparm < 2 && drap_regno != DX_REG)
9021 /* ecx is the static chain register. */
9022 else if (regparm < 3 && !fastcall_p && !static_chain_p
9023 && drap_regno != CX_REG)
9025 else if (ix86_save_reg (BX_REG, true))
9027 /* esi is the static chain register. */
9028 else if (!(regparm == 3 && static_chain_p)
9029 && ix86_save_reg (SI_REG, true))
9031 else if (ix86_save_reg (DI_REG, true))
9035 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9040 sr->reg = gen_rtx_REG (Pmode, regno);
9043 rtx insn = emit_insn (gen_push (sr->reg));
9044 RTX_FRAME_RELATED_P (insn) = 1;
9048 /* Release a scratch register obtained from the preceding function. */
9051 release_scratch_register_on_entry (struct scratch_reg *sr)
9055 rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
9057 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9058 RTX_FRAME_RELATED_P (insn) = 1;
9059 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9060 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9061 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9065 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
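/* E.g. with the default STACK_CHECK_PROBE_INTERVAL_EXP of 12,
   PROBE_INTERVAL is 4096 bytes, i.e. one probe per typical page.  */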
9067 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9070 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
9072 /* We skip the probe for the first interval + a small dope of 4 words and
9073 probe that many bytes past the specified size to maintain a protection
9074 area at the bottom of the stack. */
9075 const int dope = 4 * UNITS_PER_WORD;
9076 rtx size_rtx = GEN_INT (size);
9078 /* See if we have a constant small number of probes to generate. If so,
9079 that's the easy case. The run-time loop is made up of 11 insns in the
9080 generic case while the compile-time loop is made up of 3+2*(n-1) insns
9081 for n # of intervals. */
9082 if (size <= 5 * PROBE_INTERVAL)
9084 HOST_WIDE_INT i, adjust;
9085 bool first_probe = true;
9087 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9088 values of N from 1 until it exceeds SIZE. If only one probe is
9089 needed, this will not generate any code. Then adjust and probe
9090 to PROBE_INTERVAL + SIZE. */
9091 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9095 adjust = 2 * PROBE_INTERVAL + dope;
9096 first_probe = false;
9099 adjust = PROBE_INTERVAL;
9101 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9102 plus_constant (stack_pointer_rtx, -adjust)));
9103 emit_stack_probe (stack_pointer_rtx);
9107 adjust = size + PROBE_INTERVAL + dope;
9109 adjust = size + PROBE_INTERVAL - i;
9111 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9112 plus_constant (stack_pointer_rtx, -adjust)));
9113 emit_stack_probe (stack_pointer_rtx);
9115 /* Adjust back to account for the additional first interval. */
9116 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9117 plus_constant (stack_pointer_rtx,
9118 PROBE_INTERVAL + dope)));
9121 /* Otherwise, do the same as above, but in a loop. Note that we must be
9122 extra careful with variables wrapping around because we might be at
9123 the very top (or the very bottom) of the address space and we have
9124 to be able to handle this case properly; in particular, we use an
9125 equality test for the loop condition. */
9128 HOST_WIDE_INT rounded_size;
9129 struct scratch_reg sr;
9131 get_scratch_register_on_entry (&sr);
9134 /* Step 1: round SIZE to the previous multiple of the interval. */
9136 rounded_size = size & -PROBE_INTERVAL;
9139 /* Step 2: compute initial and final value of the loop counter. */
9141 /* SP = SP_0 + PROBE_INTERVAL. */
9142 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9143 plus_constant (stack_pointer_rtx,
9144 - (PROBE_INTERVAL + dope))));
9146 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9147 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9148 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9149 gen_rtx_PLUS (Pmode, sr.reg,
9150 stack_pointer_rtx)));
9155 while (SP != LAST_ADDR)
9157 SP = SP + PROBE_INTERVAL
9161 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9162 values of N from 1 until it is equal to ROUNDED_SIZE. */
9164 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9167 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9168 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9170 if (size != rounded_size)
9172 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9173 plus_constant (stack_pointer_rtx,
9174 rounded_size - size)));
9175 emit_stack_probe (stack_pointer_rtx);
9178 /* Adjust back to account for the additional first interval. */
9179 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9180 plus_constant (stack_pointer_rtx,
9181 PROBE_INTERVAL + dope)));
9183 release_scratch_register_on_entry (&sr);
9186 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9187 cfun->machine->fs.sp_offset += size;
9189 /* Make sure nothing is scheduled before we are done. */
9190 emit_insn (gen_blockage ());
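/* Worked example (a sketch, assuming ia32, PROBE_INTERVAL == 4096 and
   dope == 16): for SIZE == 10000 the constant path above emits

	sub	$8208, %esp	# 2*4096 + dope
	or	$0, (%esp)	# probe 1
	sub	$4096, %esp
	or	$0, (%esp)	# probe 2
	sub	$1808, %esp	# 10000 + 4096 - 3*4096
	or	$0, (%esp)	# final probe
	add	$4112, %esp	# re-add PROBE_INTERVAL + dope

   for a net adjustment of exactly 10000 bytes with each page touched.  */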
9193 /* Adjust the stack pointer up to REG while probing it. */
9196 output_adjust_stack_and_probe (rtx reg)
9198 static int labelno = 0;
9199 char loop_lab[32], end_lab[32];
9202 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9203 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9205 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9207 /* Jump to END_LAB if SP == LAST_ADDR. */
9208 xops[0] = stack_pointer_rtx;
9210 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9211 fputs ("\tje\t", asm_out_file);
9212 assemble_name_raw (asm_out_file, end_lab);
9213 fputc ('\n', asm_out_file);
9215 /* SP = SP + PROBE_INTERVAL. */
9216 xops[1] = GEN_INT (PROBE_INTERVAL);
9217 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9220 xops[1] = const0_rtx;
9221 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9223 fprintf (asm_out_file, "\tjmp\t");
9224 assemble_name_raw (asm_out_file, loop_lab);
9225 fputc ('\n', asm_out_file);
9227 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9232 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9233 inclusive. These are offsets from the current stack pointer. */
9236 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9238 /* See if we have a constant small number of probes to generate. If so,
9239 that's the easy case. The run-time loop is made up of 7 insns in the
9240 generic case while the compile-time loop is made up of n insns for n #
9242 if (size <= 7 * PROBE_INTERVAL)
9246 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9247 it exceeds SIZE. If only one probe is needed, this will not
9248 generate any code. Then probe at FIRST + SIZE. */
9249 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9250 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9252 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9255 /* Otherwise, do the same as above, but in a loop. Note that we must be
9256 extra careful with variables wrapping around because we might be at
9257 the very top (or the very bottom) of the address space and we have
9258 to be able to handle this case properly; in particular, we use an
9259 equality test for the loop condition. */
9262 HOST_WIDE_INT rounded_size, last;
9263 struct scratch_reg sr;
9265 get_scratch_register_on_entry (&sr);
9268 /* Step 1: round SIZE to the previous multiple of the interval. */
9270 rounded_size = size & -PROBE_INTERVAL;
9273 /* Step 2: compute initial and final value of the loop counter. */
9275 /* TEST_OFFSET = FIRST. */
9276 emit_move_insn (sr.reg, GEN_INT (-first));
9278 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9279 last = first + rounded_size;
9284 while (TEST_ADDR != LAST_ADDR)
9286 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9290 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9291 until it is equal to ROUNDED_SIZE. */
9293 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9296 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9297 that SIZE is equal to ROUNDED_SIZE. */
9299 if (size != rounded_size)
9300 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9303 rounded_size - size));
9305 release_scratch_register_on_entry (&sr);
9308 /* Make sure nothing is scheduled before we are done. */
9309 emit_insn (gen_blockage ());
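/* Worked example (PROBE_INTERVAL == 4096): with FIRST == 4096 and
   SIZE == 8192, the constant path above probes at sp - 8192 and
   sp - 12288, one per page of the protected region, without moving
   the stack pointer at all.  */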
9312 /* Probe a range of stack addresses from REG to END, inclusive. These are
9313 offsets from the current stack pointer. */
9316 output_probe_stack_range (rtx reg, rtx end)
9318 static int labelno = 0;
9319 char loop_lab[32], end_lab[32];
9322 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9323 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9325 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9327 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9330 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9331 fputs ("\tje\t", asm_out_file);
9332 assemble_name_raw (asm_out_file, end_lab);
9333 fputc ('\n', asm_out_file);
9335 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9336 xops[1] = GEN_INT (PROBE_INTERVAL);
9337 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9339 /* Probe at TEST_ADDR. */
9340 xops[0] = stack_pointer_rtx;
9342 xops[2] = const0_rtx;
9343 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9345 fprintf (asm_out_file, "\tjmp\t");
9346 assemble_name_raw (asm_out_file, loop_lab);
9347 fputc ('\n', asm_out_file);
9349 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
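/* A sketch of the loop emitted above (AT&T syntax; the scratch
   register and label numbers are illustrative only):

   .LPSRL0:
	cmpl	%edx, %eax	# TEST_ADDR == LAST_ADDR ?
	je	.LPSRE0
	subl	$4096, %eax	# TEST_ADDR += PROBE_INTERVAL (downward)
	orl	$0, (%esp,%eax)	# probe at SP + TEST_ADDR
	jmp	.LPSRL0
   .LPSRE0:

   TEST_ADDR holds a negative offset from SP, so the sub walks the
   probes toward lower addresses.  */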
9354 /* Finalize the stack_realign_needed flag, which guides the generation of
9355 the prologue/epilogue in the correct form. */
9357 ix86_finalize_stack_realign_flags (void)
9359 /* Check whether stack realignment is really needed after reload, and
9360 store the result in cfun. */
9361 unsigned int incoming_stack_boundary
9362 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9363 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9364 unsigned int stack_realign = (incoming_stack_boundary
9365 < (current_function_is_leaf
9366 ? crtl->max_used_stack_slot_alignment
9367 : crtl->stack_alignment_needed));
9369 if (crtl->stack_realign_finalized)
9371 /* After stack_realign_needed is finalized, we can no longer
9372 update it. */
9373 gcc_assert (crtl->stack_realign_needed == stack_realign);
9377 crtl->stack_realign_needed = stack_realign;
9378 crtl->stack_realign_finalized = true;
9382 /* Expand the prologue into a bunch of separate insns. */
9385 ix86_expand_prologue (void)
9387 struct machine_function *m = cfun->machine;
9390 struct ix86_frame frame;
9391 HOST_WIDE_INT allocate;
9392 bool int_registers_saved;
9394 ix86_finalize_stack_realign_flags ();
9396 /* DRAP should not coexist with stack_realign_fp */
9397 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9399 memset (&m->fs, 0, sizeof (m->fs));
9401 /* Initialize CFA state for before the prologue. */
9402 m->fs.cfa_reg = stack_pointer_rtx;
9403 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
9405 /* Track the SP offset to the CFA. We continue tracking this after we've
9406 swapped the CFA register away from SP. In the case of re-alignment
9407 this is fudged; we're interested in offsets within the local frame. */
9408 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9409 m->fs.sp_valid = true;
9411 ix86_compute_frame_layout (&frame);
9413 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9415 /* We should have already generated an error for any use of
9416 ms_hook on a nested function. */
9417 gcc_checking_assert (!ix86_static_chain_on_stack);
9419 /* Check whether profiling is active and we shall use the
9420 profiling-before-prologue variant. If so, issue a sorry. */
9421 if (crtl->profile && flag_fentry != 0)
9422 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9424 /* In ix86_asm_output_function_label we emitted:
9425 8b ff movl.s %edi,%edi
9426 55 push %ebp
9427 8b ec movl.s %esp,%ebp
9429 This matches the hookable function prologue in Win32 API
9430 functions in Microsoft Windows XP Service Pack 2 and newer.
9431 Wine uses this to enable Windows apps to hook the Win32 API
9432 functions provided by Wine.
9434 What that means is that we've already set up the frame pointer. */
9436 if (frame_pointer_needed
9437 && !(crtl->drap_reg && crtl->stack_realign_needed))
9441 /* We've decided to use the frame pointer already set up.
9442 Describe this to the unwinder by pretending that both
9443 push and mov insns happen right here.
9445 Putting the unwind info here at the end of the ms_hook
9446 is done so that we can make absolutely certain we get
9447 the required byte sequence at the start of the function,
9448 rather than relying on an assembler that can produce
9449 the exact encoding required.
9451 However, it does mean (in the unpatched case) that we have
9452 a 1 insn window where the asynchronous unwind info is
9453 incorrect. On the other hand, if we placed the unwind info at
9454 its correct location we would have incorrect unwind info
9455 in the patched case. This is probably all moot, since
9456 I don't expect Wine to generate dwarf2 unwind info for the
9457 system libraries that use this feature. */
9459 insn = emit_insn (gen_blockage ());
9461 push = gen_push (hard_frame_pointer_rtx);
9462 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9464 RTX_FRAME_RELATED_P (push) = 1;
9465 RTX_FRAME_RELATED_P (mov) = 1;
9467 RTX_FRAME_RELATED_P (insn) = 1;
9468 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9469 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9471 /* Note that gen_push incremented m->fs.cfa_offset, even
9472 though we didn't emit the push insn here. */
9473 m->fs.cfa_reg = hard_frame_pointer_rtx;
9474 m->fs.fp_offset = m->fs.cfa_offset;
9475 m->fs.fp_valid = true;
9479 /* The frame pointer is not needed, so pop %ebp again.
9480 This leaves us with a pristine state. */
9481 emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
9485 /* The first insn of a function that accepts its static chain on the
9486 stack is to push the register that would be filled in by a direct
9487 call. This insn will be skipped by the trampoline. */
9488 else if (ix86_static_chain_on_stack)
9490 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9491 emit_insn (gen_blockage ());
9493 /* We don't want to interpret this push insn as a register save,
9494 only as a stack adjustment. The real copy of the register as
9495 a save will be done later, if needed. */
9496 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9497 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9498 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9499 RTX_FRAME_RELATED_P (insn) = 1;
9502 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9503 DRAP is needed and stack realignment is really needed after reload. */
9504 if (stack_realign_drap)
9506 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9508 /* Only need to push the parameter pointer reg if it is callee-saved. */
9509 if (!call_used_regs[REGNO (crtl->drap_reg)])
9511 /* Push the arg pointer reg. */
9512 insn = emit_insn (gen_push (crtl->drap_reg));
9513 RTX_FRAME_RELATED_P (insn) = 1;
9516 /* Grab the argument pointer. */
9517 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9518 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9519 RTX_FRAME_RELATED_P (insn) = 1;
9520 m->fs.cfa_reg = crtl->drap_reg;
9521 m->fs.cfa_offset = 0;
9523 /* Align the stack. */
9524 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9526 GEN_INT (-align_bytes)));
9527 RTX_FRAME_RELATED_P (insn) = 1;
9529 /* Replicate the return address on the stack so that the return
9530 address can be reached via the (argp - 1) slot. This is needed
9531 to implement the macro RETURN_ADDR_RTX and the intrinsic function
9532 expand_builtin_return_addr, etc. */
9533 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9534 t = gen_frame_mem (Pmode, t);
9535 insn = emit_insn (gen_push (t));
9536 RTX_FRAME_RELATED_P (insn) = 1;
9538 /* For the purposes of frame and register save area addressing,
9539 we've started over with a new frame. */
9540 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9541 m->fs.realigned = true;
9544 if (frame_pointer_needed && !m->fs.fp_valid)
9546 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9547 slower on all targets. Also sdb doesn't like it. */
9548 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9549 RTX_FRAME_RELATED_P (insn) = 1;
9551 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9552 RTX_FRAME_RELATED_P (insn) = 1;
9554 if (m->fs.cfa_reg == stack_pointer_rtx)
9555 m->fs.cfa_reg = hard_frame_pointer_rtx;
9556 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9557 m->fs.fp_offset = m->fs.sp_offset;
9558 m->fs.fp_valid = true;
9561 int_registers_saved = (frame.nregs == 0);
9563 if (!int_registers_saved)
9565 /* If saving registers via PUSH, do so now. */
9566 if (!frame.save_regs_using_mov)
9568 ix86_emit_save_regs ();
9569 int_registers_saved = true;
9570 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9573 /* When using the red zone we may start register saving before allocating
9574 the stack frame, saving one cycle of the prologue. However, avoid
9575 doing this if we have to probe the stack; at least on x86_64 the
9576 stack probe can turn into a call that clobbers a red zone location. */
9577 else if (ix86_using_red_zone ()
9578 && (! TARGET_STACK_PROBE
9579 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9581 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9582 int_registers_saved = true;
9586 if (stack_realign_fp)
9588 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9589 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9591 /* The computation of the size of the re-aligned stack frame means
9592 that we must allocate the size of the register save area before
9593 performing the actual alignment. Otherwise we cannot guarantee
9594 that there's enough storage above the realignment point. */
9595 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9596 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9597 GEN_INT (m->fs.sp_offset
9598 - frame.sse_reg_save_offset),
9601 /* Align the stack. */
9602 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9604 GEN_INT (-align_bytes)));
9606 /* For the purposes of register save area addressing, the stack
9607 pointer is no longer valid. As for the value of sp_offset,
9608 see ix86_compute_frame_layout, which we need to match in order
9609 to pass verification of stack_pointer_offset at the end. */
9610 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9611 m->fs.sp_valid = false;
9614 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9616 if (flag_stack_usage)
9618 /* We start to count from ARG_POINTER. */
9619 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9621 /* If it was realigned, take into account the fake frame. */
9622 if (stack_realign_drap)
9624 if (ix86_static_chain_on_stack)
9625 stack_size += UNITS_PER_WORD;
9627 if (!call_used_regs[REGNO (crtl->drap_reg)])
9628 stack_size += UNITS_PER_WORD;
9630 /* This over-estimates by 1 minimal-stack-alignment-unit but
9631 mitigates that by counting in the new return address slot. */
9632 current_function_dynamic_stack_size
9633 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9636 current_function_static_stack_size = stack_size;
9639 /* The stack has already been decremented by the instruction calling us
9640 so we need to probe unconditionally to preserve the protection area. */
9641 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9643 /* We expect the registers to be saved when probes are used. */
9644 gcc_assert (int_registers_saved);
9646 if (STACK_CHECK_MOVING_SP)
9648 ix86_adjust_stack_and_probe (allocate);
9653 HOST_WIDE_INT size = allocate;
9655 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9656 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9658 if (TARGET_STACK_PROBE)
9659 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9661 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9667 else if (!ix86_target_stack_probe ()
9668 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9670 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9671 GEN_INT (-allocate), -1,
9672 m->fs.cfa_reg == stack_pointer_rtx);
9676 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9679 if (cfun->machine->call_abi == MS_ABI)
9682 eax_live = ix86_eax_live_at_start_p ();
9686 emit_insn (gen_push (eax));
9687 allocate -= UNITS_PER_WORD;
9690 emit_move_insn (eax, GEN_INT (allocate));
9692 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9694 if (m->fs.cfa_reg == stack_pointer_rtx)
9696 m->fs.cfa_offset += allocate;
9697 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9698 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9699 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9700 RTX_FRAME_RELATED_P (insn) = 1;
9702 m->fs.sp_offset += allocate;
9706 t = choose_baseaddr (m->fs.sp_offset - allocate);
9707 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9710 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9712 if (!int_registers_saved)
9713 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9715 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9717 pic_reg_used = false;
9718 if (pic_offset_table_rtx
9719 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9722 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9724 if (alt_pic_reg_used != INVALID_REGNUM)
9725 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9727 pic_reg_used = true;
9734 if (ix86_cmodel == CM_LARGE_PIC)
9736 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9737 rtx label = gen_label_rtx ();
9739 LABEL_PRESERVE_P (label) = 1;
9740 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9741 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9742 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9743 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9744 pic_offset_table_rtx, tmp_reg));
9747 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9750 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9753 /* In the pic_reg_used case, make sure that the GOT load isn't deleted
9754 when mcount needs it. Blockage to avoid call movement across the mcount
9755 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END note. */
9757 if (crtl->profile && !flag_fentry && pic_reg_used)
9758 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9760 if (crtl->drap_reg && !crtl->stack_realign_needed)
9762 /* vDRAP is set up, but after reload it turns out stack realignment
9763 isn't necessary; here we emit the prologue to set up DRAP
9764 without the stack realignment adjustment. */
9765 t = choose_baseaddr (0);
9766 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9769 /* Prevent instructions from being scheduled into the register save push
9770 sequence when access to the red-zone area is done through the frame
9771 pointer. The offset between the frame pointer and the stack pointer is
9772 calculated relative to the value of the stack pointer at the end of the
9773 function prologue, and moving instructions that access the red-zone area
9774 via the frame pointer inside the push sequence violates this assumption. */
9775 if (frame_pointer_needed && frame.red_zone_size)
9776 emit_insn (gen_memory_blockage ());
9778 /* Emit cld instruction if stringops are used in the function. */
9779 if (TARGET_CLD && ix86_current_function_needs_cld)
9780 emit_insn (gen_cld ());
9783 /* Emit code to restore REG using a POP insn. */
9786 ix86_emit_restore_reg_using_pop (rtx reg)
9788 struct machine_function *m = cfun->machine;
9789 rtx insn = emit_insn (ix86_gen_pop1 (reg));
9791 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9792 m->fs.sp_offset -= UNITS_PER_WORD;
9794 if (m->fs.cfa_reg == crtl->drap_reg
9795 && REGNO (reg) == REGNO (crtl->drap_reg))
9797 /* Previously we'd represented the CFA as an expression
9798 like *(%ebp - 8). We've just popped that value from
9799 the stack, which means we need to reset the CFA to
9800 the drap register. This will remain until we restore
9801 the stack pointer. */
9802 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9803 RTX_FRAME_RELATED_P (insn) = 1;
9805 /* This means that the DRAP register is valid for addressing too. */
9806 m->fs.drap_valid = true;
9810 if (m->fs.cfa_reg == stack_pointer_rtx)
9812 m->fs.cfa_offset -= UNITS_PER_WORD;
9813 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9814 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9815 RTX_FRAME_RELATED_P (insn) = 1;
9818 /* When the frame pointer is the CFA, and we pop it, we are
9819 swapping back to the stack pointer as the CFA. This happens
9820 for stack frames that don't allocate other data, so we assume
9821 the stack pointer is now pointing at the return address, i.e.
9822 the function entry state, which makes the offset be 1 word. */
9823 if (reg == hard_frame_pointer_rtx)
9825 m->fs.fp_valid = false;
9826 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9828 m->fs.cfa_reg = stack_pointer_rtx;
9829 m->fs.cfa_offset -= UNITS_PER_WORD;
9831 add_reg_note (insn, REG_CFA_DEF_CFA,
9832 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9833 GEN_INT (m->fs.cfa_offset)));
9834 RTX_FRAME_RELATED_P (insn) = 1;
9839 /* Emit code to restore saved registers using POP insns. */
9842 ix86_emit_restore_regs_using_pop (void)
9846 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9847 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9848 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9851 /* Emit code and notes for the LEAVE instruction. */
9854 ix86_emit_leave (void)
9856 struct machine_function *m = cfun->machine;
9857 rtx insn = emit_insn (ix86_gen_leave ());
9859 ix86_add_queued_cfa_restore_notes (insn);
9861 gcc_assert (m->fs.fp_valid);
9862 m->fs.sp_valid = true;
9863 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9864 m->fs.fp_valid = false;
9866 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9868 m->fs.cfa_reg = stack_pointer_rtx;
9869 m->fs.cfa_offset = m->fs.sp_offset;
9871 add_reg_note (insn, REG_CFA_DEF_CFA,
9872 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9873 RTX_FRAME_RELATED_P (insn) = 1;
9874 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9879 /* Emit code to restore saved registers using MOV insns.
9880 First register is restored from CFA - CFA_OFFSET. */
9882 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9883 int maybe_eh_return)
9885 struct machine_function *m = cfun->machine;
9888 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9889 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9891 rtx reg = gen_rtx_REG (Pmode, regno);
9894 mem = choose_baseaddr (cfa_offset);
9895 mem = gen_frame_mem (Pmode, mem);
9896 insn = emit_move_insn (reg, mem);
9898 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
9900 /* Previously we'd represented the CFA as an expression
9901 like *(%ebp - 8). We've just restored that value from
9902 the stack, which means we need to reset the CFA to
9903 the drap register. This will remain until we restore
9904 the stack pointer. */
9905 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9906 RTX_FRAME_RELATED_P (insn) = 1;
9908 /* This means that the DRAP register is valid for addressing. */
9909 m->fs.drap_valid = true;
9912 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9914 cfa_offset -= UNITS_PER_WORD;
9918 /* Emit code to restore saved registers using MOV insns.
9919 First register is restored from CFA - CFA_OFFSET. */
9921 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9922 int maybe_eh_return)
9926 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9927 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9929 rtx reg = gen_rtx_REG (V4SFmode, regno);
9932 mem = choose_baseaddr (cfa_offset);
9933 mem = gen_rtx_MEM (V4SFmode, mem);
9934 set_mem_align (mem, 128);
9935 emit_move_insn (reg, mem);
9937 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9943 /* Restore function stack, frame, and registers. */
9946 ix86_expand_epilogue (int style)
9948 struct machine_function *m = cfun->machine;
9949 struct machine_frame_state frame_state_save = m->fs;
9950 struct ix86_frame frame;
9951 bool restore_regs_via_mov;
9954 ix86_finalize_stack_realign_flags ();
9955 ix86_compute_frame_layout (&frame);
9957 m->fs.sp_valid = (!frame_pointer_needed
9958 || (current_function_sp_is_unchanging
9959 && !stack_realign_fp));
9960 gcc_assert (!m->fs.sp_valid
9961 || m->fs.sp_offset == frame.stack_pointer_offset);
9963 /* The FP must be valid if the frame pointer is present. */
9964 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9965 gcc_assert (!m->fs.fp_valid
9966 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9968 /* We must have *some* valid pointer to the stack frame. */
9969 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9971 /* The DRAP is never valid at this point. */
9972 gcc_assert (!m->fs.drap_valid);
9974 /* See the comment about red zone and frame
9975 pointer usage in ix86_expand_prologue. */
9976 if (frame_pointer_needed && frame.red_zone_size)
9977 emit_insn (gen_memory_blockage ());
9979 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9980 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9982 /* Determine the CFA offset of the end of the red-zone. */
9983 m->fs.red_zone_offset = 0;
9984 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9986 /* The red-zone begins below the return address. */
9987 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
9989 /* When the register save area is in the aligned portion of
9990 the stack, determine the maximum runtime displacement that
9991 matches up with the aligned frame. */
9992 if (stack_realign_drap)
9993 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
9997 /* Special care must be taken for the normal return case of a function
9998 using eh_return: the eax and edx registers are marked as saved, but
9999 not restored along this path. Adjust the save location to match. */
10000 if (crtl->calls_eh_return && style != 2)
10001 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
10003 /* If we're only restoring one register and sp is not valid, then
10004 use a move instruction to restore the register, since it's
10005 less work than reloading sp and popping the register. */
10006 if (!m->fs.sp_valid && frame.nregs <= 1)
10007 restore_regs_via_mov = true;
10008 /* EH_RETURN requires the use of moves to function properly. */
10009 else if (crtl->calls_eh_return)
10010 restore_regs_via_mov = true;
10011 else if (TARGET_EPILOGUE_USING_MOVE
10012 && cfun->machine->use_fast_prologue_epilogue
10013 && (frame.nregs > 1
10014 || m->fs.sp_offset != frame.reg_save_offset))
10015 restore_regs_via_mov = true;
10016 else if (frame_pointer_needed
10018 && m->fs.sp_offset != frame.reg_save_offset)
10019 restore_regs_via_mov = true;
10020 else if (frame_pointer_needed
10021 && TARGET_USE_LEAVE
10022 && cfun->machine->use_fast_prologue_epilogue
10023 && frame.nregs == 1)
10024 restore_regs_via_mov = true;
10026 restore_regs_via_mov = false;
10028 if (restore_regs_via_mov || frame.nsseregs)
10030 /* Ensure that the entire register save area is addressable via
10031 the stack pointer, if we will restore via sp. */
10033 && m->fs.sp_offset > 0x7fffffff
10034 && !(m->fs.fp_valid || m->fs.drap_valid)
10035 && (frame.nsseregs + frame.nregs) != 0)
10037 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10038 GEN_INT (m->fs.sp_offset
10039 - frame.sse_reg_save_offset),
10041 m->fs.cfa_reg == stack_pointer_rtx);
10045 /* If there are any SSE registers to restore, then we have to do it
10046 via moves, since there's obviously no pop for SSE regs. */
10047 if (frame.nsseregs)
10048 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10051 if (restore_regs_via_mov)
10056 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10058 /* eh_return epilogues need %ecx added to the stack pointer. */
10061 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10063 /* Stack align doesn't work with eh_return. */
10064 gcc_assert (!stack_realign_drap);
10065 /* Neither do regparm nested functions. */
10066 gcc_assert (!ix86_static_chain_on_stack);
10068 if (frame_pointer_needed)
10070 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10071 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10072 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10074 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10075 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10077 /* Note that we use SA as a temporary CFA, as the return
10078 address is at the proper place relative to it. We
10079 pretend this happens at the FP restore insn because
10080 prior to this insn the FP would be stored at the wrong
10081 offset relative to SA, and after this insn we have no
10082 other reasonable register to use for the CFA. We don't
10083 bother resetting the CFA to the SP for the duration of
10084 the return insn. */
10085 add_reg_note (insn, REG_CFA_DEF_CFA,
10086 plus_constant (sa, UNITS_PER_WORD));
10087 ix86_add_queued_cfa_restore_notes (insn);
10088 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10089 RTX_FRAME_RELATED_P (insn) = 1;
10091 m->fs.cfa_reg = sa;
10092 m->fs.cfa_offset = UNITS_PER_WORD;
10093 m->fs.fp_valid = false;
10095 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10096 const0_rtx, style, false);
10100 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10101 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10102 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10103 ix86_add_queued_cfa_restore_notes (insn);
10105 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10106 if (m->fs.cfa_offset != UNITS_PER_WORD)
10108 m->fs.cfa_offset = UNITS_PER_WORD;
10109 add_reg_note (insn, REG_CFA_DEF_CFA,
10110 plus_constant (stack_pointer_rtx,
10112 RTX_FRAME_RELATED_P (insn) = 1;
10115 m->fs.sp_offset = UNITS_PER_WORD;
10120 /* First step is to deallocate the stack frame so that we can
10121 pop the registers. */
10122 if (!m->fs.sp_valid)
10124 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10125 GEN_INT (m->fs.fp_offset
10126 - frame.reg_save_offset),
10129 else if (m->fs.sp_offset != frame.reg_save_offset)
10131 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10132 GEN_INT (m->fs.sp_offset
10133 - frame.reg_save_offset),
10135 m->fs.cfa_reg == stack_pointer_rtx);
10138 ix86_emit_restore_regs_using_pop ();
10141 /* If we used a frame pointer and haven't already got rid of it, pop it now. */
10143 if (m->fs.fp_valid)
10145 /* If the stack pointer is valid and pointing at the frame
10146 pointer store address, then we only need a pop. */
10147 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10148 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10149 /* The 'leave' instruction results in shorter dependency chains on
10150 CPUs that are able to grok it fast. */
10151 else if (TARGET_USE_LEAVE
10152 || optimize_function_for_size_p (cfun)
10153 || !cfun->machine->use_fast_prologue_epilogue)
10154 ix86_emit_leave ();
10157 pro_epilogue_adjust_stack (stack_pointer_rtx,
10158 hard_frame_pointer_rtx,
10159 const0_rtx, style, !using_drap);
10160 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10166 int param_ptr_offset = UNITS_PER_WORD;
10169 gcc_assert (stack_realign_drap);
10171 if (ix86_static_chain_on_stack)
10172 param_ptr_offset += UNITS_PER_WORD;
10173 if (!call_used_regs[REGNO (crtl->drap_reg)])
10174 param_ptr_offset += UNITS_PER_WORD;
10176 insn = emit_insn (gen_rtx_SET
10177 (VOIDmode, stack_pointer_rtx,
10178 gen_rtx_PLUS (Pmode,
10180 GEN_INT (-param_ptr_offset))));
10181 m->fs.cfa_reg = stack_pointer_rtx;
10182 m->fs.cfa_offset = param_ptr_offset;
10183 m->fs.sp_offset = param_ptr_offset;
10184 m->fs.realigned = false;
10186 add_reg_note (insn, REG_CFA_DEF_CFA,
10187 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10188 GEN_INT (param_ptr_offset)));
10189 RTX_FRAME_RELATED_P (insn) = 1;
10191 if (!call_used_regs[REGNO (crtl->drap_reg)])
10192 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10195 /* At this point the stack pointer must be valid, and we must have
10196 restored all of the registers. We may not have deallocated the
10197 entire stack frame. We've delayed this until now because it may
10198 be possible to merge the local stack deallocation with the
10199 deallocation forced by ix86_static_chain_on_stack. */
10200 gcc_assert (m->fs.sp_valid);
10201 gcc_assert (!m->fs.fp_valid);
10202 gcc_assert (!m->fs.realigned);
10203 if (m->fs.sp_offset != UNITS_PER_WORD)
10205 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10206 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10210 /* Sibcall epilogues don't want a return instruction. */
10213 m->fs = frame_state_save;
10217 if (crtl->args.pops_args && crtl->args.size)
10219 rtx popc = GEN_INT (crtl->args.pops_args);
10221 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10222 address, do an explicit add, and jump indirectly to the caller. */
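/* A sketch of the sequence emitted in the >= 64K case (illustrative
   only; N stands for crtl->args.pops_args):

     popl %ecx       # retrieve the return address
     addl $N, %esp   # pop the argument block explicitly
     jmp  *%ecx      # return to the caller

   matching the gen_popsi1, pro_epilogue_adjust_stack and
   gen_return_indirect_internal calls below. */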
10224 if (crtl->args.pops_args >= 65536)
10226 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10229 /* There is no "pascal" calling convention in any 64bit ABI. */
10230 gcc_assert (!TARGET_64BIT);
10232 insn = emit_insn (gen_popsi1 (ecx));
10233 m->fs.cfa_offset -= UNITS_PER_WORD;
10234 m->fs.sp_offset -= UNITS_PER_WORD;
10236 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10237 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10238 add_reg_note (insn, REG_CFA_REGISTER,
10239 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10240 RTX_FRAME_RELATED_P (insn) = 1;
10242 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10244 emit_jump_insn (gen_return_indirect_internal (ecx));
10247 emit_jump_insn (gen_return_pop_internal (popc));
10250 emit_jump_insn (gen_return_internal ());
10252 /* Restore the state back to the state from the prologue,
10253 so that it's correct for the next epilogue. */
10254 m->fs = frame_state_save;
10257 /* Reset from the function's potential modifications. */
10260 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10261 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10263 if (pic_offset_table_rtx)
10264 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10266 /* Mach-O doesn't support labels at the end of objects, so if
10267 it looks like we might want one, insert a NOP. */
10269 rtx insn = get_last_insn ();
10272 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10273 insn = PREV_INSN (insn);
10277 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10278 fputs ("\tnop\n", file);
10284 /* Extract the parts of an RTL expression that is a valid memory address
10285 for an instruction. Return 0 if the structure of the address is
10286 grossly off. Return -1 if the address contains ASHIFT, so it is not
10287 strictly valid, but is still used for computing the length of the lea instruction. */
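/* An x86 address has the general form base + index*scale + disp. As an
   illustrative example, the RTL
     (plus (plus (mult (reg B) (const_int 4)) (reg A)) (const_int 12))
   decomposes into base = A, index = B, scale = 4, disp = 12, i.e. the
   ATT operand 12(%A,%B,4). */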
10290 ix86_decompose_address (rtx addr, struct ix86_address *out)
10292 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10293 rtx base_reg, index_reg;
10294 HOST_WIDE_INT scale = 1;
10295 rtx scale_rtx = NULL_RTX;
10298 enum ix86_address_seg seg = SEG_DEFAULT;
10300 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10302 else if (GET_CODE (addr) == PLUS)
10304 rtx addends[4], op;
10312 addends[n++] = XEXP (op, 1);
10315 while (GET_CODE (op) == PLUS);
10320 for (i = n; i >= 0; --i)
10323 switch (GET_CODE (op))
10328 index = XEXP (op, 0);
10329 scale_rtx = XEXP (op, 1);
10335 index = XEXP (op, 0);
10336 tmp = XEXP (op, 1);
10337 if (!CONST_INT_P (tmp))
10339 scale = INTVAL (tmp);
10340 if ((unsigned HOST_WIDE_INT) scale > 3)
10342 scale = 1 << scale;
10346 if (XINT (op, 1) == UNSPEC_TP
10347 && TARGET_TLS_DIRECT_SEG_REFS
10348 && seg == SEG_DEFAULT)
10349 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10378 else if (GET_CODE (addr) == MULT)
10380 index = XEXP (addr, 0); /* index*scale */
10381 scale_rtx = XEXP (addr, 1);
10383 else if (GET_CODE (addr) == ASHIFT)
10385 /* We're called for lea too, which implements ashift on occasion. */
10386 index = XEXP (addr, 0);
10387 tmp = XEXP (addr, 1);
10388 if (!CONST_INT_P (tmp))
10390 scale = INTVAL (tmp);
10391 if ((unsigned HOST_WIDE_INT) scale > 3)
10393 scale = 1 << scale;
10397 disp = addr; /* displacement */
10399 /* Extract the integral value of scale. */
10402 if (!CONST_INT_P (scale_rtx))
10404 scale = INTVAL (scale_rtx);
10407 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10408 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10410 /* Avoid useless 0 displacement. */
10411 if (disp == const0_rtx && (base || index))
10414 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10415 if (base_reg && index_reg && scale == 1
10416 && (index_reg == arg_pointer_rtx
10417 || index_reg == frame_pointer_rtx
10418 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10421 tmp = base, base = index, index = tmp;
10422 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10425 /* Special case: %ebp cannot be encoded as a base without a displacement.
10429 && (base_reg == hard_frame_pointer_rtx
10430 || base_reg == frame_pointer_rtx
10431 || base_reg == arg_pointer_rtx
10432 || (REG_P (base_reg)
10433 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10434 || REGNO (base_reg) == R13_REG))))
10437 /* Special case: on K6, [%esi] causes the instruction to be vector
10438 decoded. Avoid this by transforming it to [%esi+0].
10439 Reload calls address legitimization without cfun defined, so we need
10440 to test cfun for being non-NULL. */
10441 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10442 && base_reg && !index_reg && !disp
10443 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10446 /* Special case: encode reg+reg instead of reg*2. */
10447 if (!base && index && scale == 2)
10448 base = index, base_reg = index_reg, scale = 1;
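/* E.g. (%eax,%eax) needs no displacement, while (,%eax,2) cannot be
   encoded without a 32-bit displacement; see the next special case. */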
10450 /* Special case: scaling cannot be encoded without base or displacement. */
10451 if (!base && !disp && index && scale != 1)
10455 out->index = index;
10457 out->scale = scale;
10463 /* Return cost of the memory address x.
10464 For i386, it is better to use a complex address than let gcc copy
10465 the address into a reg and make a new pseudo. But not if the address
10466 requires two regs - that would mean more pseudos with longer lifetimes.
10469 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10471 struct ix86_address parts;
10473 int ok = ix86_decompose_address (x, &parts);
10477 if (parts.base && GET_CODE (parts.base) == SUBREG)
10478 parts.base = SUBREG_REG (parts.base);
10479 if (parts.index && GET_CODE (parts.index) == SUBREG)
10480 parts.index = SUBREG_REG (parts.index);
10482 /* Attempt to minimize the number of registers in the address. */
10484 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10486 && (!REG_P (parts.index)
10487 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10491 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10493 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10494 && parts.base != parts.index)
10497 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10498 since its predecode logic can't detect the length of such instructions
10499 and they degenerate to vector decoding. Increase the cost of such
10500 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10501 to split such addresses or even to refuse them altogether.
10503 The following addressing modes are affected (matching the test below):
[base+index*scale], [index*scale+disp] and [base+index].
10508 The first and last case may be avoided by explicitly coding the zero in
10509 the memory address, but I don't have an AMD-K6 machine handy to check this
10513 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10514 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10515 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10521 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
10522 this is used to form addresses to local data when -fPIC is in effect. */
10526 darwin_local_data_pic (rtx disp)
10528 return (GET_CODE (disp) == UNSPEC
10529 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10532 /* Determine if a given RTX is a valid constant. We already know this
10533 satisfies CONSTANT_P. */
10536 legitimate_constant_p (rtx x)
10538 switch (GET_CODE (x))
10543 if (GET_CODE (x) == PLUS)
10545 if (!CONST_INT_P (XEXP (x, 1)))
10550 if (TARGET_MACHO && darwin_local_data_pic (x))
10553 /* Only some unspecs are valid as "constants". */
10554 if (GET_CODE (x) == UNSPEC)
10555 switch (XINT (x, 1))
10558 case UNSPEC_GOTOFF:
10559 case UNSPEC_PLTOFF:
10560 return TARGET_64BIT;
10562 case UNSPEC_NTPOFF:
10563 x = XVECEXP (x, 0, 0);
10564 return (GET_CODE (x) == SYMBOL_REF
10565 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10566 case UNSPEC_DTPOFF:
10567 x = XVECEXP (x, 0, 0);
10568 return (GET_CODE (x) == SYMBOL_REF
10569 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10574 /* We must have drilled down to a symbol. */
10575 if (GET_CODE (x) == LABEL_REF)
10577 if (GET_CODE (x) != SYMBOL_REF)
10582 /* TLS symbols are never valid. */
10583 if (SYMBOL_REF_TLS_MODEL (x))
10586 /* DLLIMPORT symbols are never valid. */
10587 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10588 && SYMBOL_REF_DLLIMPORT_P (x))
10593 if (GET_MODE (x) == TImode
10594 && x != CONST0_RTX (TImode)
10600 if (!standard_sse_constant_p (x))
10607 /* Otherwise we handle everything else in the move patterns. */
10611 /* Determine if it's legal to put X into the constant pool. This
10612 is not possible for the address of thread-local symbols, which
10613 is checked above. */
10616 ix86_cannot_force_const_mem (rtx x)
10618 /* We can always put integral constants and vectors in memory. */
10619 switch (GET_CODE (x))
10629 return !legitimate_constant_p (x);
10633 /* Nonzero if the constant value X is a legitimate general operand
10634 when generating PIC code. It is given that flag_pic is on and
10635 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10638 legitimate_pic_operand_p (rtx x)
10642 switch (GET_CODE (x))
10645 inner = XEXP (x, 0);
10646 if (GET_CODE (inner) == PLUS
10647 && CONST_INT_P (XEXP (inner, 1)))
10648 inner = XEXP (inner, 0);
10650 /* Only some unspecs are valid as "constants". */
10651 if (GET_CODE (inner) == UNSPEC)
10652 switch (XINT (inner, 1))
10655 case UNSPEC_GOTOFF:
10656 case UNSPEC_PLTOFF:
10657 return TARGET_64BIT;
10659 x = XVECEXP (inner, 0, 0);
10660 return (GET_CODE (x) == SYMBOL_REF
10661 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10662 case UNSPEC_MACHOPIC_OFFSET:
10663 return legitimate_pic_address_disp_p (x);
10671 return legitimate_pic_address_disp_p (x);
10678 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
10682 legitimate_pic_address_disp_p (rtx disp)
10686 /* In 64bit mode we can allow direct addresses of symbols and labels
10687 when they are not dynamic symbols. */
10690 rtx op0 = disp, op1;
10692 switch (GET_CODE (disp))
10698 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10700 op0 = XEXP (XEXP (disp, 0), 0);
10701 op1 = XEXP (XEXP (disp, 0), 1);
10702 if (!CONST_INT_P (op1)
10703 || INTVAL (op1) >= 16*1024*1024
10704 || INTVAL (op1) < -16*1024*1024)
10706 if (GET_CODE (op0) == LABEL_REF)
10708 if (GET_CODE (op0) != SYMBOL_REF)
10713 /* TLS references should always be enclosed in UNSPEC. */
10714 if (SYMBOL_REF_TLS_MODEL (op0))
10716 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10717 && ix86_cmodel != CM_LARGE_PIC)
10725 if (GET_CODE (disp) != CONST)
10727 disp = XEXP (disp, 0);
10731 /* It is unsafe to allow PLUS expressions here; this limits the allowed
10732 distance of GOT references. We should not need these anyway. */
10733 if (GET_CODE (disp) != UNSPEC
10734 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10735 && XINT (disp, 1) != UNSPEC_GOTOFF
10736 && XINT (disp, 1) != UNSPEC_PLTOFF))
10739 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10740 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10746 if (GET_CODE (disp) == PLUS)
10748 if (!CONST_INT_P (XEXP (disp, 1)))
10750 disp = XEXP (disp, 0);
10754 if (TARGET_MACHO && darwin_local_data_pic (disp))
10757 if (GET_CODE (disp) != UNSPEC)
10760 switch (XINT (disp, 1))
10765 /* We need to check for both symbols and labels because VxWorks loads
10766 text labels with @GOT rather than @GOTOFF. See gotoff_operand for details. */
10768 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10769 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10770 case UNSPEC_GOTOFF:
10771 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10772 While the ABI also specifies a 32bit relocation, we don't produce
10773 it in the small PIC model at all. */
10774 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10775 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10777 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10779 case UNSPEC_GOTTPOFF:
10780 case UNSPEC_GOTNTPOFF:
10781 case UNSPEC_INDNTPOFF:
10784 disp = XVECEXP (disp, 0, 0);
10785 return (GET_CODE (disp) == SYMBOL_REF
10786 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10787 case UNSPEC_NTPOFF:
10788 disp = XVECEXP (disp, 0, 0);
10789 return (GET_CODE (disp) == SYMBOL_REF
10790 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10791 case UNSPEC_DTPOFF:
10792 disp = XVECEXP (disp, 0, 0);
10793 return (GET_CODE (disp) == SYMBOL_REF
10794 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10800 /* Recognizes RTL expressions that are valid memory addresses for an
10801 instruction. The MODE argument is the machine mode for the MEM
10802 expression that wants to use this address.
10804 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10805 convert common non-canonical forms to canonical form so that they will be recognized. */
10809 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10810 rtx addr, bool strict)
10812 struct ix86_address parts;
10813 rtx base, index, disp;
10814 HOST_WIDE_INT scale;
10816 if (ix86_decompose_address (addr, &parts) <= 0)
10817 /* Decomposition failed. */
10821 index = parts.index;
10823 scale = parts.scale;
10825 /* Validate base register.
10827 Don't allow SUBREG's that span more than a word here. It can lead to spill
10828 failures when the base is one word out of a two word structure, which is
10829 represented internally as a DImode int. */
10837 else if (GET_CODE (base) == SUBREG
10838 && REG_P (SUBREG_REG (base))
10839 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10841 reg = SUBREG_REG (base);
10843 /* Base is not a register. */
10846 if (GET_MODE (base) != Pmode)
10847 /* Base is not in Pmode. */
10850 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10851 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10852 /* Base is not valid. */
10856 /* Validate index register.
10858 Don't allow SUBREG's that span more than a word here -- same as above. */
10866 else if (GET_CODE (index) == SUBREG
10867 && REG_P (SUBREG_REG (index))
10868 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10870 reg = SUBREG_REG (index);
10872 /* Index is not a register. */
10875 if (GET_MODE (index) != Pmode)
10876 /* Index is not in Pmode. */
10879 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10880 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10881 /* Index is not valid. */
10885 /* Validate scale factor. */
10889 /* Scale without index. */
10892 if (scale != 2 && scale != 4 && scale != 8)
10893 /* Scale is not a valid multiplier. */
10897 /* Validate displacement. */
10900 if (GET_CODE (disp) == CONST
10901 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10902 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10903 switch (XINT (XEXP (disp, 0), 1))
10905 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10906 used. While the ABI also specifies 32bit relocations, we don't
10907 produce them at all and use IP-relative addressing instead. */
10909 case UNSPEC_GOTOFF:
10910 gcc_assert (flag_pic);
10912 goto is_legitimate_pic;
10914 /* 64bit address unspec. */
10917 case UNSPEC_GOTPCREL:
10918 gcc_assert (flag_pic);
10919 goto is_legitimate_pic;
10921 case UNSPEC_GOTTPOFF:
10922 case UNSPEC_GOTNTPOFF:
10923 case UNSPEC_INDNTPOFF:
10924 case UNSPEC_NTPOFF:
10925 case UNSPEC_DTPOFF:
10929 /* Invalid address unspec. */
10933 else if (SYMBOLIC_CONST (disp)
10937 && MACHOPIC_INDIRECT
10938 && !machopic_operand_p (disp)
10944 if (TARGET_64BIT && (index || base))
10946 /* foo@dtpoff(%rX) is ok. */
10947 if (GET_CODE (disp) != CONST
10948 || GET_CODE (XEXP (disp, 0)) != PLUS
10949 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10950 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10951 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10952 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10953 /* Non-constant pic memory reference. */
10956 else if (! legitimate_pic_address_disp_p (disp))
10957 /* Displacement is an invalid pic construct. */
10960 /* This code used to verify that a symbolic pic displacement
10961 includes the pic_offset_table_rtx register.
10963 While this is a good idea, unfortunately these constructs may
10964 be created by the "adds using lea" optimization for incorrect
10973 This code is nonsensical, but results in addressing the
10974 GOT table with pic_offset_table_rtx as the base. We can't
10975 just refuse it easily, since it gets matched by the
10976 "addsi3" pattern, which is later split to lea when the
10977 output register differs from the input. While this
10978 could be handled by a separate addsi pattern for this case
10979 that never results in lea, disabling this test seems to be
10980 the easier and correct fix for the crash. */
10982 else if (GET_CODE (disp) != LABEL_REF
10983 && !CONST_INT_P (disp)
10984 && (GET_CODE (disp) != CONST
10985 || !legitimate_constant_p (disp))
10986 && (GET_CODE (disp) != SYMBOL_REF
10987 || !legitimate_constant_p (disp)))
10988 /* Displacement is not constant. */
10990 else if (TARGET_64BIT
10991 && !x86_64_immediate_operand (disp, VOIDmode))
10992 /* Displacement is out of range. */
10996 /* Everything looks valid. */
11000 /* Determine if a given RTX is a valid constant address. */
11003 constant_address_p (rtx x)
11005 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11008 /* Return a unique alias set for the GOT. */
11010 static alias_set_type
11011 ix86_GOT_alias_set (void)
11013 static alias_set_type set = -1;
11015 set = new_alias_set ();
11019 /* Return a legitimate reference for ORIG (an address) using the
11020 register REG. If REG is 0, a new pseudo is generated.
11022 There are two types of references that must be handled:
11024 1. Global data references must load the address from the GOT, via
11025 the PIC reg. An insn is emitted to do this load, and the reg is
11028 2. Static data references, constant pool addresses, and code labels
11029 compute the address as an offset from the GOT, whose base is in
11030 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11031 differentiate them from global data objects. The returned
11032 address is the PIC reg + an unspec constant.
11034 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11035 reg also appears in the address. */
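/* Illustrative 32-bit assembly for the two cases, assuming %ebx holds
   the PIC register:

     movl foo@GOT(%ebx), %eax      # case 1: load the address from the GOT
     leal bar@GOTOFF(%ebx), %eax   # case 2: offset from the GOT base */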
11038 legitimize_pic_address (rtx orig, rtx reg)
11041 rtx new_rtx = orig;
11045 if (TARGET_MACHO && !TARGET_64BIT)
11048 reg = gen_reg_rtx (Pmode);
11049 /* Use the generic Mach-O PIC machinery. */
11050 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11054 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11056 else if (TARGET_64BIT
11057 && ix86_cmodel != CM_SMALL_PIC
11058 && gotoff_operand (addr, Pmode))
11061 /* This symbol may be referenced via a displacement from the PIC
11062 base address (@GOTOFF). */
11064 if (reload_in_progress)
11065 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11066 if (GET_CODE (addr) == CONST)
11067 addr = XEXP (addr, 0);
11068 if (GET_CODE (addr) == PLUS)
11070 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11072 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11075 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11076 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11078 tmpreg = gen_reg_rtx (Pmode);
11081 emit_move_insn (tmpreg, new_rtx);
11085 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11086 tmpreg, 1, OPTAB_DIRECT);
11089 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11091 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11093 /* This symbol may be referenced via a displacement from the PIC
11094 base address (@GOTOFF). */
11096 if (reload_in_progress)
11097 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11098 if (GET_CODE (addr) == CONST)
11099 addr = XEXP (addr, 0);
11100 if (GET_CODE (addr) == PLUS)
11102 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11104 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11107 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11108 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11109 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11113 emit_move_insn (reg, new_rtx);
11117 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11118 /* We can't use @GOTOFF for text labels on VxWorks;
11119 see gotoff_operand. */
11120 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11122 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11124 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11125 return legitimize_dllimport_symbol (addr, true);
11126 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11127 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11128 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11130 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11131 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11135 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11137 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11138 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11139 new_rtx = gen_const_mem (Pmode, new_rtx);
11140 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11143 reg = gen_reg_rtx (Pmode);
11144 /* Use gen_movsi directly; otherwise the address is loaded
11145 into a register for CSE. We don't want to CSE these addresses;
11146 instead we CSE addresses from the GOT table, so skip this. */
11147 emit_insn (gen_movsi (reg, new_rtx));
11152 /* This symbol must be referenced via a load from the
11153 Global Offset Table (@GOT). */
11155 if (reload_in_progress)
11156 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11157 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11158 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11160 new_rtx = force_reg (Pmode, new_rtx);
11161 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11162 new_rtx = gen_const_mem (Pmode, new_rtx);
11163 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11166 reg = gen_reg_rtx (Pmode);
11167 emit_move_insn (reg, new_rtx);
11173 if (CONST_INT_P (addr)
11174 && !x86_64_immediate_operand (addr, VOIDmode))
11178 emit_move_insn (reg, addr);
11182 new_rtx = force_reg (Pmode, addr);
11184 else if (GET_CODE (addr) == CONST)
11186 addr = XEXP (addr, 0);
11188 /* We must match stuff we generate before. Assume the only
11189 unspecs that can get here are ours. Not that we could do
11190 anything with them anyway.... */
11191 if (GET_CODE (addr) == UNSPEC
11192 || (GET_CODE (addr) == PLUS
11193 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11195 gcc_assert (GET_CODE (addr) == PLUS);
11197 if (GET_CODE (addr) == PLUS)
11199 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11201 /* Check first to see if this is a constant offset from a @GOTOFF
11202 symbol reference. */
11203 if (gotoff_operand (op0, Pmode)
11204 && CONST_INT_P (op1))
11208 if (reload_in_progress)
11209 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11210 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11212 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11213 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11214 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11218 emit_move_insn (reg, new_rtx);
11224 if (INTVAL (op1) < -16*1024*1024
11225 || INTVAL (op1) >= 16*1024*1024)
11227 if (!x86_64_immediate_operand (op1, Pmode))
11228 op1 = force_reg (Pmode, op1);
11229 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11235 base = legitimize_pic_address (XEXP (addr, 0), reg);
11236 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11237 base == reg ? NULL_RTX : reg);
11239 if (CONST_INT_P (new_rtx))
11240 new_rtx = plus_constant (base, INTVAL (new_rtx));
11243 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11245 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11246 new_rtx = XEXP (new_rtx, 1);
11248 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11256 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11259 get_thread_pointer (int to_reg)
11263 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11267 reg = gen_reg_rtx (Pmode);
11268 insn = gen_rtx_SET (VOIDmode, reg, tp);
11269 insn = emit_insn (insn);
11274 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11275 false if we expect this to be used for a memory address and true if
11276 we expect to load the address into a register. */
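/* As a rough illustration (GNU TLS dialect; the exact code varies with
   the target and options): a global-dynamic access calls the
   tls_get_addr helper to obtain the address at runtime, while a
   local-exec access can use the thread pointer directly, e.g.
     movl %gs:x@ntpoff, %eax
   The cases below construct the corresponding RTL for each tls_model. */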
11279 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11281 rtx dest, base, off, pic, tp;
11286 case TLS_MODEL_GLOBAL_DYNAMIC:
11287 dest = gen_reg_rtx (Pmode);
11288 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11290 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11292 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11295 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11296 insns = get_insns ();
11299 RTL_CONST_CALL_P (insns) = 1;
11300 emit_libcall_block (insns, dest, rax, x);
11302 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11303 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11305 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11307 if (TARGET_GNU2_TLS)
11309 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11311 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11315 case TLS_MODEL_LOCAL_DYNAMIC:
11316 base = gen_reg_rtx (Pmode);
11317 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11319 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11321 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11324 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11325 insns = get_insns ();
11328 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11329 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11330 RTL_CONST_CALL_P (insns) = 1;
11331 emit_libcall_block (insns, base, rax, note);
11333 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11334 emit_insn (gen_tls_local_dynamic_base_64 (base));
11336 emit_insn (gen_tls_local_dynamic_base_32 (base));
11338 if (TARGET_GNU2_TLS)
11340 rtx x = ix86_tls_module_base ();
11342 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11343 gen_rtx_MINUS (Pmode, x, tp));
11346 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11347 off = gen_rtx_CONST (Pmode, off);
11349 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11351 if (TARGET_GNU2_TLS)
11353 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11355 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11360 case TLS_MODEL_INITIAL_EXEC:
11364 type = UNSPEC_GOTNTPOFF;
11368 if (reload_in_progress)
11369 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11370 pic = pic_offset_table_rtx;
11371 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11373 else if (!TARGET_ANY_GNU_TLS)
11375 pic = gen_reg_rtx (Pmode);
11376 emit_insn (gen_set_got (pic));
11377 type = UNSPEC_GOTTPOFF;
11382 type = UNSPEC_INDNTPOFF;
11385 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11386 off = gen_rtx_CONST (Pmode, off);
11388 off = gen_rtx_PLUS (Pmode, pic, off);
11389 off = gen_const_mem (Pmode, off);
11390 set_mem_alias_set (off, ix86_GOT_alias_set ());
11392 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11394 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11395 off = force_reg (Pmode, off);
11396 return gen_rtx_PLUS (Pmode, base, off);
11400 base = get_thread_pointer (true);
11401 dest = gen_reg_rtx (Pmode);
11402 emit_insn (gen_subsi3 (dest, base, off));
11406 case TLS_MODEL_LOCAL_EXEC:
11407 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11408 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11409 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11410 off = gen_rtx_CONST (Pmode, off);
11412 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11414 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11415 return gen_rtx_PLUS (Pmode, base, off);
11419 base = get_thread_pointer (true);
11420 dest = gen_reg_rtx (Pmode);
11421 emit_insn (gen_subsi3 (dest, base, off));
11426 gcc_unreachable ();
11432 /* Create or return the unique __imp_DECL dllimport symbol corresponding to DECL. */
11435 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11436 htab_t dllimport_map;
11439 get_dllimport_decl (tree decl)
11441 struct tree_map *h, in;
11444 const char *prefix;
11445 size_t namelen, prefixlen;
11450 if (!dllimport_map)
11451 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11453 in.hash = htab_hash_pointer (decl);
11454 in.base.from = decl;
11455 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11456 h = (struct tree_map *) *loc;
11460 *loc = h = ggc_alloc_tree_map ();
11462 h->base.from = decl;
11463 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11464 VAR_DECL, NULL, ptr_type_node);
11465 DECL_ARTIFICIAL (to) = 1;
11466 DECL_IGNORED_P (to) = 1;
11467 DECL_EXTERNAL (to) = 1;
11468 TREE_READONLY (to) = 1;
11470 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11471 name = targetm.strip_name_encoding (name);
11472 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11473 ? "*__imp_" : "*__imp__";
11474 namelen = strlen (name);
11475 prefixlen = strlen (prefix);
11476 imp_name = (char *) alloca (namelen + prefixlen + 1);
11477 memcpy (imp_name, prefix, prefixlen);
11478 memcpy (imp_name + prefixlen, name, namelen + 1);
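/* E.g. a decl named "bar" maps to "*__imp__bar" when user_label_prefix
   is "_" (32-bit mingw) and to "*__imp_bar" when the prefix is empty;
   the leading '*' suppresses further prefixing by the assembler. */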
11480 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11481 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11482 SET_SYMBOL_REF_DECL (rtl, to);
11483 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11485 rtl = gen_const_mem (Pmode, rtl);
11486 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11488 SET_DECL_RTL (to, rtl);
11489 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
11494 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11495 true if we require the result be a register. */
11498 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11503 gcc_assert (SYMBOL_REF_DECL (symbol));
11504 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11506 x = DECL_RTL (imp_decl);
11508 x = force_reg (Pmode, x);
11512 /* Try machine-dependent ways of modifying an illegitimate address
11513 to be legitimate. If we find one, return the new, valid address.
11514 This macro is used in only one place: `memory_address' in explow.c.
11516 OLDX is the address as it was before break_out_memory_refs was called.
11517 In some cases it is useful to look at this to decide what needs to be done.
11519 It is always safe for this macro to do nothing. It exists to recognize
11520 opportunities to optimize the output.
11522 For the 80386, we handle X+REG by loading X into a register R and
11523 using R+REG. R will go in a general reg and indexing will be used.
11524 However, if REG is a broken-out memory address or multiplication,
11525 nothing needs to be done because REG can certainly go in a general reg.
11527 When -fpic is used, special handling is needed for symbolic references.
11528 See comments by legitimize_pic_address in i386.c for details. */
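/* For example (illustrative), given (plus (reg A) X) where X is not a
   valid address term, X is forced into a fresh pseudo T below, giving
   (plus (reg A) (reg T)), which is encodable as (%A,%T). */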
11531 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11532 enum machine_mode mode)
11537 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11539 return legitimize_tls_address (x, (enum tls_model) log, false);
11540 if (GET_CODE (x) == CONST
11541 && GET_CODE (XEXP (x, 0)) == PLUS
11542 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11543 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11545 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11546 (enum tls_model) log, false);
11547 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11550 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11552 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11553 return legitimize_dllimport_symbol (x, true);
11554 if (GET_CODE (x) == CONST
11555 && GET_CODE (XEXP (x, 0)) == PLUS
11556 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11557 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11559 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11560 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11564 if (flag_pic && SYMBOLIC_CONST (x))
11565 return legitimize_pic_address (x, 0);
11567 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
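/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   matching the scale field of an x86 address (1, 2, 4 or 8). */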
11568 if (GET_CODE (x) == ASHIFT
11569 && CONST_INT_P (XEXP (x, 1))
11570 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11573 log = INTVAL (XEXP (x, 1));
11574 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11575 GEN_INT (1 << log));
11578 if (GET_CODE (x) == PLUS)
11580 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11582 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11583 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11584 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11587 log = INTVAL (XEXP (XEXP (x, 0), 1));
11588 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11589 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11590 GEN_INT (1 << log));
11593 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11594 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11595 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11598 log = INTVAL (XEXP (XEXP (x, 1), 1));
11599 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11600 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11601 GEN_INT (1 << log));
11604 /* Put multiply first if it isn't already. */
11605 if (GET_CODE (XEXP (x, 1)) == MULT)
11607 rtx tmp = XEXP (x, 0);
11608 XEXP (x, 0) = XEXP (x, 1);
11613 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11614 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11615 created by virtual register instantiation, register elimination, and
11616 similar optimizations. */
11617 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11620 x = gen_rtx_PLUS (Pmode,
11621 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11622 XEXP (XEXP (x, 1), 0)),
11623 XEXP (XEXP (x, 1), 1));
11627 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11628 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11629 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11630 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11631 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11632 && CONSTANT_P (XEXP (x, 1)))
11635 rtx other = NULL_RTX;
11637 if (CONST_INT_P (XEXP (x, 1)))
11639 constant = XEXP (x, 1);
11640 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11642 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11644 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11645 other = XEXP (x, 1);
11653 x = gen_rtx_PLUS (Pmode,
11654 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11655 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11656 plus_constant (other, INTVAL (constant)));
11660 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11663 if (GET_CODE (XEXP (x, 0)) == MULT)
11666 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11669 if (GET_CODE (XEXP (x, 1)) == MULT)
11672 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11676 && REG_P (XEXP (x, 1))
11677 && REG_P (XEXP (x, 0)))
11680 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11683 x = legitimize_pic_address (x, 0);
11686 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11689 if (REG_P (XEXP (x, 0)))
11691 rtx temp = gen_reg_rtx (Pmode);
11692 rtx val = force_operand (XEXP (x, 1), temp);
11694 emit_move_insn (temp, val);
11696 XEXP (x, 1) = temp;
11700 else if (REG_P (XEXP (x, 1)))
11702 rtx temp = gen_reg_rtx (Pmode);
11703 rtx val = force_operand (XEXP (x, 0), temp);
11705 emit_move_insn (temp, val);
11707 XEXP (x, 0) = temp;
11715 /* Print an integer constant expression in assembler syntax. Addition
11716 and subtraction are the only arithmetic that may appear in these
11717 expressions. FILE is the stdio stream to write to, X is the rtx, and
11718 CODE is the operand print code from the output string. */
11721 output_pic_addr_const (FILE *file, rtx x, int code)
11725 switch (GET_CODE (x))
11728 gcc_assert (flag_pic);
11733 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11734 output_addr_const (file, x);
11737 const char *name = XSTR (x, 0);
11739 /* Mark the decl as referenced so that cgraph will
11740 output the function. */
11741 if (SYMBOL_REF_DECL (x))
11742 mark_decl_referenced (SYMBOL_REF_DECL (x));
11745 if (MACHOPIC_INDIRECT
11746 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11747 name = machopic_indirection_name (x, /*stub_p=*/true);
11749 assemble_name (file, name);
11751 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11752 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11753 fputs ("@PLT", file);
11760 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11761 assemble_name (asm_out_file, buf);
11765 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11769 /* This used to output parentheses around the expression,
11770 but that does not work on the 386 (either ATT or BSD assembler). */
11771 output_pic_addr_const (file, XEXP (x, 0), code);
11775 if (GET_MODE (x) == VOIDmode)
11777 /* We can use %d if the number is <32 bits and positive. */
11778 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11779 fprintf (file, "0x%lx%08lx",
11780 (unsigned long) CONST_DOUBLE_HIGH (x),
11781 (unsigned long) CONST_DOUBLE_LOW (x));
11783 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11786 /* We can't handle floating point constants;
11787 TARGET_PRINT_OPERAND must handle them. */
11788 output_operand_lossage ("floating constant misused");
11792 /* Some assemblers need integer constants to appear first. */
11793 if (CONST_INT_P (XEXP (x, 0)))
11795 output_pic_addr_const (file, XEXP (x, 0), code);
11797 output_pic_addr_const (file, XEXP (x, 1), code);
11801 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11802 output_pic_addr_const (file, XEXP (x, 1), code);
11804 output_pic_addr_const (file, XEXP (x, 0), code);
11810 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11811 output_pic_addr_const (file, XEXP (x, 0), code);
11813 output_pic_addr_const (file, XEXP (x, 1), code);
11815 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11819 gcc_assert (XVECLEN (x, 0) == 1);
11820 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11821 switch (XINT (x, 1))
11824 fputs ("@GOT", file);
11826 case UNSPEC_GOTOFF:
11827 fputs ("@GOTOFF", file);
11829 case UNSPEC_PLTOFF:
11830 fputs ("@PLTOFF", file);
11832 case UNSPEC_GOTPCREL:
11833 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11834 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11836 case UNSPEC_GOTTPOFF:
11837 /* FIXME: This might be @TPOFF in Sun ld too. */
11838 fputs ("@gottpoff", file);
11841 fputs ("@tpoff", file);
11843 case UNSPEC_NTPOFF:
11845 fputs ("@tpoff", file);
11847 fputs ("@ntpoff", file);
11849 case UNSPEC_DTPOFF:
11850 fputs ("@dtpoff", file);
11852 case UNSPEC_GOTNTPOFF:
11854 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11855 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11857 fputs ("@gotntpoff", file);
11859 case UNSPEC_INDNTPOFF:
11860 fputs ("@indntpoff", file);
11863 case UNSPEC_MACHOPIC_OFFSET:
11865 machopic_output_function_base_name (file);
11869 output_operand_lossage ("invalid UNSPEC as operand");
11875 output_operand_lossage ("invalid expression as operand");
11879 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11880 We need to emit DTP-relative relocations. */
11882 static void ATTRIBUTE_UNUSED
11883 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11885 fputs (ASM_LONG, file);
11886 output_addr_const (file, x);
11887 fputs ("@dtpoff", file);
11893 fputs (", 0", file);
11896 gcc_unreachable ();
11900 /* Return true if X is a representation of the PIC register. This copes
11901 with calls from ix86_find_base_term, where the register might have
11902 been replaced by a cselib value. */
11905 ix86_pic_register_p (rtx x)
11907 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11908 return (pic_offset_table_rtx
11909 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11911 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11914 /* In the name of slightly smaller debug output, and to cater to
11915 general assembler lossage, recognize PIC+GOTOFF and turn it back
11916 into a direct symbol reference.
11918 On Darwin, this is necessary to avoid a crash, because Darwin
11919 has a different PIC label for each routine but the DWARF debugging
11920 information is not associated with any particular routine, so it's
11921 necessary to remove references to the PIC label from RTL stored by
11922 the DWARF output code. */
11925 ix86_delegitimize_address (rtx x)
11927 rtx orig_x = delegitimize_mem_from_attrs (x);
11928 /* addend is NULL or some rtx if x is something+GOTOFF where
11929 something doesn't include the PIC register. */
11930 rtx addend = NULL_RTX;
11931 /* reg_addend is NULL or a multiple of some register. */
11932 rtx reg_addend = NULL_RTX;
11933 /* const_addend is NULL or a const_int. */
11934 rtx const_addend = NULL_RTX;
11935 /* This is the result, or NULL. */
11936 rtx result = NULL_RTX;
11945 if (GET_CODE (x) != CONST
11946 || GET_CODE (XEXP (x, 0)) != UNSPEC
11947 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11948 || !MEM_P (orig_x))
11950 x = XVECEXP (XEXP (x, 0), 0, 0);
11951 if (GET_MODE (orig_x) != Pmode)
11952 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11956 if (GET_CODE (x) != PLUS
11957 || GET_CODE (XEXP (x, 1)) != CONST)
11960 if (ix86_pic_register_p (XEXP (x, 0)))
11961 /* %ebx + GOT/GOTOFF */
11963 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11965 /* %ebx + %reg * scale + GOT/GOTOFF */
11966 reg_addend = XEXP (x, 0);
11967 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11968 reg_addend = XEXP (reg_addend, 1);
11969 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11970 reg_addend = XEXP (reg_addend, 0);
11973 reg_addend = NULL_RTX;
11974 addend = XEXP (x, 0);
11978 addend = XEXP (x, 0);
11980 x = XEXP (XEXP (x, 1), 0);
11981 if (GET_CODE (x) == PLUS
11982 && CONST_INT_P (XEXP (x, 1)))
11984 const_addend = XEXP (x, 1);
11988 if (GET_CODE (x) == UNSPEC
11989 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11990 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11991 result = XVECEXP (x, 0, 0);
11993 if (TARGET_MACHO && darwin_local_data_pic (x)
11994 && !MEM_P (orig_x))
11995 result = XVECEXP (x, 0, 0);
12001 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12003 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12006 /* If the rest of original X doesn't involve the PIC register, add
12007 addend and subtract pic_offset_table_rtx. This can happen e.g.
12009 leal (%ebx, %ecx, 4), %ecx
12011 movl foo@GOTOFF(%ecx), %edx
12012 in which case we return (%ecx - %ebx) + foo. */
12013 if (pic_offset_table_rtx)
12014 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12015 pic_offset_table_rtx),
12020 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12021 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12025 /* If X is a machine specific address (i.e. a symbol or label being
12026 referenced as a displacement from the GOT implemented using an
12027 UNSPEC), then return the base term. Otherwise return X. */
12030 ix86_find_base_term (rtx x)
12036 if (GET_CODE (x) != CONST)
12038 term = XEXP (x, 0);
12039 if (GET_CODE (term) == PLUS
12040 && (CONST_INT_P (XEXP (term, 1))
12041 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12042 term = XEXP (term, 0);
12043 if (GET_CODE (term) != UNSPEC
12044 || XINT (term, 1) != UNSPEC_GOTPCREL)
12047 return XVECEXP (term, 0, 0);
12050 return ix86_delegitimize_address (x);
12054 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12055 int fp, FILE *file)
12057 const char *suffix;
12059 if (mode == CCFPmode || mode == CCFPUmode)
12061 code = ix86_fp_compare_code_to_integer (code);
12065 code = reverse_condition (code);
12116 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12120 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12121 Those same assemblers have the same but opposite lossage on cmov. */
12122 if (mode == CCmode)
12123 suffix = fp ? "nbe" : "a";
12124 else if (mode == CCCmode)
12127 gcc_unreachable ();
12143 gcc_unreachable ();
12147 gcc_assert (mode == CCmode || mode == CCCmode);
12164 gcc_unreachable ();
12168 /* ??? As above. */
12169 gcc_assert (mode == CCmode || mode == CCCmode);
12170 suffix = fp ? "nb" : "ae";
12173 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12177 /* ??? As above. */
12178 if (mode == CCmode)
12180 else if (mode == CCCmode)
12181 suffix = fp ? "nb" : "ae";
12183 gcc_unreachable ();
12186 suffix = fp ? "u" : "p";
12189 suffix = fp ? "nu" : "np";
12192 gcc_unreachable ();
12194 fputs (suffix, file);
12197 /* Print the name of register X to FILE based on its machine mode and number.
12198 If CODE is 'w', pretend the mode is HImode.
12199 If CODE is 'b', pretend the mode is QImode.
12200 If CODE is 'k', pretend the mode is SImode.
12201 If CODE is 'q', pretend the mode is DImode.
12202 If CODE is 'x', pretend the mode is V4SFmode.
12203 If CODE is 't', pretend the mode is V8SFmode.
12204 If CODE is 'h', pretend the reg is the 'high' byte register.
12205 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
12206 If CODE is 'd', duplicate the operand for AVX instruction.
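For example, the AX register prints as "al" with code 'b', "ax" with
'w', "eax" with 'k' and "rax" with 'q' (the last on 64-bit targets only).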
12210 print_reg (rtx x, int code, FILE *file)
12213 bool duplicated = code == 'd' && TARGET_AVX;
12215 gcc_assert (x == pc_rtx
12216 || (REGNO (x) != ARG_POINTER_REGNUM
12217 && REGNO (x) != FRAME_POINTER_REGNUM
12218 && REGNO (x) != FLAGS_REG
12219 && REGNO (x) != FPSR_REG
12220 && REGNO (x) != FPCR_REG));
12222 if (ASSEMBLER_DIALECT == ASM_ATT)
12227 gcc_assert (TARGET_64BIT);
12228 fputs ("rip", file);
12232 if (code == 'w' || MMX_REG_P (x))
12234 else if (code == 'b')
12236 else if (code == 'k')
12238 else if (code == 'q')
12240 else if (code == 'y')
12242 else if (code == 'h')
12244 else if (code == 'x')
12246 else if (code == 't')
12249 code = GET_MODE_SIZE (GET_MODE (x));
12251 /* Irritatingly, AMD extended registers use a different naming convention
12252 from the normal registers. */
12253 if (REX_INT_REG_P (x))
12255 gcc_assert (TARGET_64BIT);
12259 error ("extended registers have no high halves");
12262 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12265 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12268 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12271 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12274 error ("unsupported operand size for extended register");
12284 if (STACK_TOP_P (x))
12293 if (! ANY_FP_REG_P (x))
12294 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12299 reg = hi_reg_name[REGNO (x)];
12302 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12304 reg = qi_reg_name[REGNO (x)];
12307 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12309 reg = qi_high_reg_name[REGNO (x)];
12314 gcc_assert (!duplicated);
12316 fputs (hi_reg_name[REGNO (x)] + 1, file);
12321 gcc_unreachable ();
12327 if (ASSEMBLER_DIALECT == ASM_ATT)
12328 fprintf (file, ", %%%s", reg);
12330 fprintf (file, ", %s", reg);
12334 /* Locate some local-dynamic symbol still in use by this function
12335 so that we can print its name in some tls_local_dynamic_base pattern. */
12339 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12343 if (GET_CODE (x) == SYMBOL_REF
12344 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12346 cfun->machine->some_ld_name = XSTR (x, 0);
12353 static const char *
12354 get_some_local_dynamic_name (void)
12358 if (cfun->machine->some_ld_name)
12359 return cfun->machine->some_ld_name;
12361 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12362 if (NONDEBUG_INSN_P (insn)
12363 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12364 return cfun->machine->some_ld_name;
12369 /* Meaning of CODE:
12370 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12371 C -- print opcode suffix for set/cmov insn.
12372 c -- like C, but print reversed condition
12373 F,f -- likewise, but for floating-point.
12374 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12376 R -- print the prefix for register names.
12377 z -- print the opcode suffix for the size of the current operand.
12378 Z -- likewise, with special suffixes for x87 instructions.
12379 * -- print a star (in certain assembler syntax)
12380 A -- print an absolute memory reference.
12381 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12382 s -- print a shift double count, followed by the assembler's argument delimiter.
12384 b -- print the QImode name of the register for the indicated operand.
12385 %b0 would print %al if operands[0] is reg 0.
12386 w -- likewise, print the HImode name of the register.
12387 k -- likewise, print the SImode name of the register.
12388 q -- likewise, print the DImode name of the register.
12389 x -- likewise, print the V4SFmode name of the register.
12390 t -- likewise, print the V8SFmode name of the register.
12391 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12392 y -- print "st(0)" instead of "st" as a register.
12393 d -- print duplicated register operand for AVX instruction.
12394 D -- print condition for SSE cmp instruction.
12395 P -- if PIC, print an @PLT suffix.
12396 X -- don't print any sort of PIC '@' suffix for a symbol.
12397 & -- print some in-use local-dynamic symbol name.
12398 H -- print a memory address offset by 8; used for sse high-parts
12399 Y -- print condition for XOP pcom* instruction.
12400 + -- print a branch hint as 'cs' or 'ds' prefix
12401 ; -- print a semicolon (after prefixes due to bug in older gas).
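For example (illustrative), a template such as "mov%z0\t{%1, %0|%0, %1}"
emits "movl" for an SImode operand 0 in ATT syntax, while Intel syntax
gets no size suffix.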
12405 ix86_print_operand (FILE *file, rtx x, int code)
12412 if (ASSEMBLER_DIALECT == ASM_ATT)
12418 const char *name = get_some_local_dynamic_name ();
12420 output_operand_lossage ("'%%&' used without any "
12421 "local dynamic TLS references");
12423 assemble_name (file, name);
12428 switch (ASSEMBLER_DIALECT)
12435 /* Intel syntax. For absolute addresses, registers should not
12436 be surrounded by braces. */
12440 ix86_print_operand (file, x, 0);
12447 gcc_unreachable ();
12450 ix86_print_operand (file, x, 0);
12455 if (ASSEMBLER_DIALECT == ASM_ATT)
12460 if (ASSEMBLER_DIALECT == ASM_ATT)
12465 if (ASSEMBLER_DIALECT == ASM_ATT)
12470 if (ASSEMBLER_DIALECT == ASM_ATT)
12475 if (ASSEMBLER_DIALECT == ASM_ATT)
12480 if (ASSEMBLER_DIALECT == ASM_ATT)
12485 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12487 /* Opcodes don't get size suffixes if using Intel opcodes. */
12488 if (ASSEMBLER_DIALECT == ASM_INTEL)
12491 switch (GET_MODE_SIZE (GET_MODE (x)))
12510 output_operand_lossage
12511 ("invalid operand size for operand code '%c'", code);
12516 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12518 (0, "non-integer operand used with operand code '%c'", code);
12522 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12523 if (ASSEMBLER_DIALECT == ASM_INTEL)
12526 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12528 switch (GET_MODE_SIZE (GET_MODE (x)))
12531 #ifdef HAVE_AS_IX86_FILDS
12541 #ifdef HAVE_AS_IX86_FILDQ
12544 fputs ("ll", file);
12552 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12554 /* 387 opcodes don't get size suffixes
12555 if the operands are registers. */
12556 if (STACK_REG_P (x))
12559 switch (GET_MODE_SIZE (GET_MODE (x)))
12580 output_operand_lossage
12581 ("invalid operand type used with operand code '%c'", code);
12585 output_operand_lossage
12586 ("invalid operand size for operand code '%c'", code);
12603 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12605 ix86_print_operand (file, x, 0);
12606 fputs (", ", file);
12611 /* A little bit of braindamage here. The SSE compare instructions
12612 use completely different names for the comparisons than the
12613 fp conditional moves do. */
12616 switch (GET_CODE (x))
12619 fputs ("eq", file);
12622 fputs ("eq_us", file);
12625 fputs ("lt", file);
12628 fputs ("nge", file);
12631 fputs ("le", file);
12634 fputs ("ngt", file);
12637 fputs ("unord", file);
12640 fputs ("neq", file);
12643 fputs ("neq_oq", file);
12646 fputs ("ge", file);
12649 fputs ("nlt", file);
12652 fputs ("gt", file);
12655 fputs ("nle", file);
12658 fputs ("ord", file);
12661 output_operand_lossage ("operand is not a condition code, "
12662 "invalid operand code 'D'");
12668 switch (GET_CODE (x))
12672 fputs ("eq", file);
12676 fputs ("lt", file);
12680 fputs ("le", file);
12683 fputs ("unord", file);
12687 fputs ("neq", file);
12691 fputs ("nlt", file);
12695 fputs ("nle", file);
12698 fputs ("ord", file);
12701 output_operand_lossage ("operand is not a condition code, "
12702 "invalid operand code 'D'");
12708 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12709 if (ASSEMBLER_DIALECT == ASM_ATT)
12711 switch (GET_MODE (x))
12713 case HImode: putc ('w', file); break;
12715 case SFmode: putc ('l', file); break;
12717 case DFmode: putc ('q', file); break;
12718 default: gcc_unreachable ();
12725 if (!COMPARISON_P (x))
12727 output_operand_lossage ("operand is neither a constant nor a "
12728 "condition code, invalid operand code "
12732 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12735 if (!COMPARISON_P (x))
12737 output_operand_lossage ("operand is neither a constant nor a "
12738 "condition code, invalid operand code "
12742 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12743 if (ASSEMBLER_DIALECT == ASM_ATT)
12746 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
/* Like above, but with the condition reversed.  */
/* Check to see if the argument to %c is really a constant
   and not a condition code which needs to be reversed.  */
12753 if (!COMPARISON_P (x))
12755 output_operand_lossage ("operand is neither a constant nor a "
12756 "condition code, invalid operand "
12760 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12763 if (!COMPARISON_P (x))
12765 output_operand_lossage ("operand is neither a constant nor a "
12766 "condition code, invalid operand "
12770 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12771 if (ASSEMBLER_DIALECT == ASM_ATT)
12774 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12778 /* It doesn't actually matter what mode we use here, as we're
12779 only going to use this for printing. */
12780 x = adjust_address_nv (x, DImode, 8);
12788 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12791 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12794 int pred_val = INTVAL (XEXP (x, 0));
12796 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12797 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12799 int taken = pred_val > REG_BR_PROB_BASE / 2;
12800 int cputaken = final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only in cases where the default branch-prediction
   heuristics would fail.  */
12804 if (taken != cputaken)
12806 /* We use 3e (DS) prefix for taken branches and
12807 2e (CS) prefix for not taken branches. */
12809 fputs ("ds ; ", file);
12811 fputs ("cs ; ", file);
12819 switch (GET_CODE (x))
12822 fputs ("neq", file);
12825 fputs ("eq", file);
12829 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12833 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12837 fputs ("le", file);
12841 fputs ("lt", file);
12844 fputs ("unord", file);
12847 fputs ("ord", file);
12850 fputs ("ueq", file);
12853 fputs ("nlt", file);
12856 fputs ("nle", file);
12859 fputs ("ule", file);
12862 fputs ("ult", file);
12865 fputs ("une", file);
12868 output_operand_lossage ("operand is not a condition code, "
12869 "invalid operand code 'Y'");
12875 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12881 output_operand_lossage ("invalid operand code '%c'", code);
12886 print_reg (x, code, file);
12888 else if (MEM_P (x))
12890 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12891 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12892 && GET_MODE (x) != BLKmode)
12895 switch (GET_MODE_SIZE (GET_MODE (x)))
12897 case 1: size = "BYTE"; break;
12898 case 2: size = "WORD"; break;
12899 case 4: size = "DWORD"; break;
12900 case 8: size = "QWORD"; break;
12901 case 12: size = "TBYTE"; break;
12903 if (GET_MODE (x) == XFmode)
12908 case 32: size = "YMMWORD"; break;
12910 gcc_unreachable ();
12913 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12916 else if (code == 'w')
12918 else if (code == 'k')
12921 fputs (size, file);
12922 fputs (" PTR ", file);
12926 /* Avoid (%rip) for call operands. */
12927 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12928 && !CONST_INT_P (x))
12929 output_addr_const (file, x);
12930 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12931 output_operand_lossage ("invalid constraints for operand");
12933 output_address (x);
12936 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12941 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12942 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12944 if (ASSEMBLER_DIALECT == ASM_ATT)
12946 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12948 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
12950 fprintf (file, "0x%08x", (unsigned int) l);
12953 /* These float cases don't actually occur as immediate operands. */
12954 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12958 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12959 fputs (dstr, file);
12962 else if (GET_CODE (x) == CONST_DOUBLE
12963 && GET_MODE (x) == XFmode)
12967 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12968 fputs (dstr, file);
12973 /* We have patterns that allow zero sets of memory, for instance.
12974 In 64-bit mode, we should probably support all 8-byte vectors,
12975 since we can in fact encode that into an immediate. */
12976 if (GET_CODE (x) == CONST_VECTOR)
12978 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12984 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12986 if (ASSEMBLER_DIALECT == ASM_ATT)
12989 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12990 || GET_CODE (x) == LABEL_REF)
12992 if (ASSEMBLER_DIALECT == ASM_ATT)
12995 fputs ("OFFSET FLAT:", file);
12998 if (CONST_INT_P (x))
12999 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13001 output_pic_addr_const (file, x, code);
13003 output_addr_const (file, x);
13008 ix86_print_operand_punct_valid_p (unsigned char code)
13010 return (code == '*' || code == '+' || code == '&' || code == ';');
13013 /* Print a memory operand whose address is ADDR. */
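/* For illustration (a hypothetical address, not from this file): the
   AT&T form 16(%ebx,%ecx,4) prints in Intel syntax as [ebx+16+ecx*4],
   matching the base/displacement/index*scale handling below.  */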
13016 ix86_print_operand_address (FILE *file, rtx addr)
13018 struct ix86_address parts;
13019 rtx base, index, disp;
13021 int ok = ix86_decompose_address (addr, &parts);
13026 index = parts.index;
13028 scale = parts.scale;
13036 if (ASSEMBLER_DIALECT == ASM_ATT)
13038 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13041 gcc_unreachable ();
/* Use the one-byte-shorter RIP-relative addressing for 64-bit mode.  */
13045 if (TARGET_64BIT && !base && !index)
13049 if (GET_CODE (disp) == CONST
13050 && GET_CODE (XEXP (disp, 0)) == PLUS
13051 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13052 symbol = XEXP (XEXP (disp, 0), 0);
13054 if (GET_CODE (symbol) == LABEL_REF
13055 || (GET_CODE (symbol) == SYMBOL_REF
13056 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13059 if (!base && !index)
/* A displacement-only address requires special attention.  */
13063 if (CONST_INT_P (disp))
13065 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13066 fputs ("ds:", file);
13067 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13070 output_pic_addr_const (file, disp, 0);
13072 output_addr_const (file, disp);
13076 if (ASSEMBLER_DIALECT == ASM_ATT)
13081 output_pic_addr_const (file, disp, 0);
13082 else if (GET_CODE (disp) == LABEL_REF)
13083 output_asm_label (disp);
13085 output_addr_const (file, disp);
13090 print_reg (base, 0, file);
13094 print_reg (index, 0, file);
13096 fprintf (file, ",%d", scale);
13102 rtx offset = NULL_RTX;
13106 /* Pull out the offset of a symbol; print any symbol itself. */
13107 if (GET_CODE (disp) == CONST
13108 && GET_CODE (XEXP (disp, 0)) == PLUS
13109 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13111 offset = XEXP (XEXP (disp, 0), 1);
13112 disp = gen_rtx_CONST (VOIDmode,
13113 XEXP (XEXP (disp, 0), 0));
13117 output_pic_addr_const (file, disp, 0);
13118 else if (GET_CODE (disp) == LABEL_REF)
13119 output_asm_label (disp);
13120 else if (CONST_INT_P (disp))
13123 output_addr_const (file, disp);
13129 print_reg (base, 0, file);
13132 if (INTVAL (offset) >= 0)
13134 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13138 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13145 print_reg (index, 0, file);
13147 fprintf (file, "*%d", scale);
13154 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13157 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13161 if (GET_CODE (x) != UNSPEC)
13164 op = XVECEXP (x, 0, 0);
13165 switch (XINT (x, 1))
13167 case UNSPEC_GOTTPOFF:
13168 output_addr_const (file, op);
13169 /* FIXME: This might be @TPOFF in Sun ld. */
13170 fputs ("@gottpoff", file);
13173 output_addr_const (file, op);
13174 fputs ("@tpoff", file);
13176 case UNSPEC_NTPOFF:
13177 output_addr_const (file, op);
13179 fputs ("@tpoff", file);
13181 fputs ("@ntpoff", file);
13183 case UNSPEC_DTPOFF:
13184 output_addr_const (file, op);
13185 fputs ("@dtpoff", file);
13187 case UNSPEC_GOTNTPOFF:
13188 output_addr_const (file, op);
13190 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13191 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13193 fputs ("@gotntpoff", file);
13195 case UNSPEC_INDNTPOFF:
13196 output_addr_const (file, op);
13197 fputs ("@indntpoff", file);
13200 case UNSPEC_MACHOPIC_OFFSET:
13201 output_addr_const (file, op);
13203 machopic_output_function_base_name (file);
13214 /* Split one or more DImode RTL references into pairs of SImode
13215 references. The RTL can be REG, offsettable MEM, integer constant, or
13216 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13217 split and "num" is its length. lo_half and hi_half are output arrays
13218 that parallel "operands". */
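/* For example (illustrative, little-endian): a DImode register operand
   splits into the SImode subwords at byte offsets 0 (low half) and 4
   (high half); an offsettable MEM splits into the two adjusted SImode
   addresses.  */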
13221 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13225 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory references,
   but we still have to handle them.  */
13231 lo_half[num] = adjust_address (op, SImode, 0);
13232 hi_half[num] = adjust_address (op, SImode, 4);
13236 lo_half[num] = simplify_gen_subreg (SImode, op,
13237 GET_MODE (op) == VOIDmode
13238 ? DImode : GET_MODE (op), 0);
13239 hi_half[num] = simplify_gen_subreg (SImode, op,
13240 GET_MODE (op) == VOIDmode
13241 ? DImode : GET_MODE (op), 4);
13245 /* Split one or more TImode RTL references into pairs of DImode
13246 references. The RTL can be REG, offsettable MEM, integer constant, or
CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
13248 split and "num" is its length. lo_half and hi_half are output arrays
13249 that parallel "operands". */
13252 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13256 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory references, but we
   still have to handle them.  */
13262 lo_half[num] = adjust_address (op, DImode, 0);
13263 hi_half[num] = adjust_address (op, DImode, 8);
13267 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13268 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13273 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13274 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13275 is the expression of the binary operation. The output may either be
13276 emitted here, or returned to the caller, like all output_* functions.
13278 There is no guarantee that the operands are the same mode, as they
13279 might be within FLOAT or FLOAT_EXTEND expressions. */
13281 #ifndef SYSV386_COMPAT
13282 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13283 wants to fix the assemblers because that causes incompatibility
13284 with gcc. No-one wants to fix gcc because that causes
13285 incompatibility with assemblers... You can use the option of
13286 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13287 #define SYSV386_COMPAT 1
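/* Illustration (hedged, referring to the templates further down):
   with SYSV386_COMPAT at 1, the AT&T half of a template such as
   "{p\t%0, %2|rp\t%2, %0}" carries the opposite r-suffix from the
   Intel half, because the affected assemblers swap fsub/fsubr and
   fdiv/fdivr whenever the destination is not st(0).  */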
13291 output_387_binary_op (rtx insn, rtx *operands)
13293 static char buf[40];
13296 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13298 #ifdef ENABLE_CHECKING
/* Even if we do not want to check the inputs, this documents the input
   constraints, which helps in understanding the following code.  */
13301 if (STACK_REG_P (operands[0])
13302 && ((REG_P (operands[1])
13303 && REGNO (operands[0]) == REGNO (operands[1])
13304 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13305 || (REG_P (operands[2])
13306 && REGNO (operands[0]) == REGNO (operands[2])
13307 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13308 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13311 gcc_assert (is_sse);
13314 switch (GET_CODE (operands[3]))
13317 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13318 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13326 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13327 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13335 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13336 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13344 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13345 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13353 gcc_unreachable ();
13360 strcpy (buf, ssep);
13361 if (GET_MODE (operands[0]) == SFmode)
13362 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13364 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13368 strcpy (buf, ssep + 1);
13369 if (GET_MODE (operands[0]) == SFmode)
13370 strcat (buf, "ss\t{%2, %0|%0, %2}");
13372 strcat (buf, "sd\t{%2, %0|%0, %2}");
13378 switch (GET_CODE (operands[3]))
13382 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13384 rtx temp = operands[2];
13385 operands[2] = operands[1];
13386 operands[1] = temp;
/* We now know operands[0] == operands[1].  */
13391 if (MEM_P (operands[2]))
13397 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13399 if (STACK_TOP_P (operands[0]))
13400 /* How is it that we are storing to a dead operand[2]?
13401 Well, presumably operands[1] is dead too. We can't
13402 store the result to st(0) as st(0) gets popped on this
13403 instruction. Instead store to operands[2] (which I
13404 think has to be st(1)). st(1) will be popped later.
13405 gcc <= 2.8.1 didn't have this check and generated
13406 assembly code that the Unixware assembler rejected. */
13407 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13409 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13413 if (STACK_TOP_P (operands[0]))
13414 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13416 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13421 if (MEM_P (operands[1]))
13427 if (MEM_P (operands[2]))
13433 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13436 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13437 derived assemblers, confusingly reverse the direction of
13438 the operation for fsub{r} and fdiv{r} when the
13439 destination register is not st(0). The Intel assembler
13440 doesn't have this brain damage. Read !SYSV386_COMPAT to
13441 figure out what the hardware really does. */
13442 if (STACK_TOP_P (operands[0]))
13443 p = "{p\t%0, %2|rp\t%2, %0}";
13445 p = "{rp\t%2, %0|p\t%0, %2}";
13447 if (STACK_TOP_P (operands[0]))
13448 /* As above for fmul/fadd, we can't store to st(0). */
13449 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13451 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13456 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13459 if (STACK_TOP_P (operands[0]))
13460 p = "{rp\t%0, %1|p\t%1, %0}";
13462 p = "{p\t%1, %0|rp\t%0, %1}";
13464 if (STACK_TOP_P (operands[0]))
13465 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13467 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13472 if (STACK_TOP_P (operands[0]))
13474 if (STACK_TOP_P (operands[1]))
13475 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13477 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13480 else if (STACK_TOP_P (operands[1]))
13483 p = "{\t%1, %0|r\t%0, %1}";
13485 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13491 p = "{r\t%2, %0|\t%0, %2}";
13493 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13499 gcc_unreachable ();
13506 /* Return needed mode for entity in optimize_mode_switching pass. */
13509 ix86_mode_needed (int entity, rtx insn)
13511 enum attr_i387_cw mode;
/* The mode UNINITIALIZED is used to store the control word after a
   function call or ASM pattern.  The mode ANY specifies that the
   function has no requirements on the control word and makes no
   changes in the bits we are interested in.  */
13519 || (NONJUMP_INSN_P (insn)
13520 && (asm_noperands (PATTERN (insn)) >= 0
13521 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13522 return I387_CW_UNINITIALIZED;
13524 if (recog_memoized (insn) < 0)
13525 return I387_CW_ANY;
13527 mode = get_attr_i387_cw (insn);
13532 if (mode == I387_CW_TRUNC)
13537 if (mode == I387_CW_FLOOR)
13542 if (mode == I387_CW_CEIL)
13547 if (mode == I387_CW_MASK_PM)
13552 gcc_unreachable ();
13555 return I387_CW_ANY;
/* Output code to initialize the control word copies used by the
   trunc?f?i and rounding patterns.  CURRENT_MODE is set to the
   current control word, while NEW_MODE is set to the new control
   word.  */
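/* Background for the magic numbers below (standard x87, stated for
   clarity): the rounding-control field is bits 10-11 of the control
   word, hence the 0x0c00 masks -- 0x0000 round-to-nearest, 0x0400
   round down, 0x0800 round up, 0x0c00 truncate; bit 5 (0x0020) masks
   the precision exception for nearbyint().  */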
13563 emit_i387_cw_initialization (int mode)
13565 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13568 enum ix86_stack_slot slot;
13570 rtx reg = gen_reg_rtx (HImode);
13572 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13573 emit_move_insn (reg, copy_rtx (stored_mode));
13575 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13576 || optimize_function_for_size_p (cfun))
13580 case I387_CW_TRUNC:
13581 /* round toward zero (truncate) */
13582 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13583 slot = SLOT_CW_TRUNC;
13586 case I387_CW_FLOOR:
13587 /* round down toward -oo */
13588 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13589 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13590 slot = SLOT_CW_FLOOR;
13594 /* round up toward +oo */
13595 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13596 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13597 slot = SLOT_CW_CEIL;
13600 case I387_CW_MASK_PM:
13601 /* mask precision exception for nearbyint() */
13602 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13603 slot = SLOT_CW_MASK_PM;
13607 gcc_unreachable ();
13614 case I387_CW_TRUNC:
13615 /* round toward zero (truncate) */
13616 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13617 slot = SLOT_CW_TRUNC;
13620 case I387_CW_FLOOR:
13621 /* round down toward -oo */
13622 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13623 slot = SLOT_CW_FLOOR;
13627 /* round up toward +oo */
13628 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13629 slot = SLOT_CW_CEIL;
13632 case I387_CW_MASK_PM:
13633 /* mask precision exception for nearbyint() */
13634 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13635 slot = SLOT_CW_MASK_PM;
13639 gcc_unreachable ();
13643 gcc_assert (slot < MAX_386_STACK_LOCALS);
13645 new_mode = assign_386_stack_local (HImode, slot);
13646 emit_move_insn (new_mode, reg);
13649 /* Output code for INSN to convert a float to a signed int. OPERANDS
13650 are the insn operands. The output may be [HSD]Imode and the input
13651 operand may be [SDX]Fmode. */
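/* For example (illustrative): truncating %st(0) to DImode memory
   without fisttp emits "fldcw %3" to switch to truncation rounding,
   then "fistp%Z0" to store and pop, then "fldcw %2" to restore the
   previous control word -- the sequence emitted below.  */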
13654 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13656 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13657 int dimode_p = GET_MODE (operands[0]) == DImode;
13658 int round_mode = get_attr_i387_cw (insn);
13660 /* Jump through a hoop or two for DImode, since the hardware has no
13661 non-popping instruction. We used to do this a different way, but
13662 that was somewhat fragile and broke with post-reload splitters. */
13663 if ((dimode_p || fisttp) && !stack_top_dies)
13664 output_asm_insn ("fld\t%y1", operands);
13666 gcc_assert (STACK_TOP_P (operands[1]));
13667 gcc_assert (MEM_P (operands[0]));
13668 gcc_assert (GET_MODE (operands[1]) != TFmode);
13671 output_asm_insn ("fisttp%Z0\t%0", operands);
13674 if (round_mode != I387_CW_ANY)
13675 output_asm_insn ("fldcw\t%3", operands);
13676 if (stack_top_dies || dimode_p)
13677 output_asm_insn ("fistp%Z0\t%0", operands);
13679 output_asm_insn ("fist%Z0\t%0", operands);
13680 if (round_mode != I387_CW_ANY)
13681 output_asm_insn ("fldcw\t%2", operands);
13687 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13688 have the values zero or one, indicates the ffreep insn's operand
13689 from the OPERANDS array. */
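/* Note (illustrative): "ffreep %st(i)" encodes as the two bytes
   0xdf 0xc0+i, so when the assembler lacks the mnemonic the code
   below materializes it via ASM_SHORT as the little-endian word
   0xc?df.  */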
13691 static const char *
13692 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13694 if (TARGET_USE_FFREEP)
13695 #ifdef HAVE_AS_IX86_FFREEP
13696 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13699 static char retval[32];
13700 int regno = REGNO (operands[opno]);
13702 gcc_assert (FP_REGNO_P (regno));
13704 regno -= FIRST_STACK_REG;
13706 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13711 return opno ? "fstp\t%y1" : "fstp\t%y0";
13715 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13716 should be used. UNORDERED_P is true when fucom should be used. */
13719 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13721 int stack_top_dies;
13722 rtx cmp_op0, cmp_op1;
13723 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13727 cmp_op0 = operands[0];
13728 cmp_op1 = operands[1];
13732 cmp_op0 = operands[1];
13733 cmp_op1 = operands[2];
13738 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13739 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13740 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13741 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
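/* Note (illustrative): indexing one past the leading 'v' turns the
   AVX mnemonic into its SSE counterpart, e.g. "vucomiss" -> "ucomiss",
   which is what the TARGET_AVX ? 0 : 1 returns below rely on.  */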
13743 if (GET_MODE (operands[0]) == SFmode)
13745 return &ucomiss[TARGET_AVX ? 0 : 1];
13747 return &comiss[TARGET_AVX ? 0 : 1];
13750 return &ucomisd[TARGET_AVX ? 0 : 1];
13752 return &comisd[TARGET_AVX ? 0 : 1];
13755 gcc_assert (STACK_TOP_P (cmp_op0));
13757 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13759 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13761 if (stack_top_dies)
13763 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13764 return output_387_ffreep (operands, 1);
13767 return "ftst\n\tfnstsw\t%0";
13770 if (STACK_REG_P (cmp_op1)
13772 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13773 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If both the top of the 387 stack and the other operand (also a
   stack register) die, then this must be a `fcompp' float compare.  */
13781 /* There is no double popping fcomi variant. Fortunately,
13782 eflags is immune from the fstp's cc clobbering. */
13784 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13786 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13787 return output_387_ffreep (operands, 0);
13792 return "fucompp\n\tfnstsw\t%0";
13794 return "fcompp\n\tfnstsw\t%0";
13799 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
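/* Worked example (illustrative): fucomip has eflags_p = 1, a
   non-integer operand, unordered_p = 1 and a dying stack top, so
   mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting the
   fucomip entry below.  */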
13801 static const char * const alt[16] =
13803 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13804 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13805 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13806 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13808 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13809 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13813 "fcomi\t{%y1, %0|%0, %y1}",
13814 "fcomip\t{%y1, %0|%0, %y1}",
13815 "fucomi\t{%y1, %0|%0, %y1}",
13816 "fucomip\t{%y1, %0|%0, %y1}",
13827 mask = eflags_p << 3;
13828 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13829 mask |= unordered_p << 1;
13830 mask |= stack_top_dies;
13832 gcc_assert (mask < 16);
13841 ix86_output_addr_vec_elt (FILE *file, int value)
13843 const char *directive = ASM_LONG;
13847 directive = ASM_QUAD;
13849 gcc_assert (!TARGET_64BIT);
13852 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13856 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13858 const char *directive = ASM_LONG;
13861 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13862 directive = ASM_QUAD;
13864 gcc_assert (!TARGET_64BIT);
13866 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13867 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13868 fprintf (file, "%s%s%d-%s%d\n",
13869 directive, LPREFIX, value, LPREFIX, rel);
13870 else if (HAVE_AS_GOTOFF_IN_DATA)
13871 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13873 else if (TARGET_MACHO)
13875 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13876 machopic_output_function_base_name (file);
13881 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13882 GOT_SYMBOL_NAME, LPREFIX, value);
13885 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13889 ix86_expand_clear (rtx dest)
13893 /* We play register width games, which are only valid after reload. */
13894 gcc_assert (reload_completed);
13896 /* Avoid HImode and its attendant prefix byte. */
13897 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13898 dest = gen_rtx_REG (SImode, REGNO (dest));
13899 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13901 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13902 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13904 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13905 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
13911 /* X is an unchanging MEM. If it is a constant pool reference, return
13912 the constant pool rtx, else NULL. */
13915 maybe_get_pool_constant (rtx x)
13917 x = ix86_delegitimize_address (XEXP (x, 0));
13919 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13920 return get_pool_constant (x);
13926 ix86_expand_move (enum machine_mode mode, rtx operands[])
13929 enum tls_model model;
13934 if (GET_CODE (op1) == SYMBOL_REF)
13936 model = SYMBOL_REF_TLS_MODEL (op1);
13939 op1 = legitimize_tls_address (op1, model, true);
13940 op1 = force_operand (op1, op0);
13944 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13945 && SYMBOL_REF_DLLIMPORT_P (op1))
13946 op1 = legitimize_dllimport_symbol (op1, false);
13948 else if (GET_CODE (op1) == CONST
13949 && GET_CODE (XEXP (op1, 0)) == PLUS
13950 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13952 rtx addend = XEXP (XEXP (op1, 0), 1);
13953 rtx symbol = XEXP (XEXP (op1, 0), 0);
13956 model = SYMBOL_REF_TLS_MODEL (symbol);
13958 tmp = legitimize_tls_address (symbol, model, true);
13959 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13960 && SYMBOL_REF_DLLIMPORT_P (symbol))
13961 tmp = legitimize_dllimport_symbol (symbol, true);
13965 tmp = force_operand (tmp, NULL);
13966 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13967 op0, 1, OPTAB_DIRECT);
13973 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13975 if (TARGET_MACHO && !TARGET_64BIT)
13980 rtx temp = ((reload_in_progress
13981 || ((op0 && REG_P (op0))
13983 ? op0 : gen_reg_rtx (Pmode));
13984 op1 = machopic_indirect_data_reference (op1, temp);
13985 op1 = machopic_legitimize_pic_address (op1, mode,
13986 temp == op1 ? 0 : temp);
13988 else if (MACHOPIC_INDIRECT)
13989 op1 = machopic_indirect_data_reference (op1, 0);
13997 op1 = force_reg (Pmode, op1);
13998 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14000 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14001 op1 = legitimize_pic_address (op1, reg);
14010 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14011 || !push_operand (op0, mode))
14013 op1 = force_reg (mode, op1);
14015 if (push_operand (op0, mode)
14016 && ! general_no_elim_operand (op1, mode))
14017 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64-bit compilation into a register
   to get them CSEd.  */
14021 if (can_create_pseudo_p ()
14022 && (mode == DImode) && TARGET_64BIT
14023 && immediate_operand (op1, mode)
14024 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14025 && !register_operand (op0, mode)
14027 op1 = copy_to_mode_reg (mode, op1);
14029 if (can_create_pseudo_p ()
14030 && FLOAT_MODE_P (mode)
14031 && GET_CODE (op1) == CONST_DOUBLE)
/* If we are loading a floating point constant to a register,
   force the value to memory now, since we'll get better code
   out of the back end.  */
14037 op1 = validize_mem (force_const_mem (mode, op1));
14038 if (!register_operand (op0, mode))
14040 rtx temp = gen_reg_rtx (mode);
14041 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14042 emit_move_insn (op0, temp);
14048 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14052 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14054 rtx op0 = operands[0], op1 = operands[1];
14055 unsigned int align = GET_MODE_ALIGNMENT (mode);
/* Force constants other than zero into memory.  We do not know how
   the instructions used to build constants modify the upper 64 bits
   of the register; once we have that information, we may be able
   to handle some of them more efficiently.  */
14061 if (can_create_pseudo_p ()
14062 && register_operand (op0, mode)
14063 && (CONSTANT_P (op1)
14064 || (GET_CODE (op1) == SUBREG
14065 && CONSTANT_P (SUBREG_REG (op1))))
14066 && !standard_sse_constant_p (op1))
14067 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since attributes
   can make operands unaligned.  */
14071 if (can_create_pseudo_p ()
14072 && SSE_REG_MODE_P (mode)
14073 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14074 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14078 /* ix86_expand_vector_move_misalign() does not like constants ... */
14079 if (CONSTANT_P (op1)
14080 || (GET_CODE (op1) == SUBREG
14081 && CONSTANT_P (SUBREG_REG (op1))))
14082 op1 = validize_mem (force_const_mem (mode, op1));
14084 /* ... nor both arguments in memory. */
14085 if (!register_operand (op0, mode)
14086 && !register_operand (op1, mode))
14087 op1 = force_reg (mode, op1);
14089 tmp[0] = op0; tmp[1] = op1;
14090 ix86_expand_vector_move_misalign (mode, tmp);
14094 /* Make operand1 a register if it isn't already. */
14095 if (can_create_pseudo_p ()
14096 && !register_operand (op0, mode)
14097 && !register_operand (op1, mode))
14099 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14103 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14106 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14107 straight to ix86_expand_vector_move. */
14108 /* Code generation for scalar reg-reg moves of single and double precision data:
14109 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
14113 if (x86_sse_partial_reg_dependency == true)
14118 Code generation for scalar loads of double precision data:
14119 if (x86_sse_split_regs == true)
14120 movlpd mem, reg (gas syntax)
14124 Code generation for unaligned packed loads of single precision data
14125 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14126 if (x86_sse_unaligned_move_optimal)
14129 if (x86_sse_partial_reg_dependency == true)
14141 Code generation for unaligned packed loads of double precision data
14142 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14143 if (x86_sse_unaligned_move_optimal)
14146 if (x86_sse_split_regs == true)
14159 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14168 switch (GET_MODE_CLASS (mode))
14170 case MODE_VECTOR_INT:
14172 switch (GET_MODE_SIZE (mode))
14175 /* If we're optimizing for size, movups is the smallest. */
14176 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14178 op0 = gen_lowpart (V4SFmode, op0);
14179 op1 = gen_lowpart (V4SFmode, op1);
14180 emit_insn (gen_avx_movups (op0, op1));
14183 op0 = gen_lowpart (V16QImode, op0);
14184 op1 = gen_lowpart (V16QImode, op1);
14185 emit_insn (gen_avx_movdqu (op0, op1));
14188 op0 = gen_lowpart (V32QImode, op0);
14189 op1 = gen_lowpart (V32QImode, op1);
14190 emit_insn (gen_avx_movdqu256 (op0, op1));
14193 gcc_unreachable ();
14196 case MODE_VECTOR_FLOAT:
14197 op0 = gen_lowpart (mode, op0);
14198 op1 = gen_lowpart (mode, op1);
14203 emit_insn (gen_avx_movups (op0, op1));
14206 emit_insn (gen_avx_movups256 (op0, op1));
14209 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14211 op0 = gen_lowpart (V4SFmode, op0);
14212 op1 = gen_lowpart (V4SFmode, op1);
14213 emit_insn (gen_avx_movups (op0, op1));
14216 emit_insn (gen_avx_movupd (op0, op1));
14219 emit_insn (gen_avx_movupd256 (op0, op1));
14222 gcc_unreachable ();
14227 gcc_unreachable ();
14235 /* If we're optimizing for size, movups is the smallest. */
14236 if (optimize_insn_for_size_p ()
14237 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14239 op0 = gen_lowpart (V4SFmode, op0);
14240 op1 = gen_lowpart (V4SFmode, op1);
14241 emit_insn (gen_sse_movups (op0, op1));
14245 /* ??? If we have typed data, then it would appear that using
14246 movdqu is the only way to get unaligned data loaded with
14248 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14250 op0 = gen_lowpart (V16QImode, op0);
14251 op1 = gen_lowpart (V16QImode, op1);
14252 emit_insn (gen_sse2_movdqu (op0, op1));
14256 if (TARGET_SSE2 && mode == V2DFmode)
14260 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14262 op0 = gen_lowpart (V2DFmode, op0);
14263 op1 = gen_lowpart (V2DFmode, op1);
14264 emit_insn (gen_sse2_movupd (op0, op1));
14268 /* When SSE registers are split into halves, we can avoid
14269 writing to the top half twice. */
14270 if (TARGET_SSE_SPLIT_REGS)
14272 emit_clobber (op0);
14277 /* ??? Not sure about the best option for the Intel chips.
14278 The following would seem to satisfy; the register is
14279 entirely cleared, breaking the dependency chain. We
14280 then store to the upper half, with a dependency depth
14281 of one. A rumor has it that Intel recommends two movsd
14282 followed by an unpacklpd, but this is unconfirmed. And
14283 given that the dependency depth of the unpacklpd would
14284 still be one, I'm not sure why this would be better. */
14285 zero = CONST0_RTX (V2DFmode);
14288 m = adjust_address (op1, DFmode, 0);
14289 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14290 m = adjust_address (op1, DFmode, 8);
14291 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14295 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14297 op0 = gen_lowpart (V4SFmode, op0);
14298 op1 = gen_lowpart (V4SFmode, op1);
14299 emit_insn (gen_sse_movups (op0, op1));
14303 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14304 emit_move_insn (op0, CONST0_RTX (mode));
14306 emit_clobber (op0);
14308 if (mode != V4SFmode)
14309 op0 = gen_lowpart (V4SFmode, op0);
14310 m = adjust_address (op1, V2SFmode, 0);
14311 emit_insn (gen_sse_loadlps (op0, op0, m));
14312 m = adjust_address (op1, V2SFmode, 8);
14313 emit_insn (gen_sse_loadhps (op0, op0, m));
14316 else if (MEM_P (op0))
14318 /* If we're optimizing for size, movups is the smallest. */
14319 if (optimize_insn_for_size_p ()
14320 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14322 op0 = gen_lowpart (V4SFmode, op0);
14323 op1 = gen_lowpart (V4SFmode, op1);
14324 emit_insn (gen_sse_movups (op0, op1));
/* ??? Similar to above, only less clear because of the
   "typeless stores" issue.  */
14330 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14331 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14333 op0 = gen_lowpart (V16QImode, op0);
14334 op1 = gen_lowpart (V16QImode, op1);
14335 emit_insn (gen_sse2_movdqu (op0, op1));
14339 if (TARGET_SSE2 && mode == V2DFmode)
14341 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14343 op0 = gen_lowpart (V2DFmode, op0);
14344 op1 = gen_lowpart (V2DFmode, op1);
14345 emit_insn (gen_sse2_movupd (op0, op1));
14349 m = adjust_address (op0, DFmode, 0);
14350 emit_insn (gen_sse2_storelpd (m, op1));
14351 m = adjust_address (op0, DFmode, 8);
14352 emit_insn (gen_sse2_storehpd (m, op1));
14357 if (mode != V4SFmode)
14358 op1 = gen_lowpart (V4SFmode, op1);
14360 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14362 op0 = gen_lowpart (V4SFmode, op0);
14363 emit_insn (gen_sse_movups (op0, op1));
14367 m = adjust_address (op0, V2SFmode, 0);
14368 emit_insn (gen_sse_storelps (m, op1));
14369 m = adjust_address (op0, V2SFmode, 8);
14370 emit_insn (gen_sse_storehps (m, op1));
14375 gcc_unreachable ();
14378 /* Expand a push in MODE. This is some mode for which we do not support
14379 proper push instructions, at least from the registers that we expect
14380 the value to live in. */
14383 ix86_expand_push (enum machine_mode mode, rtx x)
14387 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14388 GEN_INT (-GET_MODE_SIZE (mode)),
14389 stack_pointer_rtx, 1, OPTAB_DIRECT);
14390 if (tmp != stack_pointer_rtx)
14391 emit_move_insn (stack_pointer_rtx, tmp);
14393 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto the stack, it has to be aligned at
   least at the function argument boundary.  However, since we don't
   have the argument type, we can't determine the actual argument
14399 emit_move_insn (tmp, x);
14402 /* Helper function of ix86_fixup_binary_operands to canonicalize
14403 operand order. Returns true if the operands should be swapped. */
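/* For example (illustrative): for "a = b + a" the operands are swapped
   so that src1 matches the destination, enabling the two-address form;
   likewise "a = 5 + b" becomes "a = b + 5" so that the immediate comes
   second.  */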
14406 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14409 rtx dst = operands[0];
14410 rtx src1 = operands[1];
14411 rtx src2 = operands[2];
14413 /* If the operation is not commutative, we can't do anything. */
14414 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14417 /* Highest priority is that src1 should match dst. */
14418 if (rtx_equal_p (dst, src1))
14420 if (rtx_equal_p (dst, src2))
14423 /* Next highest priority is that immediate constants come second. */
14424 if (immediate_operand (src2, mode))
14426 if (immediate_operand (src1, mode))
14429 /* Lowest priority is that memory references should come second. */
14439 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14440 destination to use for the operation. If different from the true
14441 destination in operands[0], a copy operation will be required. */
14444 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14447 rtx dst = operands[0];
14448 rtx src1 = operands[1];
14449 rtx src2 = operands[2];
14451 /* Canonicalize operand order. */
14452 if (ix86_swap_binary_operands_p (code, mode, operands))
14456 /* It is invalid to swap operands of different modes. */
14457 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14464 /* Both source operands cannot be in memory. */
14465 if (MEM_P (src1) && MEM_P (src2))
14467 /* Optimization: Only read from memory once. */
14468 if (rtx_equal_p (src1, src2))
14470 src2 = force_reg (mode, src2);
14474 src2 = force_reg (mode, src2);
14477 /* If the destination is memory, and we do not have matching source
14478 operands, do things in registers. */
14479 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14480 dst = gen_reg_rtx (mode);
14482 /* Source 1 cannot be a constant. */
14483 if (CONSTANT_P (src1))
14484 src1 = force_reg (mode, src1);
14486 /* Source 1 cannot be a non-matching memory. */
14487 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14488 src1 = force_reg (mode, src1);
14490 operands[1] = src1;
14491 operands[2] = src2;
14495 /* Similarly, but assume that the destination has already been
14496 set up properly. */
14499 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14500 enum machine_mode mode, rtx operands[])
14502 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14503 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to
   the actual machine than just general_operand, which would allow
   3 separate memory references (one output, two input) in a single
   insn.  */
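/* For example (illustrative): for "a = b + c" with a, b and c all in
   memory, src2 is loaded into a register first; the destination then
   no longer matches src1, so src1 is loaded as well, the sum is formed
   in a register, and a final move stores it back to a.  */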
14511 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14514 rtx src1, src2, dst, op, clob;
14516 dst = ix86_fixup_binary_operands (code, mode, operands);
14517 src1 = operands[1];
14518 src2 = operands[2];
14520 /* Emit the instruction. */
14522 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14523 if (reload_in_progress)
14525 /* Reload doesn't know about the flags register, and doesn't know that
14526 it doesn't want to clobber it. We can only do this with PLUS. */
14527 gcc_assert (code == PLUS);
14532 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14533 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14536 /* Fix up the destination if needed. */
14537 if (dst != operands[0])
14538 emit_move_insn (operands[0], dst);
14541 /* Return TRUE or FALSE depending on whether the binary operator meets the
14542 appropriate constraints. */
14545 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14548 rtx dst = operands[0];
14549 rtx src1 = operands[1];
14550 rtx src2 = operands[2];
14552 /* Both source operands cannot be in memory. */
14553 if (MEM_P (src1) && MEM_P (src2))
14556 /* Canonicalize operand order for commutative operators. */
14557 if (ix86_swap_binary_operands_p (code, mode, operands))
14564 /* If the destination is memory, we must have a matching source operand. */
14565 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14568 /* Source 1 cannot be a constant. */
14569 if (CONSTANT_P (src1))
14572 /* Source 1 cannot be a non-matching memory. */
14573 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
/* Attempt to expand a unary operator.  Make the expansion closer to
   the actual machine than just general_operand, which would allow
   2 separate memory references (one output, one input) in a single
   insn.  */
14584 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14587 int matching_memory;
14588 rtx src, dst, op, clob;
14593 /* If the destination is memory, and we do not have matching source
14594 operands, do things in registers. */
14595 matching_memory = 0;
14598 if (rtx_equal_p (dst, src))
14599 matching_memory = 1;
14601 dst = gen_reg_rtx (mode);
14604 /* When source operand is memory, destination must match. */
14605 if (MEM_P (src) && !matching_memory)
14606 src = force_reg (mode, src);
14608 /* Emit the instruction. */
14610 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14611 if (reload_in_progress || code == NOT)
14613 /* Reload doesn't know about the flags register, and doesn't know that
14614 it doesn't want to clobber it. */
14615 gcc_assert (code == NOT);
14620 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14621 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14624 /* Fix up the destination if needed. */
14625 if (dst != operands[0])
14626 emit_move_insn (operands[0], dst);
14629 #define LEA_SEARCH_THRESHOLD 12
/* Search backward for a non-AGU definition of register number REGNO1
   or register number REGNO2 in INSN's basic block until we
   1. pass LEA_SEARCH_THRESHOLD instructions, or
   2. reach the BB boundary, or
   3. reach an AGU definition.
   Returns the distance between the non-AGU definition point and INSN.
   If there is no definition point, returns -1.  */
14640 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14643 basic_block bb = BLOCK_FOR_INSN (insn);
14646 enum attr_type insn_type;
14648 if (insn != BB_HEAD (bb))
14650 rtx prev = PREV_INSN (insn);
14651 while (prev && distance < LEA_SEARCH_THRESHOLD)
14653 if (NONDEBUG_INSN_P (prev))
14656 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14657 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14658 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14659 && (regno1 == DF_REF_REGNO (*def_rec)
14660 || regno2 == DF_REF_REGNO (*def_rec)))
14662 insn_type = get_attr_type (prev);
14663 if (insn_type != TYPE_LEA)
14667 if (prev == BB_HEAD (bb))
14669 prev = PREV_INSN (prev);
14673 if (distance < LEA_SEARCH_THRESHOLD)
14677 bool simple_loop = false;
14679 FOR_EACH_EDGE (e, ei, bb->preds)
14682 simple_loop = true;
14688 rtx prev = BB_END (bb);
14691 && distance < LEA_SEARCH_THRESHOLD)
14693 if (NONDEBUG_INSN_P (prev))
14696 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14697 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14698 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14699 && (regno1 == DF_REF_REGNO (*def_rec)
14700 || regno2 == DF_REF_REGNO (*def_rec)))
14702 insn_type = get_attr_type (prev);
14703 if (insn_type != TYPE_LEA)
14707 prev = PREV_INSN (prev);
14715 /* get_attr_type may modify recog data. We want to make sure
14716 that recog data is valid for instruction INSN, on which
14717 distance_non_agu_define is called. INSN is unchanged here. */
14718 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set.  */
14727 distance_agu_use (unsigned int regno0, rtx insn)
14729 basic_block bb = BLOCK_FOR_INSN (insn);
14734 if (insn != BB_END (bb))
14736 rtx next = NEXT_INSN (insn);
14737 while (next && distance < LEA_SEARCH_THRESHOLD)
14739 if (NONDEBUG_INSN_P (next))
14743 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14744 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14745 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14746 && regno0 == DF_REF_REGNO (*use_rec))
/* Return DISTANCE if OP0 is used in a memory
   address in NEXT.  */
14753 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14754 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14755 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14756 && regno0 == DF_REF_REGNO (*def_rec))
14758 /* Return -1 if OP0 is set in NEXT. */
14762 if (next == BB_END (bb))
14764 next = NEXT_INSN (next);
14768 if (distance < LEA_SEARCH_THRESHOLD)
14772 bool simple_loop = false;
14774 FOR_EACH_EDGE (e, ei, bb->succs)
14777 simple_loop = true;
14783 rtx next = BB_HEAD (bb);
14786 && distance < LEA_SEARCH_THRESHOLD)
14788 if (NONDEBUG_INSN_P (next))
14792 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14793 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14794 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14795 && regno0 == DF_REF_REGNO (*use_rec))
/* Return DISTANCE if OP0 is used in a memory
   address in NEXT.  */
14802 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14803 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14804 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14805 && regno0 == DF_REF_REGNO (*def_rec))
14807 /* Return -1 if OP0 is set in NEXT. */
14812 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs. ADD; it takes effect when
   there is a dilemma of choosing LEA or ADD.
   Negative value: ADD is more preferred than LEA.
   Positive value: LEA is more preferred than ADD.  */
14825 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to a LEA
   operation to avoid flag register consumption.  For most processors,
   ADD is faster than LEA.  For processors like ATOM, if the
   destination register of LEA holds an actual address which will be
   used soon, LEA is better; otherwise ADD is better.  */
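/* Illustrative reading of the distance test below (assuming the
   surrounding control flow): with IX86_LEA_PRIORITY == 2, the next
   address use must lie more than dist_define + 2 insns ahead before
   the flag-friendly ADD is preferred over LEA.  */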
14834 ix86_lea_for_add_ok (rtx insn, rtx operands[])
14836 unsigned int regno0 = true_regnum (operands[0]);
14837 unsigned int regno1 = true_regnum (operands[1]);
14838 unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c (a != b && a != c), we must use the lea form.  */
14841 if (regno0 != regno1 && regno0 != regno2)
14844 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14848 int dist_define, dist_use;
14849 dist_define = distance_non_agu_define (regno1, regno2, insn);
14850 if (dist_define <= 0)
/* If this insn has both a backward non-AGU dependence and a forward
   AGU dependence, the one with the shorter distance takes effect.  */
14855 dist_use = distance_agu_use (regno0, insn);
14857 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
14864 /* Return true if destination reg of SET_BODY is shift count of
14868 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14874 /* Retrieve destination of SET_BODY. */
14875 switch (GET_CODE (set_body))
14878 set_dest = SET_DEST (set_body);
14879 if (!set_dest || !REG_P (set_dest))
14883 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14884 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14892 /* Retrieve shift count of USE_BODY. */
14893 switch (GET_CODE (use_body))
14896 shift_rtx = XEXP (use_body, 1);
14899 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14900 if (ix86_dep_by_shift_count_body (set_body,
14901 XVECEXP (use_body, 0, i)))
14909 && (GET_CODE (shift_rtx) == ASHIFT
14910 || GET_CODE (shift_rtx) == LSHIFTRT
14911 || GET_CODE (shift_rtx) == ASHIFTRT
14912 || GET_CODE (shift_rtx) == ROTATE
14913 || GET_CODE (shift_rtx) == ROTATERT))
14915 rtx shift_count = XEXP (shift_rtx, 1);
14917 /* Return true if shift count is dest of SET_BODY. */
14918 if (REG_P (shift_count)
14919 && true_regnum (set_dest) == true_regnum (shift_count))
14926 /* Return true if destination reg of SET_INSN is shift count of
14930 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14932 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14933 PATTERN (use_insn));
14936 /* Return TRUE or FALSE depending on whether the unary operator meets the
14937 appropriate constraints. */
14940 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14941 enum machine_mode mode ATTRIBUTE_UNUSED,
14942 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, source and destination must match.  */
14945 if ((MEM_P (operands[0])
14946 || MEM_P (operands[1]))
14947 && ! rtx_equal_p (operands[0], operands[1]))
14952 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14953 are ok, keeping in mind the possible movddup alternative. */
14956 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14958 if (MEM_P (operands[0]))
14959 return rtx_equal_p (operands[0], operands[1 + high]);
14960 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14961 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14965 /* Post-reload splitter for converting an SF or DFmode value in an
14966 SSE register into an unsigned SImode. */
14969 ix86_split_convert_uns_si_sse (rtx operands[])
14971 enum machine_mode vecmode;
14972 rtx value, large, zero_or_two31, input, two31, x;
14974 large = operands[1];
14975 zero_or_two31 = operands[2];
14976 input = operands[3];
14977 two31 = operands[4];
14978 vecmode = GET_MODE (large);
14979 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14981 /* Load up the value into the low element. We must ensure that the other
14982 elements are valid floats -- zero is the easiest such value. */
14985 if (vecmode == V4SFmode)
14986 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14988 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14992 input = gen_rtx_REG (vecmode, REGNO (input));
14993 emit_move_insn (value, CONST0_RTX (vecmode));
14994 if (vecmode == V4SFmode)
14995 emit_insn (gen_sse_movss (value, value, input));
14997 emit_insn (gen_sse2_movsd (value, value, input));
15000 emit_move_insn (large, two31);
15001 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15003 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15004 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15006 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15007 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15009 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15010 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15012 large = gen_rtx_REG (V4SImode, REGNO (large));
15013 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15015 x = gen_rtx_REG (V4SImode, REGNO (value));
15016 if (vecmode == V4SFmode)
15017 emit_insn (gen_sse2_cvttps2dq (x, value));
15019 emit_insn (gen_sse2_cvttpd2dq (x, value));
15022 emit_insn (gen_xorv4si3 (value, value, large));
15025 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15026 Expects the 64-bit DImode to be supplied in a pair of integral
15027 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15028 -mfpmath=sse, !optimize_size only. */
15031 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15033 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15034 rtx int_xmm, fp_xmm;
15035 rtx biases, exponents;
15038 int_xmm = gen_reg_rtx (V4SImode);
15039 if (TARGET_INTER_UNIT_MOVES)
15040 emit_insn (gen_movdi_to_sse (int_xmm, input));
15041 else if (TARGET_SSE_SPLIT_REGS)
15043 emit_clobber (int_xmm);
15044 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15048 x = gen_reg_rtx (V2DImode);
15049 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15050 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15053 x = gen_rtx_CONST_VECTOR (V4SImode,
15054 gen_rtvec (4, GEN_INT (0x43300000UL),
15055 GEN_INT (0x45300000UL),
15056 const0_rtx, const0_rtx));
15057 exponents = validize_mem (force_const_mem (V4SImode, x));
15059 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15060 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
15062 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
15063 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15064 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15065 (0x1.0p84 + double(fp_value_hi_xmm)).
15066 Note these exponents differ by 32. */
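/* Worked example (illustrative): 0x43300000 is the high word of the
   IEEE double 0x1.0p52 (biased exponent 1023 + 52), so
   (0x1.0p52 + (double) lo32) - 0x1.0p52 == (double) lo32 exactly;
   0x45300000 plays the same role for 0x1.0p84 == 0x1.0p52 * 0x1.0p32
   on the high half, and the two partial doubles sum to the full
   unsigned 64-bit value.  */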
15068 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15070 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15071 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15072 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15073 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15074 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15075 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15076 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15077 biases = validize_mem (force_const_mem (V2DFmode, biases));
15078 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15080 /* Add the upper and lower DFmode values together. */
15082 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15085 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15086 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15087 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15090 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15093 /* Not used, but eases macroization of patterns. */
15095 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15096 rtx input ATTRIBUTE_UNUSED)
15098 gcc_unreachable ();
15101 /* Convert an unsigned SImode value into a DFmode. Only currently used
15102 for SSE, but applicable anywhere. */
15105 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15107 REAL_VALUE_TYPE TWO31r;
15110 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15111 NULL, 1, OPTAB_DIRECT);
15113 fp = gen_reg_rtx (DFmode);
15114 emit_insn (gen_floatsidf2 (fp, x));
15116 real_ldexp (&TWO31r, &dconst1, 31);
15117 x = const_double_from_real_value (TWO31r, DFmode);
15119 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15121 emit_move_insn (target, x);
15124 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15125 32-bit mode; otherwise we have a direct convert instruction. */
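/* Identity used below (illustrative): for a signed 64-bit value v
   with signed high part h and unsigned low part l,
   v == (double) h * 0x1.0p32 + (double) l, computed with one signed
   and one unsigned SImode-to-DFmode conversion.  */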
15128 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15130 REAL_VALUE_TYPE TWO32r;
15131 rtx fp_lo, fp_hi, x;
15133 fp_lo = gen_reg_rtx (DFmode);
15134 fp_hi = gen_reg_rtx (DFmode);
15136 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15138 real_ldexp (&TWO32r, &dconst1, 32);
15139 x = const_double_from_real_value (TWO32r, DFmode);
15140 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15142 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15144 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15147 emit_move_insn (target, x);
15150 /* Convert an unsigned SImode value into a SFmode, using only SSE.
15151 For x86_32, -mfpmath=sse, !optimize_size only. */
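/* Identity used below (illustrative): u == (u >> 16) * 0x1.0p16
   + (u & 0xffff).  Both 16-bit halves convert to SFmode exactly and
   the multiply by 0x1.0p16 is exact, so only the final addition can
   round.  */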
15153 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15155 REAL_VALUE_TYPE ONE16r;
15156 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15158 real_ldexp (&ONE16r, &dconst1, 16);
15159 x = const_double_from_real_value (ONE16r, SFmode);
15160 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15161 NULL, 0, OPTAB_DIRECT);
15162 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15163 NULL, 0, OPTAB_DIRECT);
15164 fp_hi = gen_reg_rtx (SFmode);
15165 fp_lo = gen_reg_rtx (SFmode);
15166 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15167 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15168 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15169 0, OPTAB_DIRECT);
15170 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15171 0, OPTAB_DIRECT);
15172 if (!rtx_equal_p (target, fp_hi))
15173 emit_move_insn (target, fp_hi);
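/* Sketch of the idea in C (illustrative, not the emitted RTL):

     float hi = (float) (x >> 16);        // at most 16 significant bits
     float lo = (float) (x & 0xffff);
     float r  = hi * 0x1.0p16f + lo;

   Each conversion and the multiply are exact, so the single rounding in
   the final addition yields a correctly rounded SFmode result.  */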
15176 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15177 then replicate the value for all elements of the vector
15178 register. */
15180 static rtx
15181 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15188 v = gen_rtvec (4, value, value, value, value);
15189 return gen_rtx_CONST_VECTOR (V4SImode, v);
15193 v = gen_rtvec (2, value, value);
15194 return gen_rtx_CONST_VECTOR (V2DImode, v);
15198 v = gen_rtvec (4, value, value, value, value);
15200 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15201 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15202 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15206 v = gen_rtvec (2, value, value);
15208 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15209 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15212 gcc_unreachable ();
15216 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15217 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15218 for an SSE register. If VECT is true, then replicate the mask for
15219 all elements of the vector register. If INVERT is true, then create
15220 a mask excluding the sign bit. */
15222 rtx
15223 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15225 enum machine_mode vec_mode, imode;
15226 HOST_WIDE_INT hi, lo;
15231 /* Find the sign bit, sign extended to 2*HWI. */
15237 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15238 lo = 0x80000000, hi = lo < 0;
15244 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15245 if (HOST_BITS_PER_WIDE_INT >= 64)
15246 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15248 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15253 vec_mode = VOIDmode;
15254 if (HOST_BITS_PER_WIDE_INT >= 64)
15257 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15264 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15268 lo = ~lo, hi = ~hi;
15274 mask = immed_double_const (lo, hi, imode);
15276 vec = gen_rtvec (2, v, mask);
15277 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15278 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15285 gcc_unreachable ();
15289 lo = ~lo, hi = ~hi;
15291 /* Force this value into the low part of a fp vector constant. */
15292 mask = immed_double_const (lo, hi, imode);
15293 mask = gen_lowpart (mode, mask);
15295 if (vec_mode == VOIDmode)
15296 return force_reg (mode, mask);
15298 v = ix86_build_const_vector (mode, vect, mask);
15299 return force_reg (vec_mode, v);
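/* For reference, the scalar masks built here are just the IEEE sign
   bit of the element mode, e.g. 0x80000000 for SFmode and
   0x8000000000000000 for DFmode, complemented when INVERT is true
   (0x7fffffff resp. 0x7fffffffffffffff), then optionally broadcast
   to all vector elements.  */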
15302 /* Generate code for floating point ABS or NEG. */
15305 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15308 rtx mask, set, use, clob, dst, src;
15309 bool use_sse = false;
15310 bool vector_mode = VECTOR_MODE_P (mode);
15311 enum machine_mode elt_mode = mode;
15315 elt_mode = GET_MODE_INNER (mode);
15318 else if (mode == TFmode)
15320 else if (TARGET_SSE_MATH)
15321 use_sse = SSE_FLOAT_MODE_P (mode);
15323 /* NEG and ABS performed with SSE use bitwise mask operations.
15324 Create the appropriate mask now. */
15326 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15335 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15336 set = gen_rtx_SET (VOIDmode, dst, set);
15341 set = gen_rtx_fmt_e (code, mode, src);
15342 set = gen_rtx_SET (VOIDmode, dst, set);
15345 use = gen_rtx_USE (VOIDmode, mask);
15346 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15347 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15348 gen_rtvec (3, set, use, clob)));
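/* The mask trick in scalar terms (illustrative): with S the sign-bit
   mask built above,

     neg (x) == x ^ S     flip the sign bit
     abs (x) == x & ~S    clear the sign bit

   which is why ABS asks for the inverted mask and NEG the plain one,
   and why no FP exceptions can be raised.  */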
15355 /* Expand a copysign operation. Special case operand 0 being a constant. */
15358 ix86_expand_copysign (rtx operands[])
15360 enum machine_mode mode;
15361 rtx dest, op0, op1, mask, nmask;
15363 dest = operands[0];
15364 op0 = operands[1];
15365 op1 = operands[2];
15367 mode = GET_MODE (dest);
15369 if (GET_CODE (op0) == CONST_DOUBLE)
15371 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15373 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15374 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15376 if (mode == SFmode || mode == DFmode)
15378 enum machine_mode vmode;
15380 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15382 if (op0 == CONST0_RTX (mode))
15383 op0 = CONST0_RTX (vmode);
15386 rtx v = ix86_build_const_vector (mode, false, op0);
15388 op0 = force_reg (vmode, v);
15391 else if (op0 != CONST0_RTX (mode))
15392 op0 = force_reg (mode, op0);
15394 mask = ix86_build_signbit_mask (mode, 0, 0);
15396 if (mode == SFmode)
15397 copysign_insn = gen_copysignsf3_const;
15398 else if (mode == DFmode)
15399 copysign_insn = gen_copysigndf3_const;
15401 copysign_insn = gen_copysigntf3_const;
15403 emit_insn (copysign_insn (dest, op0, op1, mask));
15407 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15409 nmask = ix86_build_signbit_mask (mode, 0, 1);
15410 mask = ix86_build_signbit_mask (mode, 0, 0);
15412 if (mode == SFmode)
15413 copysign_insn = gen_copysignsf3_var;
15414 else if (mode == DFmode)
15415 copysign_insn = gen_copysigndf3_var;
15417 copysign_insn = gen_copysigntf3_var;
15419 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15423 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15424 be a constant, and so has already been expanded into a vector constant. */
15427 ix86_split_copysign_const (rtx operands[])
15429 enum machine_mode mode, vmode;
15430 rtx dest, op0, mask, x;
15432 dest = operands[0];
15433 op0 = operands[2];
15434 mask = operands[3];
15436 mode = GET_MODE (dest);
15437 vmode = GET_MODE (mask);
15439 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15440 x = gen_rtx_AND (vmode, dest, mask);
15441 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15443 if (op0 != CONST0_RTX (vmode))
15445 x = gen_rtx_IOR (vmode, dest, op0);
15446 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15450 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15451 so we have to do two masks. */
15454 ix86_split_copysign_var (rtx operands[])
15456 enum machine_mode mode, vmode;
15457 rtx dest, scratch, op0, op1, mask, nmask, x;
15459 dest = operands[0];
17460 scratch = operands[1];
17461 op0 = operands[2];
17462 op1 = operands[3];
17463 nmask = operands[4];
15464 mask = operands[5];
15466 mode = GET_MODE (dest);
15467 vmode = GET_MODE (mask);
15469 if (rtx_equal_p (op0, op1))
15471 /* Shouldn't happen often (it's useless, obviously), but when it does
15472 we'd generate incorrect code if we continue below. */
15473 emit_move_insn (dest, op0);
15477 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15479 gcc_assert (REGNO (op1) == REGNO (scratch));
15481 x = gen_rtx_AND (vmode, scratch, mask);
15482 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15485 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15486 x = gen_rtx_NOT (vmode, dest);
15487 x = gen_rtx_AND (vmode, x, op0);
15488 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15492 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15494 x = gen_rtx_AND (vmode, scratch, mask);
15496 else /* alternative 2,4 */
15498 gcc_assert (REGNO (mask) == REGNO (scratch));
15499 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15500 x = gen_rtx_AND (vmode, scratch, op1);
15502 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15504 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15506 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15507 x = gen_rtx_AND (vmode, dest, nmask);
15509 else /* alternative 3,4 */
15511 gcc_assert (REGNO (nmask) == REGNO (dest));
15513 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15514 x = gen_rtx_AND (vmode, dest, op0);
15516 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15519 x = gen_rtx_IOR (vmode, dest, scratch);
15520 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
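/* Net effect of the two masks (illustrative): with S the sign-bit mask
   and ~S its complement,

     dest = (op0 & ~S) | (op1 & S)

   i.e. magnitude from op0, sign from op1.  The register alternatives
   above differ only in which input already overlaps dest or scratch.  */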
15523 /* Return TRUE or FALSE depending on whether the first SET in INSN
15524 has source and destination with matching CC modes, and that the
15525 CC mode is at least as constrained as REQ_MODE. */
15528 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15531 enum machine_mode set_mode;
15533 set = PATTERN (insn);
15534 if (GET_CODE (set) == PARALLEL)
15535 set = XVECEXP (set, 0, 0);
15536 gcc_assert (GET_CODE (set) == SET);
15537 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15539 set_mode = GET_MODE (SET_DEST (set));
15543 if (req_mode != CCNOmode
15544 && (req_mode != CCmode
15545 || XEXP (SET_SRC (set), 1) != const0_rtx))
15549 if (req_mode == CCGCmode)
15553 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15557 if (req_mode == CCZmode)
15568 gcc_unreachable ();
15571 return (GET_MODE (SET_SRC (set)) == set_mode);
15574 /* Generate insn patterns to do an integer compare of OPERANDS. */
15577 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15579 enum machine_mode cmpmode;
15582 cmpmode = SELECT_CC_MODE (code, op0, op1);
15583 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15585 /* This is very simple, but making the interface the same as in the
15586 FP case makes the rest of the code easier. */
15587 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15588 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15590 /* Return the test that should be put into the flags user, i.e.
15591 the bcc, scc, or cmov instruction. */
15592 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15595 /* Figure out whether to use ordered or unordered fp comparisons.
15596 Return the appropriate mode to use. */
15599 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15601 /* ??? In order to make all comparisons reversible, we do all comparisons
15602 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15603 all forms of trapping and nontrapping comparisons, we can make inequality
15604 comparisons trapping again, since it results in better code when using
15605 FCOM based compares. */
15606 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15610 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15612 enum machine_mode mode = GET_MODE (op0);
15614 if (SCALAR_FLOAT_MODE_P (mode))
15616 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15617 return ix86_fp_compare_mode (code);
15622 /* Only zero flag is needed. */
15623 case EQ: /* ZF=0 */
15624 case NE: /* ZF!=0 */
15626 /* Codes needing carry flag. */
15627 case GEU: /* CF=0 */
15628 case LTU: /* CF=1 */
15629 /* Detect overflow checks. They need just the carry flag. */
15630 if (GET_CODE (op0) == PLUS
15631 && rtx_equal_p (op1, XEXP (op0, 0)))
15635 case GTU: /* CF=0 & ZF=0 */
15636 case LEU: /* CF=1 | ZF=1 */
15637 /* Detect overflow checks. They need just the carry flag. */
15638 if (GET_CODE (op0) == MINUS
15639 && rtx_equal_p (op1, XEXP (op0, 0)))
15643 /* Codes possibly doable only with sign flag when
15644 comparing against zero. */
15645 case GE: /* SF=OF or SF=0 */
15646 case LT: /* SF<>OF or SF=1 */
15647 if (op1 == const0_rtx)
15650 /* For other cases Carry flag is not required. */
15652 /* Codes doable only with the sign flag when comparing
15653 against zero, but we miss the jump instruction for it,
15654 so we need to use relational tests against overflow,
15655 which thus needs to be zero. */
15656 case GT: /* ZF=0 & SF=OF */
15657 case LE: /* ZF=1 | SF<>OF */
15658 if (op1 == const0_rtx)
15662 /* The strcmp pattern does (use flags), and combine may ask us
15663 for a proper mode. */
15664 case USE:
15665 return CCmode;
15666 default:
15667 gcc_unreachable ();
15671 /* Return the fixed registers used for condition codes. */
15674 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15681 /* If two condition code modes are compatible, return a condition code
15682 mode which is compatible with both. Otherwise, return
15685 static enum machine_mode
15686 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15691 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15694 if ((m1 == CCGCmode && m2 == CCGOCmode)
15695 || (m1 == CCGOCmode && m2 == CCGCmode))
15701 gcc_unreachable ();
15731 /* These are only compatible with themselves, which we already
15732 know. */
15733 return VOIDmode;
15738 /* Return a comparison we can do and that it is equivalent to
15739 swap_condition (code) apart possibly from orderedness.
15740 But, never change orderedness if TARGET_IEEE_FP, returning
15741 UNKNOWN in that case if necessary. */
15743 static enum rtx_code
15744 ix86_fp_swap_condition (enum rtx_code code)
15748 case GT: /* GTU - CF=0 & ZF=0 */
15749 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15750 case GE: /* GEU - CF=0 */
15751 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15752 case UNLT: /* LTU - CF=1 */
15753 return TARGET_IEEE_FP ? UNKNOWN : GT;
15754 case UNLE: /* LEU - CF=1 | ZF=1 */
15755 return TARGET_IEEE_FP ? UNKNOWN : GE;
15757 return swap_condition (code);
15761 /* Return the cost of comparison CODE using the best strategy for performance.
15762 All the following functions use the number of instructions as a cost metric.
15763 In the future this should be tweaked to compute bytes for optimize_size and
15764 take into account the performance of various instructions on various CPUs. */
15767 ix86_fp_comparison_cost (enum rtx_code code)
15771 /* The cost of code using bit-twiddling on %ah. */
15788 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15792 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15795 gcc_unreachable ();
15798 switch (ix86_fp_comparison_strategy (code))
15800 case IX86_FPCMP_COMI:
15801 return arith_cost > 4 ? 3 : 2;
15802 case IX86_FPCMP_SAHF:
15803 return arith_cost > 4 ? 4 : 3;
15809 /* Return the strategy to use for floating-point. We assume that fcomi is
15810 always preferable where available, since that is also true when looking at size
15811 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15813 enum ix86_fpcmp_strategy
15814 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15816 /* Do fcomi/sahf based test when profitable. */
15819 return IX86_FPCMP_COMI;
15821 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15822 return IX86_FPCMP_SAHF;
15824 return IX86_FPCMP_ARITH;
15827 /* Swap, force into registers, or otherwise massage the two operands
15828 to a fp comparison. The operands are updated in place; the new
15829 comparison code is returned. */
15831 static enum rtx_code
15832 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15834 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15835 rtx op0 = *pop0, op1 = *pop1;
15836 enum machine_mode op_mode = GET_MODE (op0);
15837 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15839 /* All of the unordered compare instructions only work on registers.
15840 The same is true of the fcomi compare instructions. The XFmode
15841 compare instructions require registers except when comparing
15842 against zero or when converting operand 1 from fixed point to
15843 floating point. */
15845 if (!is_sse
15846 && (fpcmp_mode == CCFPUmode
15847 || (op_mode == XFmode
15848 && ! (standard_80387_constant_p (op0) == 1
15849 || standard_80387_constant_p (op1) == 1)
15850 && GET_CODE (op1) != FLOAT)
15851 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15853 op0 = force_reg (op_mode, op0);
15854 op1 = force_reg (op_mode, op1);
15858 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15859 things around if they appear profitable, otherwise force op0
15860 into a register. */
15862 if (standard_80387_constant_p (op0) == 0
15863 || (MEM_P (op0)
15864 && ! (standard_80387_constant_p (op1) == 0
15865 || MEM_P (op1))))
15867 enum rtx_code new_code = ix86_fp_swap_condition (code);
15868 if (new_code != UNKNOWN)
15871 tmp = op0, op0 = op1, op1 = tmp;
15877 op0 = force_reg (op_mode, op0);
15879 if (CONSTANT_P (op1))
15881 int tmp = standard_80387_constant_p (op1);
15883 op1 = validize_mem (force_const_mem (op_mode, op1));
15887 op1 = force_reg (op_mode, op1);
15890 op1 = force_reg (op_mode, op1);
15894 /* Try to rearrange the comparison to make it cheaper. */
15895 if (ix86_fp_comparison_cost (code)
15896 > ix86_fp_comparison_cost (swap_condition (code))
15897 && (REG_P (op1) || can_create_pseudo_p ()))
15900 tmp = op0, op0 = op1, op1 = tmp;
15901 code = swap_condition (code);
15903 op0 = force_reg (op_mode, op0);
15911 /* Convert comparison codes we use to represent FP comparison to integer
15912 code that will result in proper branch. Return UNKNOWN if no such code
15916 ix86_fp_compare_code_to_integer (enum rtx_code code)
15945 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15948 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15950 enum machine_mode fpcmp_mode, intcmp_mode;
15953 fpcmp_mode = ix86_fp_compare_mode (code);
15954 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15956 /* Do fcomi/sahf based test when profitable. */
15957 switch (ix86_fp_comparison_strategy (code))
15959 case IX86_FPCMP_COMI:
15960 intcmp_mode = fpcmp_mode;
15961 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15962 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15967 case IX86_FPCMP_SAHF:
15968 intcmp_mode = fpcmp_mode;
15969 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15970 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15974 scratch = gen_reg_rtx (HImode);
15975 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15976 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15979 case IX86_FPCMP_ARITH:
15980 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15981 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15982 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15984 scratch = gen_reg_rtx (HImode);
15985 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15987 /* In the unordered case, we have to check C2 for NaN's, which
15988 doesn't happen to work out to anything nice combination-wise.
15989 So do some bit twiddling on the value we've got in AH to come
15990 up with an appropriate set of condition codes. */
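/* For orientation (illustrative): after fnstsw the FPU condition bits
   land in %ah as C0 = 0x01 (less than), C2 = 0x04 (unordered) and
   C3 = 0x40 (equal); the constant 0x45 used below is C0|C2|C3, and
   each case keeps just the subset of bits its comparison needs.  */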
15992 intcmp_mode = CCNOmode;
15997 if (code == GT || !TARGET_IEEE_FP)
15999 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16004 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16005 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16006 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16007 intcmp_mode = CCmode;
16013 if (code == LT && TARGET_IEEE_FP)
16015 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16016 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16017 intcmp_mode = CCmode;
16022 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16028 if (code == GE || !TARGET_IEEE_FP)
16030 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16035 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16036 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16042 if (code == LE && TARGET_IEEE_FP)
16044 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16045 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16046 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16047 intcmp_mode = CCmode;
16052 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16058 if (code == EQ && TARGET_IEEE_FP)
16060 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16061 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16062 intcmp_mode = CCmode;
16067 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16073 if (code == NE && TARGET_IEEE_FP)
16075 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16076 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16082 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16088 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16092 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16097 gcc_unreachable ();
16105 /* Return the test that should be put into the flags user, i.e.
16106 the bcc, scc, or cmov instruction. */
16107 return gen_rtx_fmt_ee (code, VOIDmode,
16108 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16113 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16117 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16118 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16120 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16122 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16123 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16126 ret = ix86_expand_int_compare (code, op0, op1);
16132 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16136 switch (GET_MODE (op0))
16145 tmp = ix86_expand_compare (code, op0, op1);
16146 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16147 gen_rtx_LABEL_REF (VOIDmode, label),
16149 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16156 /* Expand DImode branch into multiple compare+branch. */
16158 rtx lo[2], hi[2], label2;
16159 enum rtx_code code1, code2, code3;
16160 enum machine_mode submode;
16162 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16164 tmp = op0, op0 = op1, op1 = tmp;
16165 code = swap_condition (code);
16167 if (GET_MODE (op0) == DImode)
16169 split_di (&op0, 1, lo+0, hi+0);
16170 split_di (&op1, 1, lo+1, hi+1);
16175 split_ti (&op0, 1, lo+0, hi+0);
16176 split_ti (&op1, 1, lo+1, hi+1);
16180 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16181 avoid two branches. This costs one extra insn, so disable when
16182 optimizing for size. */
16184 if ((code == EQ || code == NE)
16185 && (!optimize_insn_for_size_p ()
16186 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16191 if (hi[1] != const0_rtx)
16192 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16193 NULL_RTX, 0, OPTAB_WIDEN);
16196 if (lo[1] != const0_rtx)
16197 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16198 NULL_RTX, 0, OPTAB_WIDEN);
16200 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16201 NULL_RTX, 0, OPTAB_WIDEN);
16203 ix86_expand_branch (code, tmp, const0_rtx, label);
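/* For instance, a DImode "a == b" compiled this way comes out roughly
   as (illustrative AT&T sketch):

     xorl  b_hi, a_hi
     xorl  b_lo, a_lo
     orl   a_hi, a_lo
     jz    label

   one extra logical op per half, but only a single conditional branch.  */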
16207 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16208 op1 is a constant and the low word is zero, then we can just
16209 examine the high word. Similarly for low word -1 and
16210 less-or-equal-than or greater-than. */
16212 if (CONST_INT_P (hi[1]))
16215 case LT: case LTU: case GE: case GEU:
16216 if (lo[1] == const0_rtx)
16218 ix86_expand_branch (code, hi[0], hi[1], label);
16222 case LE: case LEU: case GT: case GTU:
16223 if (lo[1] == constm1_rtx)
16225 ix86_expand_branch (code, hi[0], hi[1], label);
16233 /* Otherwise, we need two or three jumps. */
16235 label2 = gen_label_rtx ();
16238 code2 = swap_condition (code);
16239 code3 = unsigned_condition (code);
16243 case LT: case GT: case LTU: case GTU:
16246 case LE: code1 = LT; code2 = GT; break;
16247 case GE: code1 = GT; code2 = LT; break;
16248 case LEU: code1 = LTU; code2 = GTU; break;
16249 case GEU: code1 = GTU; code2 = LTU; break;
16251 case EQ: code1 = UNKNOWN; code2 = NE; break;
16252 case NE: code2 = UNKNOWN; break;
16255 gcc_unreachable ();
16260 * if (hi(a) < hi(b)) goto true;
16261 * if (hi(a) > hi(b)) goto false;
16262 * if (lo(a) < lo(b)) goto true;
16266 if (code1 != UNKNOWN)
16267 ix86_expand_branch (code1, hi[0], hi[1], label);
16268 if (code2 != UNKNOWN)
16269 ix86_expand_branch (code2, hi[0], hi[1], label2);
16271 ix86_expand_branch (code3, lo[0], lo[1], label);
16273 if (code2 != UNKNOWN)
16274 emit_label (label2);
16279 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16284 /* Split branch based on floating point condition. */
16286 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16287 rtx target1, rtx target2, rtx tmp, rtx pushed)
16292 if (target2 != pc_rtx)
16295 code = reverse_condition_maybe_unordered (code);
16300 condition = ix86_expand_fp_compare (code, op1, op2,
16303 /* Remove pushed operand from stack. */
16305 ix86_free_from_memory (GET_MODE (pushed));
16307 i = emit_jump_insn (gen_rtx_SET
16309 gen_rtx_IF_THEN_ELSE (VOIDmode,
16310 condition, target1, target2)));
16311 if (split_branch_probability >= 0)
16312 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16316 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16320 gcc_assert (GET_MODE (dest) == QImode);
16322 ret = ix86_expand_compare (code, op0, op1);
16323 PUT_MODE (ret, QImode);
16324 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16327 /* Expand comparison setting or clearing carry flag. Return true when
16328 successful and set pop for the operation. */
16330 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16332 enum machine_mode mode =
16333 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16335 /* Do not handle DImode compares that go through special path. */
16336 if (mode == (TARGET_64BIT ? TImode : DImode))
16339 if (SCALAR_FLOAT_MODE_P (mode))
16341 rtx compare_op, compare_seq;
16343 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16345 /* Shortcut: following common codes never translate
16346 into carry flag compares. */
16347 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16348 || code == ORDERED || code == UNORDERED)
16351 /* These comparisons require zero flag; swap operands so they won't. */
16352 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16353 && !TARGET_IEEE_FP)
16358 code = swap_condition (code);
16361 /* Try to expand the comparison and verify that we end up with
16362 carry flag based comparison. This fails to be true only when
16363 we decide to expand the comparison using arithmetic, which is not
16364 a common scenario. */
16366 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16367 compare_seq = get_insns ();
16370 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16371 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16372 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16374 code = GET_CODE (compare_op);
16376 if (code != LTU && code != GEU)
16379 emit_insn (compare_seq);
16384 if (!INTEGRAL_MODE_P (mode))
16393 /* Convert a==0 into (unsigned)a<1. */
16396 if (op1 != const0_rtx)
16399 code = (code == EQ ? LTU : GEU);
16402 /* Convert a>b into b<a or a>=b-1. */
16405 if (CONST_INT_P (op1))
16407 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16408 /* Bail out on overflow. We still can swap operands but that
16409 would force loading of the constant into register. */
16410 if (op1 == const0_rtx
16411 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16413 code = (code == GTU ? GEU : LTU);
16420 code = (code == GTU ? LTU : GEU);
16424 /* Convert a>=0 into (unsigned)a<0x80000000. */
16427 if (mode == DImode || op1 != const0_rtx)
16429 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16430 code = (code == LT ? GEU : LTU);
16434 if (mode == DImode || op1 != constm1_rtx)
16436 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16437 code = (code == LE ? GEU : LTU);
16443 /* Swapping operands may cause constant to appear as first operand. */
16444 if (!nonimmediate_operand (op0, VOIDmode))
16446 if (!can_create_pseudo_p ())
16448 op0 = force_reg (mode, op0);
16450 *pop = ix86_expand_compare (code, op0, op1);
16451 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16456 ix86_expand_int_movcc (rtx operands[])
16458 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16459 rtx compare_seq, compare_op;
16460 enum machine_mode mode = GET_MODE (operands[0]);
16461 bool sign_bit_compare_p = false;
16462 rtx op0 = XEXP (operands[1], 0);
16463 rtx op1 = XEXP (operands[1], 1);
16466 compare_op = ix86_expand_compare (code, op0, op1);
16467 compare_seq = get_insns ();
16470 compare_code = GET_CODE (compare_op);
16472 if ((op1 == const0_rtx && (code == GE || code == LT))
16473 || (op1 == constm1_rtx && (code == GT || code == LE)))
16474 sign_bit_compare_p = true;
16476 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16477 HImode insns, we'd be swallowed in word prefix ops. */
16479 if ((mode != HImode || TARGET_FAST_PREFIX)
16480 && (mode != (TARGET_64BIT ? TImode : DImode))
16481 && CONST_INT_P (operands[2])
16482 && CONST_INT_P (operands[3]))
16484 rtx out = operands[0];
16485 HOST_WIDE_INT ct = INTVAL (operands[2]);
16486 HOST_WIDE_INT cf = INTVAL (operands[3]);
16487 HOST_WIDE_INT diff;
16490 /* Sign bit compares are better done using shifts than we do by using
16491 sbb. */
16492 if (sign_bit_compare_p
16493 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
16495 /* Detect overlap between destination and compare sources. */
16498 if (!sign_bit_compare_p)
16501 bool fpcmp = false;
16503 compare_code = GET_CODE (compare_op);
16505 flags = XEXP (compare_op, 0);
16507 if (GET_MODE (flags) == CCFPmode
16508 || GET_MODE (flags) == CCFPUmode)
16512 = ix86_fp_compare_code_to_integer (compare_code);
16515 /* To simplify rest of code, restrict to the GEU case. */
16516 if (compare_code == LTU)
16518 HOST_WIDE_INT tmp = ct;
16521 compare_code = reverse_condition (compare_code);
16522 code = reverse_condition (code);
16527 PUT_CODE (compare_op,
16528 reverse_condition_maybe_unordered
16529 (GET_CODE (compare_op)));
16531 PUT_CODE (compare_op,
16532 reverse_condition (GET_CODE (compare_op)));
16536 if (reg_overlap_mentioned_p (out, op0)
16537 || reg_overlap_mentioned_p (out, op1))
16538 tmp = gen_reg_rtx (mode);
16540 if (mode == DImode)
16541 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16542 else
16543 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16544 flags, compare_op));
16548 if (code == GT || code == GE)
16549 code = reverse_condition (code);
16552 HOST_WIDE_INT tmp = ct;
16557 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
16570 tmp = expand_simple_binop (mode, PLUS,
16572 copy_rtx (tmp), 1, OPTAB_DIRECT);
16583 tmp = expand_simple_binop (mode, IOR,
16585 copy_rtx (tmp), 1, OPTAB_DIRECT);
16587 else if (diff == -1 && ct)
16597 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16599 tmp = expand_simple_binop (mode, PLUS,
16600 copy_rtx (tmp), GEN_INT (cf),
16601 copy_rtx (tmp), 1, OPTAB_DIRECT);
16609 * andl cf - ct, dest
16619 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16622 tmp = expand_simple_binop (mode, AND,
16624 gen_int_mode (cf - ct, mode),
16625 copy_rtx (tmp), 1, OPTAB_DIRECT);
16627 tmp = expand_simple_binop (mode, PLUS,
16628 copy_rtx (tmp), GEN_INT (ct),
16629 copy_rtx (tmp), 1, OPTAB_DIRECT);
16632 if (!rtx_equal_p (tmp, out))
16633 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16635 return 1; /* DONE */
16640 enum machine_mode cmp_mode = GET_MODE (op0);
16643 tmp = ct, ct = cf, cf = tmp;
16646 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16648 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16650 /* We may be reversing an unordered compare to a normal compare, which
16651 is not valid in general (we may convert a non-trapping condition
16652 to a trapping one); however, on i386 we currently emit all
16653 comparisons unordered. */
16654 compare_code = reverse_condition_maybe_unordered (compare_code);
16655 code = reverse_condition_maybe_unordered (code);
16659 compare_code = reverse_condition (compare_code);
16660 code = reverse_condition (code);
16664 compare_code = UNKNOWN;
16665 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
16666 && CONST_INT_P (op1))
16668 if (op1 == const0_rtx
16669 && (code == LT || code == GE))
16670 compare_code = code;
16671 else if (op1 == constm1_rtx)
16675 else if (code == GT)
16680 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16681 if (compare_code != UNKNOWN
16682 && GET_MODE (op0) == GET_MODE (out)
16683 && (cf == -1 || ct == -1))
16685 /* If lea code below could be used, only optimize
16686 if it results in a 2 insn sequence. */
16688 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16689 || diff == 3 || diff == 5 || diff == 9)
16690 || (compare_code == LT && ct == -1)
16691 || (compare_code == GE && cf == -1))
16694 * notl op1 (if necessary)
16702 code = reverse_condition (code);
16705 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16707 out = expand_simple_binop (mode, IOR,
16709 out, 1, OPTAB_DIRECT);
16710 if (out != operands[0])
16711 emit_move_insn (operands[0], out);
16713 return 1; /* DONE */
16718 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16719 || diff == 3 || diff == 5 || diff == 9)
16720 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16722 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16728 * lea cf(dest*(ct-cf)),dest
16732 * This also catches the degenerate setcc-only case.
16738 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16741 /* On x86_64 the lea instruction operates on Pmode, so we need
16742 to get the arithmetic done in the proper mode to match. */
16744 tmp = copy_rtx (out);
16748 out1 = copy_rtx (out);
16749 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16753 tmp = gen_rtx_PLUS (mode, tmp, out1);
16759 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16762 if (!rtx_equal_p (tmp, out))
16765 out = force_operand (tmp, copy_rtx (out));
16767 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16769 if (!rtx_equal_p (out, operands[0]))
16770 emit_move_insn (operands[0], copy_rtx (out));
16772 return 1; /* DONE */
16776 * General case: Jumpful:
16777 * xorl dest,dest cmpl op1, op2
16778 * cmpl op1, op2 movl ct, dest
16779 * setcc dest jcc 1f
16780 * decl dest movl cf, dest
16781 * andl (cf-ct),dest 1:
16784 * Size 20. Size 14.
16786 * This is reasonably steep, but branch mispredict costs are
16787 * high on modern cpus, so consider failing only if optimizing
16788 * for space.
16789 */
16791 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16792 && BRANCH_COST (optimize_insn_for_speed_p (),
16797 enum machine_mode cmp_mode = GET_MODE (op0);
16802 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16804 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16806 /* We may be reversing an unordered compare to a normal compare,
16807 which is not valid in general (we may convert a non-trapping
16808 condition to a trapping one); however, on i386 we currently
16809 emit all comparisons unordered. */
16810 code = reverse_condition_maybe_unordered (code);
16814 code = reverse_condition (code);
16815 if (compare_code != UNKNOWN)
16816 compare_code = reverse_condition (compare_code);
16820 if (compare_code != UNKNOWN)
16822 /* notl op1 (if needed)
16827 For x < 0 (resp. x <= -1) there will be no notl,
16828 so if possible swap the constants to get rid of the
16829 complement.
16830 True/false will be -1/0 while code below (store flag
16831 followed by decrement) is 0/-1, so the constants need
16832 to be exchanged once more. */
16834 if (compare_code == GE || !cf)
16836 code = reverse_condition (code);
16841 HOST_WIDE_INT tmp = cf;
16846 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16850 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16852 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
16854 copy_rtx (out), 1, OPTAB_DIRECT);
16857 out = expand_simple_binop (mode, AND, copy_rtx (out),
16858 gen_int_mode (cf - ct, mode),
16859 copy_rtx (out), 1, OPTAB_DIRECT);
16861 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16862 copy_rtx (out), 1, OPTAB_DIRECT);
16863 if (!rtx_equal_p (out, operands[0]))
16864 emit_move_insn (operands[0], copy_rtx (out));
16866 return 1; /* DONE */
16870 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16872 /* Try a few things more with specific constants and a variable. */
16875 rtx var, orig_out, out, tmp;
16877 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16878 return 0; /* FAIL */
16880 /* If one of the two operands is an interesting constant, load a
16881 constant with the above and mask it in with a logical operation. */
16883 if (CONST_INT_P (operands[2]))
16886 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16887 operands[3] = constm1_rtx, op = and_optab;
16888 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16889 operands[3] = const0_rtx, op = ior_optab;
16891 return 0; /* FAIL */
16893 else if (CONST_INT_P (operands[3]))
16896 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16897 operands[2] = constm1_rtx, op = and_optab;
16898 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16899 operands[2] = const0_rtx, op = ior_optab;
16901 return 0; /* FAIL */
16904 return 0; /* FAIL */
16906 orig_out = operands[0];
16907 tmp = gen_reg_rtx (mode);
16910 /* Recurse to get the constant loaded. */
16911 if (ix86_expand_int_movcc (operands) == 0)
16912 return 0; /* FAIL */
16914 /* Mask in the interesting variable. */
16915 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16917 if (!rtx_equal_p (out, orig_out))
16918 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16920 return 1; /* DONE */
16924 * For comparison with above,
16934 if (! nonimmediate_operand (operands[2], mode))
16935 operands[2] = force_reg (mode, operands[2]);
16936 if (! nonimmediate_operand (operands[3], mode))
16937 operands[3] = force_reg (mode, operands[3]);
16939 if (! register_operand (operands[2], VOIDmode)
16941 || ! register_operand (operands[3], VOIDmode)))
16942 operands[2] = force_reg (mode, operands[2]);
16945 && ! register_operand (operands[3], VOIDmode))
16946 operands[3] = force_reg (mode, operands[3]);
16948 emit_insn (compare_seq);
16949 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16950 gen_rtx_IF_THEN_ELSE (mode,
16951 compare_op, operands[2],
16954 return 1; /* DONE */
16957 /* Swap, force into registers, or otherwise massage the two operands
16958 to an sse comparison with a mask result. Thus we differ a bit from
16959 ix86_prepare_fp_compare_args which expects to produce a flags result.
16961 The DEST operand exists to help determine whether to commute commutative
16962 operators. The POP0/POP1 operands are updated in place. The new
16963 comparison code is returned, or UNKNOWN if not implementable. */
16965 static enum rtx_code
16966 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16967 rtx *pop0, rtx *pop1)
16975 /* We have no LTGT as an operator. We could implement it with
16976 NE & ORDERED, but this requires an extra temporary. It's
16977 not clear that it's worth it. */
16984 /* These are supported directly. */
16991 /* For commutative operators, try to canonicalize the destination
16992 operand to be first in the comparison - this helps reload to
16993 avoid extra moves. */
16994 if (!dest || !rtx_equal_p (dest, *pop1))
17002 /* These are not supported directly. Swap the comparison operands
17003 to transform into something that is supported. */
17007 code = swap_condition (code);
17011 gcc_unreachable ();
17017 /* Detect conditional moves that exactly match min/max operational
17018 semantics. Note that this is IEEE safe, as long as we don't
17019 interchange the operands.
17021 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17022 and TRUE if the operation is successful and instructions are emitted. */
17025 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17026 rtx cmp_op1, rtx if_true, rtx if_false)
17028 enum machine_mode mode;
17034 else if (code == UNGE)
17037 if_true = if_false;
17043 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17045 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17050 mode = GET_MODE (dest);
17052 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17053 but MODE may be a vector mode and thus not appropriate. */
17054 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17056 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17059 if_true = force_reg (mode, if_true);
17060 v = gen_rtvec (2, if_true, if_false);
17061 tmp = gen_rtx_UNSPEC (mode, v, u);
17065 code = is_min ? SMIN : SMAX;
17066 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17069 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17073 /* Expand an sse vector comparison. Return the register with the result. */
17076 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17077 rtx op_true, rtx op_false)
17079 enum machine_mode mode = GET_MODE (dest);
17082 cmp_op0 = force_reg (mode, cmp_op0);
17083 if (!nonimmediate_operand (cmp_op1, mode))
17084 cmp_op1 = force_reg (mode, cmp_op1);
17087 || reg_overlap_mentioned_p (dest, op_true)
17088 || reg_overlap_mentioned_p (dest, op_false))
17089 dest = gen_reg_rtx (mode);
17091 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17092 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17097 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17098 operations. This is used for both scalar and vector conditional moves. */
17101 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17103 enum machine_mode mode = GET_MODE (dest);
17106 if (op_false == CONST0_RTX (mode))
17108 op_true = force_reg (mode, op_true);
17109 x = gen_rtx_AND (mode, cmp, op_true);
17110 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17112 else if (op_true == CONST0_RTX (mode))
17114 op_false = force_reg (mode, op_false);
17115 x = gen_rtx_NOT (mode, cmp);
17116 x = gen_rtx_AND (mode, x, op_false);
17117 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17119 else if (TARGET_XOP)
17121 rtx pcmov = gen_rtx_SET (mode, dest,
17122 gen_rtx_IF_THEN_ELSE (mode, cmp,
17129 op_true = force_reg (mode, op_true);
17130 op_false = force_reg (mode, op_false);
17132 t2 = gen_reg_rtx (mode);
17134 t3 = gen_reg_rtx (mode);
17138 x = gen_rtx_AND (mode, op_true, cmp);
17139 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17141 x = gen_rtx_NOT (mode, cmp);
17142 x = gen_rtx_AND (mode, x, op_false);
17143 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17145 x = gen_rtx_IOR (mode, t3, t2);
17146 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
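/* Without XOP's vpcmov this is the classic blend identity
   (illustrative):

     dest = (cmp & op_true) | (~cmp & op_false)

   which relies on each element of CMP being all-ones or all-zeros;
   the special cases above drop one half when an arm is zero.  */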
17150 /* Expand a floating-point conditional move. Return true if successful. */
17153 ix86_expand_fp_movcc (rtx operands[])
17155 enum machine_mode mode = GET_MODE (operands[0]);
17156 enum rtx_code code = GET_CODE (operands[1]);
17157 rtx tmp, compare_op;
17158 rtx op0 = XEXP (operands[1], 0);
17159 rtx op1 = XEXP (operands[1], 1);
17161 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17163 enum machine_mode cmode;
17165 /* Since we've no cmove for sse registers, don't force bad register
17166 allocation just to gain access to it. Deny movcc when the
17167 comparison mode doesn't match the move mode. */
17168 cmode = GET_MODE (op0);
17169 if (cmode == VOIDmode)
17170 cmode = GET_MODE (op1);
17174 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17175 if (code == UNKNOWN)
17178 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17179 operands[2], operands[3]))
17182 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17183 operands[2], operands[3]);
17184 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17188 /* The floating point conditional move instructions don't directly
17189 support conditions resulting from a signed integer comparison. */
17191 compare_op = ix86_expand_compare (code, op0, op1);
17192 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17194 tmp = gen_reg_rtx (QImode);
17195 ix86_expand_setcc (tmp, code, op0, op1);
17197 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17200 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17201 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17202 operands[2], operands[3])));
17207 /* Expand a floating-point vector conditional move; a vcond operation
17208 rather than a movcc operation. */
17211 ix86_expand_fp_vcond (rtx operands[])
17213 enum rtx_code code = GET_CODE (operands[3]);
17216 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17217 &operands[4], &operands[5]);
17218 if (code == UNKNOWN)
17221 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17222 operands[5], operands[1], operands[2]))
17225 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17226 operands[1], operands[2]);
17227 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17231 /* Expand a signed/unsigned integral vector conditional move. */
17234 ix86_expand_int_vcond (rtx operands[])
17236 enum machine_mode mode = GET_MODE (operands[0]);
17237 enum rtx_code code = GET_CODE (operands[3]);
17238 bool negate = false;
17241 cop0 = operands[4];
17242 cop1 = operands[5];
17244 /* XOP supports all of the comparisons on all vector int types. */
17247 /* Canonicalize the comparison to EQ, GT, GTU. */
17258 code = reverse_condition (code);
17264 code = reverse_condition (code);
17270 code = swap_condition (code);
17271 x = cop0, cop0 = cop1, cop1 = x;
17275 gcc_unreachable ();
17278 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17279 if (mode == V2DImode)
17284 /* SSE4.1 supports EQ. */
17285 if (!TARGET_SSE4_1)
17291 /* SSE4.2 supports GT/GTU. */
17292 if (!TARGET_SSE4_2)
17297 gcc_unreachable ();
17301 /* Unsigned parallel compare is not supported by the hardware.
17302 Play some tricks to turn this into a signed comparison
17306 cop0 = force_reg (mode, cop0);
17314 rtx (*gen_sub3) (rtx, rtx, rtx);
17316 /* Subtract (-(INT MAX) - 1) from both operands to make
17318 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17320 gen_sub3 = (mode == V4SImode
17321 ? gen_subv4si3 : gen_subv2di3);
17322 t1 = gen_reg_rtx (mode);
17323 emit_insn (gen_sub3 (t1, cop0, mask));
17325 t2 = gen_reg_rtx (mode);
17326 emit_insn (gen_sub3 (t2, cop1, mask));
17336 /* Perform a parallel unsigned saturating subtraction. */
17337 x = gen_reg_rtx (mode);
17338 emit_insn (gen_rtx_SET (VOIDmode, x,
17339 gen_rtx_US_MINUS (mode, cop0, cop1)));
17342 cop1 = CONST0_RTX (mode);
17348 gcc_unreachable ();
17353 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17354 operands[1+negate], operands[2-negate]);
17356 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17357 operands[2-negate]);
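/* The two unsigned-compare tricks used above, in scalar terms
   (illustrative):

     a >u b                     <==>  (a - 0x80..0) >s (b - 0x80..0)
     a >u b  (QImode/HImode)    <==>  (a -us b) != 0

   The first recentres both operands by the sign bias so a signed
   compare (the only V4SI/V2DI compare available before SSE4.2) gives
   the unsigned ordering; the second uses unsigned saturating
   subtraction, which is nonzero exactly when a > b.  */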
17361 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17362 true if we should do zero extension, else sign extension. HIGH_P is
17363 true if we want the N/2 high elements, else the low elements. */
17366 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17368 enum machine_mode imode = GET_MODE (operands[1]);
17369 rtx (*unpack)(rtx, rtx, rtx);
17376 unpack = gen_vec_interleave_highv16qi;
17378 unpack = gen_vec_interleave_lowv16qi;
17382 unpack = gen_vec_interleave_highv8hi;
17384 unpack = gen_vec_interleave_lowv8hi;
17388 unpack = gen_vec_interleave_highv4si;
17390 unpack = gen_vec_interleave_lowv4si;
17393 gcc_unreachable ();
17396 dest = gen_lowpart (imode, operands[0]);
17399 se = force_reg (imode, CONST0_RTX (imode));
17401 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17402 operands[1], pc_rtx, pc_rtx);
17404 emit_insn (unpack (dest, operands[1], se));
17407 /* This function performs the same task as ix86_expand_sse_unpack,
17408 but with SSE4.1 instructions. */
17411 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17413 enum machine_mode imode = GET_MODE (operands[1]);
17414 rtx (*unpack)(rtx, rtx);
17421 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17423 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17427 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17429 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17433 unpack = gen_sse4_1_zero_extendv2siv2di2;
17435 unpack = gen_sse4_1_sign_extendv2siv2di2;
17438 gcc_unreachable ();
17441 dest = operands[0];
17444 /* Shift higher 8 bytes to lower 8 bytes. */
17445 src = gen_reg_rtx (imode);
17446 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17447 gen_lowpart (V1TImode, operands[1]),
17453 emit_insn (unpack (dest, src));
17456 /* Expand conditional increment or decrement using adc/sbb instructions.
17457 The default case using setcc followed by the conditional move can be
17458 done by generic code. */
17460 ix86_expand_int_addcc (rtx operands[])
17462 enum rtx_code code = GET_CODE (operands[1]);
17464 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17466 rtx val = const0_rtx;
17467 bool fpcmp = false;
17468 enum machine_mode mode;
17469 rtx op0 = XEXP (operands[1], 0);
17470 rtx op1 = XEXP (operands[1], 1);
17472 if (operands[3] != const1_rtx
17473 && operands[3] != constm1_rtx)
17475 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17477 code = GET_CODE (compare_op);
17479 flags = XEXP (compare_op, 0);
17481 if (GET_MODE (flags) == CCFPmode
17482 || GET_MODE (flags) == CCFPUmode)
17485 code = ix86_fp_compare_code_to_integer (code);
17492 PUT_CODE (compare_op,
17493 reverse_condition_maybe_unordered
17494 (GET_CODE (compare_op)));
17496 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17499 mode = GET_MODE (operands[0]);
17501 /* Construct either adc or sbb insn. */
17502 if ((code == LTU) == (operands[3] == constm1_rtx))
17507 insn = gen_subqi3_carry;
17510 insn = gen_subhi3_carry;
17513 insn = gen_subsi3_carry;
17516 insn = gen_subdi3_carry;
17519 gcc_unreachable ();
17527 insn = gen_addqi3_carry;
17530 insn = gen_addhi3_carry;
17533 insn = gen_addsi3_carry;
17536 insn = gen_adddi3_carry;
17539 gcc_unreachable ();
17542 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17544 return 1; /* DONE */
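/* Example of what this enables (illustrative): for

     r += (a < b);       // unsigned comparison

   instead of setcc plus add we can emit

     cmpl  b, a          // computes a - b; sets CF when a <u b
     adcl  $0, r

   and symmetrically an sbb for decrement or the reversed condition.  */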
17548 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17549 works for floating point parameters and non-offsettable memories.
17550 For pushes, it returns just stack offsets; the values will be saved
17551 in the right order. At most four parts are generated. */
17554 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17559 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17561 size = (GET_MODE_SIZE (mode) + 4) / 8;
17563 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17564 gcc_assert (size >= 2 && size <= 4);
17566 /* Optimize constant pool reference to immediates. This is used by fp
17567 moves, that force all constants to memory to allow combining. */
17568 if (MEM_P (operand) && MEM_READONLY_P (operand))
17570 rtx tmp = maybe_get_pool_constant (operand);
17575 if (MEM_P (operand) && !offsettable_memref_p (operand))
17577 /* The only non-offsettable memories we handle are pushes. */
17578 int ok = push_operand (operand, VOIDmode);
17582 operand = copy_rtx (operand);
17583 PUT_MODE (operand, Pmode);
17584 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17588 if (GET_CODE (operand) == CONST_VECTOR)
17590 enum machine_mode imode = int_mode_for_mode (mode);
17591 /* Caution: if we looked through a constant pool memory above,
17592 the operand may actually have a different mode now. That's
17593 ok, since we want to pun this all the way back to an integer. */
17594 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17595 gcc_assert (operand != NULL);
17601 if (mode == DImode)
17602 split_di (&operand, 1, &parts[0], &parts[1]);
17607 if (REG_P (operand))
17609 gcc_assert (reload_completed);
17610 for (i = 0; i < size; i++)
17611 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17613 else if (offsettable_memref_p (operand))
17615 operand = adjust_address (operand, SImode, 0);
17616 parts[0] = operand;
17617 for (i = 1; i < size; i++)
17618 parts[i] = adjust_address (operand, SImode, 4 * i);
17620 else if (GET_CODE (operand) == CONST_DOUBLE)
17625 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17629 real_to_target (l, &r, mode);
17630 parts[3] = gen_int_mode (l[3], SImode);
17631 parts[2] = gen_int_mode (l[2], SImode);
17634 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17635 parts[2] = gen_int_mode (l[2], SImode);
17638 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17641 gcc_unreachable ();
17643 parts[1] = gen_int_mode (l[1], SImode);
17644 parts[0] = gen_int_mode (l[0], SImode);
17647 gcc_unreachable ();
17652 if (mode == TImode)
17653 split_ti (&operand, 1, &parts[0], &parts[1]);
17654 if (mode == XFmode || mode == TFmode)
17656 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
17657 if (REG_P (operand))
17659 gcc_assert (reload_completed);
17660 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17661 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17663 else if (offsettable_memref_p (operand))
17665 operand = adjust_address (operand, DImode, 0);
17666 parts[0] = operand;
17667 parts[1] = adjust_address (operand, upper_mode, 8);
17669 else if (GET_CODE (operand) == CONST_DOUBLE)
17674 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17675 real_to_target (l, &r, mode);
17677 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17678 if (HOST_BITS_PER_WIDE_INT >= 64)
17681 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17682 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17685 parts[0] = immed_double_const (l[0], l[1], DImode);
17687 if (upper_mode == SImode)
17688 parts[1] = gen_int_mode (l[2], SImode);
17689 else if (HOST_BITS_PER_WIDE_INT >= 64)
17692 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17693 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17696 parts[1] = immed_double_const (l[2], l[3], DImode);
17699 gcc_unreachable ();
17706 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17707 Return false when normal moves are needed; true when all required
17708 insns have been emitted. Operands 2-5 receive the output (destination)
17709 parts in the correct order; operands 6-9 receive the input (source) parts. */
17712 ix86_split_long_move (rtx operands[])
17717 int collisions = 0;
17718 enum machine_mode mode = GET_MODE (operands[0]);
17719 bool collisionparts[4];
17721 /* The DFmode expanders may ask us to move double.
17722 For 64bit target this is single move. By hiding the fact
17723 here we simplify i386.md splitters. */
17724 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17726 /* Optimize constant pool reference to immediates. This is used by
17727 fp moves, that force all constants to memory to allow combining. */
17729 if (MEM_P (operands[1])
17730 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17731 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17732 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17733 if (push_operand (operands[0], VOIDmode))
17735 operands[0] = copy_rtx (operands[0]);
17736 PUT_MODE (operands[0], Pmode);
17739 operands[0] = gen_lowpart (DImode, operands[0]);
17740 operands[1] = gen_lowpart (DImode, operands[1]);
17741 emit_move_insn (operands[0], operands[1]);
17745 /* The only non-offsettable memory we handle is push. */
17746 if (push_operand (operands[0], VOIDmode))
17749 gcc_assert (!MEM_P (operands[0])
17750 || offsettable_memref_p (operands[0]));
17752 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17753 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17755 /* When emitting push, take care for source operands on the stack. */
17756 if (push && MEM_P (operands[1])
17757 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17759 rtx src_base = XEXP (part[1][nparts - 1], 0);
17761 /* Compensate for the stack decrement by 4. */
17762 if (!TARGET_64BIT && nparts == 3
17763 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17764 src_base = plus_constant (src_base, 4);
17766 /* src_base refers to the stack pointer and is
17767 automatically decreased by emitted push. */
17768 for (i = 0; i < nparts; i++)
17769 part[1][i] = change_address (part[1][i],
17770 GET_MODE (part[1][i]), src_base);
17773 /* We need to do copy in the right order in case an address register
17774 of the source overlaps the destination. */
17775 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17779 for (i = 0; i < nparts; i++)
17782 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17783 if (collisionparts[i])
17787 /* Collision in the middle part can be handled by reordering. */
17788 if (collisions == 1 && nparts == 3 && collisionparts [1])
17790 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17791 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17793 else if (collisions == 1
17795 && (collisionparts [1] || collisionparts [2]))
17797 if (collisionparts [1])
17799 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17800 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17804 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17805 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17809 /* If there are more collisions, we can't handle it by reordering.
17810 Do an lea to the last part and use only one colliding move. */
17811 else if (collisions > 1)
17817 base = part[0][nparts - 1];
17819 /* Handle the case when the last part isn't valid for lea.
17820 Happens in 64-bit mode storing the 12-byte XFmode. */
17821 if (GET_MODE (base) != Pmode)
17822 base = gen_rtx_REG (Pmode, REGNO (base));
17824 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17825 part[1][0] = replace_equiv_address (part[1][0], base);
17826 for (i = 1; i < nparts; i++)
17828 tmp = plus_constant (base, UNITS_PER_WORD * i);
17829 part[1][i] = replace_equiv_address (part[1][i], tmp);
17840 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17841 emit_insn (gen_addsi3 (stack_pointer_rtx,
17842 stack_pointer_rtx, GEN_INT (-4)));
17843 emit_move_insn (part[0][2], part[1][2]);
17845 else if (nparts == 4)
17847 emit_move_insn (part[0][3], part[1][3]);
17848 emit_move_insn (part[0][2], part[1][2]);
/* In 64-bit mode we don't have a 32-bit push available.  If the operand
   is a register, that is OK - we just use the larger counterpart.  We also
   retype memory - this comes from an attempt to avoid a REX prefix when
   moving the second half of a TFmode value.  */
17857 if (GET_MODE (part[1][1]) == SImode)
17859 switch (GET_CODE (part[1][1]))
17862 part[1][1] = adjust_address (part[1][1], DImode, 0);
17866 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17870 gcc_unreachable ();
17873 if (GET_MODE (part[1][0]) == SImode)
17874 part[1][0] = part[1][1];
17877 emit_move_insn (part[0][1], part[1][1]);
17878 emit_move_insn (part[0][0], part[1][0]);
/* Choose the correct order so as not to overwrite the source before it is copied.  */
17883 if ((REG_P (part[0][0])
17884 && REG_P (part[1][1])
17885 && (REGNO (part[0][0]) == REGNO (part[1][1])
17887 && REGNO (part[0][0]) == REGNO (part[1][2]))
17889 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17891 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17893 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17895 operands[2 + i] = part[0][j];
17896 operands[6 + i] = part[1][j];
17901 for (i = 0; i < nparts; i++)
17903 operands[2 + i] = part[0][i];
17904 operands[6 + i] = part[1][i];
17908 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17909 if (optimize_insn_for_size_p ())
17911 for (j = 0; j < nparts - 1; j++)
17912 if (CONST_INT_P (operands[6 + j])
17913 && operands[6 + j] != const0_rtx
17914 && REG_P (operands[2 + j]))
17915 for (i = j; i < nparts - 1; i++)
17916 if (CONST_INT_P (operands[7 + i])
17917 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17918 operands[7 + i] = operands[2 + j];
17921 for (i = 0; i < nparts; i++)
17922 emit_move_insn (operands[2 + i], operands[6 + i]);
17927 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17928 left shift by a constant, either using a single shift or
17929 a sequence of add instructions. */
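/* For instance (illustrative, not emitted verbatim): with mode == DImode
   on ia32, shifting one SImode part left by 2 on a tuning where two adds
   are cheaper than a constant shift would produce

       addl %eax, %eax
       addl %eax, %eax

   rather than "sall $2, %eax".  */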
17932 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
emit_insn ((mode == DImode
            ? gen_addsi3
            : gen_adddi3) (operand, operand, operand));
17940 else if (!optimize_insn_for_size_p ()
17941 && count * ix86_cost->add <= ix86_cost->shift_const)
for (i = 0; i < count; i++)
emit_insn ((mode == DImode
            ? gen_addsi3
            : gen_adddi3) (operand, operand, operand));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (operand, operand, GEN_INT (count)));
17958 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17960 rtx low[2], high[2];
17962 const int single_width = mode == DImode ? 32 : 64;
17964 if (CONST_INT_P (operands[2]))
17966 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17967 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17969 if (count >= single_width)
17971 emit_move_insn (high[0], low[1]);
17972 emit_move_insn (low[0], const0_rtx);
17974 if (count > single_width)
17975 ix86_expand_ashl_const (high[0], count - single_width, mode);
17979 if (!rtx_equal_p (operands[0], operands[1]))
17980 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shld
            : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17984 ix86_expand_ashl_const (low[0], count, mode);
17989 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17991 if (operands[1] == const1_rtx)
/* Assuming we've chosen QImode-capable registers, then 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
17995 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17997 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17999 ix86_expand_clear (low[0]);
18000 ix86_expand_clear (high[0]);
18001 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
18003 d = gen_lowpart (QImode, low[0]);
18004 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18005 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18006 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18008 d = gen_lowpart (QImode, high[0]);
18009 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18010 s = gen_rtx_NE (QImode, flags, const0_rtx);
18011 emit_insn (gen_rtx_SET (VOIDmode, d, s));
/* Otherwise, we can get the same results by manually performing
   a bit extract operation on bit 5/6, and then performing the two
   shifts.  The two methods of getting 0/1 into low/high are exactly
   the same size.  Avoiding the shift in the bit extract case helps
   the Pentium 4 a bit; no one else seems to care much either way.  */
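/* Illustrative C equivalent of the bit-extract variant for a 64-bit
   "1 << n" split into 32-bit halves, assuming n is already masked
   to 0..63:

       hi = (n >> 5) & 1;		(1 exactly when n >= 32)
       lo = hi ^ 1;			(the other half receives the 1)
       lo <<= n;  hi <<= n;		(hardware masks the count to 5 bits)

   so neither branches nor cmoves are needed.  */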
18023 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
else
  x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
18027 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0],
                            GEN_INT (mode == DImode ? 5 : 6)));
emit_insn ((mode == DImode
            ? gen_andsi3
            : gen_anddi3) (high[0], high[0], const1_rtx));
18036 emit_move_insn (low[0], high[0]);
emit_insn ((mode == DImode
            ? gen_xorsi3
            : gen_xordi3) (low[0], low[0], const1_rtx));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (low[0], low[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (high[0], high[0], operands[2]));
18051 if (operands[1] == constm1_rtx)
18053 /* For -1 << N, we can avoid the shld instruction, because we
18054 know that we're shifting 0...31/63 ones into a -1. */
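/* E.g. for a 64-bit -1 << n with n < 32, the result is
   low = 0xffffffff << n and high = 0xffffffff: the bits shifted out of
   the high word are replaced by the ones shifted in from the low word,
   so the shld that would normally compute the high part is a no-op.  */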
18055 emit_move_insn (low[0], constm1_rtx);
18056 if (optimize_insn_for_size_p ())
emit_move_insn (high[0], low[0]);
else
  emit_move_insn (high[0], constm1_rtx);
18063 if (!rtx_equal_p (operands[0], operands[1]))
18064 emit_move_insn (operands[0], operands[1]);
18066 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shld
            : gen_x86_64_shld) (high[0], low[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashlsi3
            : gen_ashldi3) (low[0], low[0], operands[2]));
18076 if (TARGET_CMOVE && scratch)
18078 ix86_expand_clear (scratch);
18079 emit_insn ((mode == DImode
18080 ? gen_x86_shiftsi_adj_1
18081 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
18085 emit_insn ((mode == DImode
18086 ? gen_x86_shiftsi_adj_2
18087 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
18091 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18093 rtx low[2], high[2];
18095 const int single_width = mode == DImode ? 32 : 64;
18097 if (CONST_INT_P (operands[2]))
18099 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18100 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18102 if (count == single_width * 2 - 1)
18104 emit_move_insn (high[0], high[1]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0],
                            GEN_INT (single_width - 1)));
18109 emit_move_insn (low[0], high[0]);
18112 else if (count >= single_width)
18114 emit_move_insn (low[0], high[1]);
18115 emit_move_insn (high[0], low[0]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0],
                            GEN_INT (single_width - 1)));
18120 if (count > single_width)
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (low[0], low[0],
                            GEN_INT (count - single_width)));
18128 if (!rtx_equal_p (operands[0], operands[1]))
18129 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
18140 if (!rtx_equal_p (operands[0], operands[1]))
18141 emit_move_insn (operands[0], operands[1]);
18143 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (high[0], high[0], operands[2]));
18152 if (TARGET_CMOVE && scratch)
18154 emit_move_insn (scratch, high[0]);
emit_insn ((mode == DImode
            ? gen_ashrsi3
            : gen_ashrdi3) (scratch, scratch,
                            GEN_INT (single_width - 1)));
18159 emit_insn ((mode == DImode
18160 ? gen_x86_shiftsi_adj_1
18161 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18165 emit_insn ((mode == DImode
18166 ? gen_x86_shiftsi_adj_3
18167 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
18172 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18174 rtx low[2], high[2];
18176 const int single_width = mode == DImode ? 32 : 64;
18178 if (CONST_INT_P (operands[2]))
18180 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18181 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18183 if (count >= single_width)
18185 emit_move_insn (low[0], high[1]);
18186 ix86_expand_clear (high[0]);
18188 if (count > single_width)
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (low[0], low[0],
                            GEN_INT (count - single_width)));
18196 if (!rtx_equal_p (operands[0], operands[1]))
18197 emit_move_insn (operands[0], operands[1]);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18208 if (!rtx_equal_p (operands[0], operands[1]))
18209 emit_move_insn (operands[0], operands[1]);
18211 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
emit_insn ((mode == DImode
            ? gen_x86_shrd
            : gen_x86_64_shrd) (low[0], high[0], operands[2]));
emit_insn ((mode == DImode
            ? gen_lshrsi3
            : gen_lshrdi3) (high[0], high[0], operands[2]));
18220 /* Heh. By reversing the arguments, we can reuse this pattern. */
18221 if (TARGET_CMOVE && scratch)
18223 ix86_expand_clear (scratch);
18224 emit_insn ((mode == DImode
18225 ? gen_x86_shiftsi_adj_1
18226 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18230 emit_insn ((mode == DImode
18231 ? gen_x86_shiftsi_adj_2
18232 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
18238 predict_jump (int prob)
18240 rtx insn = get_last_insn ();
18241 gcc_assert (JUMP_P (insn));
18242 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Test VARIABLE for
   whether it is aligned to VALUE bytes (i.e. the relevant low bits are
   clear).  If so, jump to the returned label.  */
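/* E.g. ix86_expand_aligntest (count, 4, true) emits roughly the
   following (illustrative; the and/cmp pair later combines into a test):

       movl  count, %tmp
       andl  $4, %tmp
       je    .Llabel

   and returns the label, so the caller can guard a 4-byte chunk.  */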
18248 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18250 rtx label = gen_label_rtx ();
18251 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18252 if (GET_MODE (variable) == DImode)
emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
else
  emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
                         1, label);
if (epilogue)
  predict_jump (REG_BR_PROB_BASE * 50 / 100);
else
  predict_jump (REG_BR_PROB_BASE * 90 / 100);
return label;
/* Decrease COUNTREG by VALUE.  */
18267 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18269 if (GET_MODE (countreg) == DImode)
emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
else
  emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero-extend the possibly-SImode EXP into a Pmode register.  */
18277 ix86_zero_extend_to_Pmode (rtx exp)
18280 if (GET_MODE (exp) == VOIDmode)
18281 return force_reg (Pmode, exp);
18282 if (GET_MODE (exp) == Pmode)
18283 return copy_to_mode_reg (Pmode, exp);
18284 r = gen_reg_rtx (Pmode);
emit_insn (gen_zero_extendsidi2 (r, exp));
return r;
18289 /* Divide COUNTREG by SCALE. */
18291 scale_counter (rtx countreg, int scale)
18297 if (CONST_INT_P (countreg))
18298 return GEN_INT (INTVAL (countreg) / scale);
18299 gcc_assert (REG_P (countreg));
18301 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18302 GEN_INT (exact_log2 (scale)),
NULL, 1, OPTAB_DIRECT);
return sc;
18307 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18308 DImode for constant loop counts. */
18310 static enum machine_mode
18311 counter_mode (rtx count_exp)
18313 if (GET_MODE (count_exp) != VOIDmode)
18314 return GET_MODE (count_exp);
if (!CONST_INT_P (count_exp))
  return Pmode;
if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
  return DImode;
return SImode;
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
   the overall size is COUNT, specified in bytes.  When SRCPTR is NULL,
   output the equivalent loop to set memory by VALUE (supposed to be
   in MODE).

   The size is rounded down to a whole number of the chunks moved at
   once.  SRCMEM and DESTMEM provide MEM rtx'es to feed proper aliasing
   info.  */
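/* Schematically, the emitted code is (illustrative pseudo-C; the set
   case stores VALUE instead of loading from src):

       iter = 0;
       size = count & ~(piece - 1);	(piece = mode size * unroll)
       if (size == 0) goto out;		(emitted only when piece == 1)
     top:
       copy piece bytes from src + iter to dest + iter;
       iter += piece;
       if (iter < size) goto top;
     out:
       dest += iter;  src += iter;  */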
18332 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18333 rtx destptr, rtx srcptr, rtx value,
18334 rtx count, enum machine_mode mode, int unroll,
18337 rtx out_label, top_label, iter, tmp;
18338 enum machine_mode iter_mode = counter_mode (count);
18339 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18340 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18346 top_label = gen_label_rtx ();
18347 out_label = gen_label_rtx ();
18348 iter = gen_reg_rtx (iter_mode);
18350 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18351 NULL, 1, OPTAB_DIRECT);
18352 /* Those two should combine. */
18353 if (piece_size == const1_rtx)
18355 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18357 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18359 emit_move_insn (iter, const0_rtx);
18361 emit_label (top_label);
18363 tmp = convert_modes (Pmode, iter_mode, iter, true);
18364 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18365 destmem = change_address (destmem, mode, x_addr);
18369 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18370 srcmem = change_address (srcmem, mode, y_addr);
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
18375 if (!TARGET_64BIT && 0)
18377 for (i = 0; i < unroll; i++)
18382 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18384 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18386 emit_move_insn (destmem, srcmem);
18392 gcc_assert (unroll <= 4);
18393 for (i = 0; i < unroll; i++)
18395 tmpreg[i] = gen_reg_rtx (mode);
18399 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18401 emit_move_insn (tmpreg[i], srcmem);
18403 for (i = 0; i < unroll; i++)
18408 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18410 emit_move_insn (destmem, tmpreg[i]);
18415 for (i = 0; i < unroll; i++)
18419 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18420 emit_move_insn (destmem, value);
18423 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18424 true, OPTAB_LIB_WIDEN);
18426 emit_move_insn (iter, tmp);
18428 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18430 if (expected_size != -1)
18432 expected_size /= GET_MODE_SIZE (mode) * unroll;
18433 if (expected_size == 0)
18435 else if (expected_size > REG_BR_PROB_BASE)
18436 predict_jump (REG_BR_PROB_BASE - 1);
18438 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18441 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18442 iter = ix86_zero_extend_to_Pmode (iter);
18443 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18444 true, OPTAB_LIB_WIDEN);
18445 if (tmp != destptr)
18446 emit_move_insn (destptr, tmp);
18449 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18450 true, OPTAB_LIB_WIDEN);
18452 emit_move_insn (srcptr, tmp);
18454 emit_label (out_label);
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
18460 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18461 rtx destptr, rtx srcptr,
18463 enum machine_mode mode)
18469 /* If the size is known, it is shorter to use rep movs. */
18470 if (mode == QImode && CONST_INT_P (count)
18471 && !(INTVAL (count) & 3))
18474 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18475 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18476 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18477 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18478 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18479 if (mode != QImode)
18481 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18482 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18483 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18484 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18485 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18486 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18490 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18491 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18493 if (CONST_INT_P (count))
18495 count = GEN_INT (INTVAL (count)
18496 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18497 destmem = shallow_copy_rtx (destmem);
18498 srcmem = shallow_copy_rtx (srcmem);
18499 set_mem_size (destmem, count);
18500 set_mem_size (srcmem, count);
18504 if (MEM_SIZE (destmem))
18505 set_mem_size (destmem, NULL_RTX);
18506 if (MEM_SIZE (srcmem))
18507 set_mem_size (srcmem, NULL_RTX);
18509 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
18516 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18517 rtx count, enum machine_mode mode,
18523 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18524 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18525 value = force_reg (mode, gen_lowpart (mode, value));
18526 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18527 if (mode != QImode)
18529 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18530 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18531 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18534 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18535 if (orig_value == const0_rtx && CONST_INT_P (count))
18537 count = GEN_INT (INTVAL (count)
18538 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18539 destmem = shallow_copy_rtx (destmem);
18540 set_mem_size (destmem, count);
18542 else if (MEM_SIZE (destmem))
18543 set_mem_size (destmem, NULL_RTX);
18544 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
18548 emit_strmov (rtx destmem, rtx srcmem,
18549 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18551 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18552 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18553 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18556 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
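/* E.g. with a constant remainder of 14 (binary 1110) and max_size 16,
   the constant path below emits one 8-byte move (two 4-byte moves on
   ia32), one 4-byte move and one 2-byte move - one move per set bit,
   with no loops or branches.  */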
18558 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18559 rtx destptr, rtx srcptr, rtx count, int max_size)
18562 if (CONST_INT_P (count))
18564 HOST_WIDE_INT countval = INTVAL (count);
18567 if ((countval & 0x10) && max_size > 16)
18571 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18572 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18575 gcc_unreachable ();
18578 if ((countval & 0x08) && max_size > 8)
18581 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18584 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18585 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18589 if ((countval & 0x04) && max_size > 4)
18591 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18594 if ((countval & 0x02) && max_size > 2)
18596 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18599 if ((countval & 0x01) && max_size > 1)
18601 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18608 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18609 count, 1, OPTAB_DIRECT);
18610 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18611 count, QImode, 1, 4);
/* When there are stringops, we can cheaply increase dest and src pointers.
   Otherwise we save code size by maintaining offset (zero is readily
   available from the preceding rep operation) and using x86 addressing
   modes.  */
18619 if (TARGET_SINGLE_STRINGOP)
18623 rtx label = ix86_expand_aligntest (count, 4, true);
18624 src = change_address (srcmem, SImode, srcptr);
18625 dest = change_address (destmem, SImode, destptr);
18626 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18627 emit_label (label);
18628 LABEL_NUSES (label) = 1;
18632 rtx label = ix86_expand_aligntest (count, 2, true);
18633 src = change_address (srcmem, HImode, srcptr);
18634 dest = change_address (destmem, HImode, destptr);
18635 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18636 emit_label (label);
18637 LABEL_NUSES (label) = 1;
18641 rtx label = ix86_expand_aligntest (count, 1, true);
18642 src = change_address (srcmem, QImode, srcptr);
18643 dest = change_address (destmem, QImode, destptr);
18644 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18645 emit_label (label);
18646 LABEL_NUSES (label) = 1;
18651 rtx offset = force_reg (Pmode, const0_rtx);
18656 rtx label = ix86_expand_aligntest (count, 4, true);
18657 src = change_address (srcmem, SImode, srcptr);
18658 dest = change_address (destmem, SImode, destptr);
18659 emit_move_insn (dest, src);
18660 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18661 true, OPTAB_LIB_WIDEN);
18663 emit_move_insn (offset, tmp);
18664 emit_label (label);
18665 LABEL_NUSES (label) = 1;
18669 rtx label = ix86_expand_aligntest (count, 2, true);
18670 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18671 src = change_address (srcmem, HImode, tmp);
18672 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18673 dest = change_address (destmem, HImode, tmp);
18674 emit_move_insn (dest, src);
18675 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18676 true, OPTAB_LIB_WIDEN);
18678 emit_move_insn (offset, tmp);
18679 emit_label (label);
18680 LABEL_NUSES (label) = 1;
18684 rtx label = ix86_expand_aligntest (count, 1, true);
18685 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18686 src = change_address (srcmem, QImode, tmp);
18687 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18688 dest = change_address (destmem, QImode, tmp);
18689 emit_move_insn (dest, src);
18690 emit_label (label);
18691 LABEL_NUSES (label) = 1;
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
18698 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18699 rtx count, int max_size)
18702 expand_simple_binop (counter_mode (count), AND, count,
18703 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18704 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18705 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
18711 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18715 if (CONST_INT_P (count))
18717 HOST_WIDE_INT countval = INTVAL (count);
18720 if ((countval & 0x10) && max_size > 16)
18724 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18725 emit_insn (gen_strset (destptr, dest, value));
18726 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18727 emit_insn (gen_strset (destptr, dest, value));
18730 gcc_unreachable ();
18733 if ((countval & 0x08) && max_size > 8)
18737 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18738 emit_insn (gen_strset (destptr, dest, value));
18742 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18743 emit_insn (gen_strset (destptr, dest, value));
18744 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18745 emit_insn (gen_strset (destptr, dest, value));
18749 if ((countval & 0x04) && max_size > 4)
18751 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18752 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18755 if ((countval & 0x02) && max_size > 2)
18757 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18758 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18761 if ((countval & 0x01) && max_size > 1)
18763 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18764 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18771 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18776 rtx label = ix86_expand_aligntest (count, 16, true);
18779 dest = change_address (destmem, DImode, destptr);
18780 emit_insn (gen_strset (destptr, dest, value));
18781 emit_insn (gen_strset (destptr, dest, value));
18785 dest = change_address (destmem, SImode, destptr);
18786 emit_insn (gen_strset (destptr, dest, value));
18787 emit_insn (gen_strset (destptr, dest, value));
18788 emit_insn (gen_strset (destptr, dest, value));
18789 emit_insn (gen_strset (destptr, dest, value));
18791 emit_label (label);
18792 LABEL_NUSES (label) = 1;
18796 rtx label = ix86_expand_aligntest (count, 8, true);
18799 dest = change_address (destmem, DImode, destptr);
18800 emit_insn (gen_strset (destptr, dest, value));
18804 dest = change_address (destmem, SImode, destptr);
18805 emit_insn (gen_strset (destptr, dest, value));
18806 emit_insn (gen_strset (destptr, dest, value));
18808 emit_label (label);
18809 LABEL_NUSES (label) = 1;
18813 rtx label = ix86_expand_aligntest (count, 4, true);
18814 dest = change_address (destmem, SImode, destptr);
18815 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18816 emit_label (label);
18817 LABEL_NUSES (label) = 1;
18821 rtx label = ix86_expand_aligntest (count, 2, true);
18822 dest = change_address (destmem, HImode, destptr);
18823 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18824 emit_label (label);
18825 LABEL_NUSES (label) = 1;
18829 rtx label = ix86_expand_aligntest (count, 1, true);
18830 dest = change_address (destmem, QImode, destptr);
18831 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18832 emit_label (label);
18833 LABEL_NUSES (label) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
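/* E.g. to go from ALIGN 1 to DESIRED_ALIGNMENT 8, the code below tests
   the low bits of the destination address in turn: if bit 0 is set it
   copies one byte, if bit 1 is set two bytes, if bit 2 is set four
   bytes, adjusting COUNT each time, after which the destination is
   8-byte aligned.  */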
18840 expand_movmem_prologue (rtx destmem, rtx srcmem,
18841 rtx destptr, rtx srcptr, rtx count,
18842 int align, int desired_alignment)
18844 if (align <= 1 && desired_alignment > 1)
18846 rtx label = ix86_expand_aligntest (destptr, 1, false);
18847 srcmem = change_address (srcmem, QImode, srcptr);
18848 destmem = change_address (destmem, QImode, destptr);
18849 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18850 ix86_adjust_counter (count, 1);
18851 emit_label (label);
18852 LABEL_NUSES (label) = 1;
18854 if (align <= 2 && desired_alignment > 2)
18856 rtx label = ix86_expand_aligntest (destptr, 2, false);
18857 srcmem = change_address (srcmem, HImode, srcptr);
18858 destmem = change_address (destmem, HImode, destptr);
18859 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18860 ix86_adjust_counter (count, 2);
18861 emit_label (label);
18862 LABEL_NUSES (label) = 1;
18864 if (align <= 4 && desired_alignment > 4)
18866 rtx label = ix86_expand_aligntest (destptr, 4, false);
18867 srcmem = change_address (srcmem, SImode, srcptr);
18868 destmem = change_address (destmem, SImode, destptr);
18869 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18870 ix86_adjust_counter (count, 4);
18871 emit_label (label);
18872 LABEL_NUSES (label) = 1;
18874 gcc_assert (desired_alignment <= 8);
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
18880 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18881 int desired_align, int align_bytes)
18884 rtx src_size, dst_size;
18886 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18887 if (src_align_bytes >= 0)
18888 src_align_bytes = desired_align - src_align_bytes;
18889 src_size = MEM_SIZE (src);
18890 dst_size = MEM_SIZE (dst);
18891 if (align_bytes & 1)
18893 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18894 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18896 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18898 if (align_bytes & 2)
18900 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18901 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18902 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18903 set_mem_align (dst, 2 * BITS_PER_UNIT);
18904 if (src_align_bytes >= 0
18905 && (src_align_bytes & 1) == (align_bytes & 1)
18906 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18907 set_mem_align (src, 2 * BITS_PER_UNIT);
18909 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18911 if (align_bytes & 4)
18913 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18914 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18915 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18916 set_mem_align (dst, 4 * BITS_PER_UNIT);
18917 if (src_align_bytes >= 0)
18919 unsigned int src_align = 0;
18920 if ((src_align_bytes & 3) == (align_bytes & 3))
18922 else if ((src_align_bytes & 1) == (align_bytes & 1))
18924 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18925 set_mem_align (src, src_align * BITS_PER_UNIT);
18928 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18930 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18931 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18932 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18933 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18934 if (src_align_bytes >= 0)
18936 unsigned int src_align = 0;
18937 if ((src_align_bytes & 7) == (align_bytes & 7))
18939 else if ((src_align_bytes & 3) == (align_bytes & 3))
18941 else if ((src_align_bytes & 1) == (align_bytes & 1))
18943 if (src_align > (unsigned int) desired_align)
18944 src_align = desired_align;
18945 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18946 set_mem_align (src, src_align * BITS_PER_UNIT);
18949 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18951 set_mem_size (dst, GEN_INT (INTVAL (src_size) - align_bytes));
/* Store enough into DEST to align it, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
18959 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18960 int align, int desired_alignment)
18962 if (align <= 1 && desired_alignment > 1)
18964 rtx label = ix86_expand_aligntest (destptr, 1, false);
18965 destmem = change_address (destmem, QImode, destptr);
18966 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18967 ix86_adjust_counter (count, 1);
18968 emit_label (label);
18969 LABEL_NUSES (label) = 1;
18971 if (align <= 2 && desired_alignment > 2)
18973 rtx label = ix86_expand_aligntest (destptr, 2, false);
18974 destmem = change_address (destmem, HImode, destptr);
18975 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18976 ix86_adjust_counter (count, 2);
18977 emit_label (label);
18978 LABEL_NUSES (label) = 1;
18980 if (align <= 4 && desired_alignment > 4)
18982 rtx label = ix86_expand_aligntest (destptr, 4, false);
18983 destmem = change_address (destmem, SImode, destptr);
18984 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18985 ix86_adjust_counter (count, 4);
18986 emit_label (label);
18987 LABEL_NUSES (label) = 1;
18989 gcc_assert (desired_alignment <= 8);
/* Store enough into DST to align DST to DESIRED_ALIGN.  ALIGN_BYTES
   is how many bytes need to be stored.  */
18995 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18996 int desired_align, int align_bytes)
18999 rtx dst_size = MEM_SIZE (dst);
19000 if (align_bytes & 1)
19002 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19004 emit_insn (gen_strset (destreg, dst,
19005 gen_lowpart (QImode, value)));
19007 if (align_bytes & 2)
19009 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19010 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19011 set_mem_align (dst, 2 * BITS_PER_UNIT);
19013 emit_insn (gen_strset (destreg, dst,
19014 gen_lowpart (HImode, value)));
19016 if (align_bytes & 4)
19018 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19019 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19020 set_mem_align (dst, 4 * BITS_PER_UNIT);
19022 emit_insn (gen_strset (destreg, dst,
19023 gen_lowpart (SImode, value)));
19025 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19026 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19027 set_mem_align (dst, desired_align * BITS_PER_UNIT);
19029 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
/* Given COUNT and EXPECTED_SIZE, decide on the codegen of the string
   operation.  */
19034 static enum stringop_alg
19035 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19036 int *dynamic_check)
19038 const struct stringop_algs * algs;
19039 bool optimize_for_speed;
19040 /* Algorithms using the rep prefix want at least edi and ecx;
19041 additionally, memset wants eax and memcpy wants esi. Don't
19042 consider such algorithms if the user has appropriated those
19043 registers for their own purposes. */
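/* For reference: "rep movs" implicitly uses %ecx (count), %esi (source)
   and %edi (destination), while "rep stos" implicitly uses %ecx, %edi
   and %eax (the value), which is why exactly these fixed registers are
   tested here.  */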
19044 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19046 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19048 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19049 || (alg != rep_prefix_1_byte \
19050 && alg != rep_prefix_4_byte \
19051 && alg != rep_prefix_8_byte))
19052 const struct processor_costs *cost;
19054 /* Even if the string operation call is cold, we still might spend a lot
19055 of time processing large blocks. */
19056 if (optimize_function_for_size_p (cfun)
19057 || (optimize_insn_for_size_p ()
19058 && expected_size != -1 && expected_size < 256))
19059 optimize_for_speed = false;
19061 optimize_for_speed = true;
19063 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19065 *dynamic_check = -1;
if (memset)
  algs = &cost->memset[TARGET_64BIT != 0];
else
  algs = &cost->memcpy[TARGET_64BIT != 0];
19070 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19071 return stringop_alg;
19072 /* rep; movq or rep; movl is the smallest variant. */
19073 else if (!optimize_for_speed)
19075 if (!count || (count & 3))
return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
else
  return rep_prefix_usable ? rep_prefix_4_byte : loop;
/* Very tiny blocks are best handled via the loop; REP is expensive to
   set up.  */
19082 else if (expected_size != -1 && expected_size < 4)
19083 return loop_1_byte;
19084 else if (expected_size != -1)
19087 enum stringop_alg alg = libcall;
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19090 /* We get here if the algorithms that were not libcall-based
19091 were rep-prefix based and we are unable to use rep prefixes
19092 based on global register usage. Break out of the loop and
19093 use the heuristic below. */
19094 if (algs->size[i].max == 0)
19096 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19098 enum stringop_alg candidate = algs->size[i].alg;
19100 if (candidate != libcall && ALG_USABLE_P (candidate))
19102 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19103 last non-libcall inline algorithm. */
19104 if (TARGET_INLINE_ALL_STRINGOPS)
19106 /* When the current size is best to be copied by a libcall,
19107 but we are still forced to inline, run the heuristic below
19108 that will pick code for medium sized blocks. */
19109 if (alg != libcall)
19113 else if (ALG_USABLE_P (candidate))
19117 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand
   and take blocks of at most that size, guessing that the average size
   will be roughly half of the block.

   If this turns out to be bad, we might simply specify the preferred
   choice in ix86_costs.  */
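/* Illustrative example: if the cost table says blocks of up to 1024
   bytes are faster to copy inline, we recurse assuming a size of 512
   and, under -minline-stringops-dynamically, emit a runtime check that
   diverts blocks of 1024 bytes or more to the libcall.  */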
19126 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19127 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19130 enum stringop_alg alg;
19132 bool any_alg_usable_p = true;
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19136 enum stringop_alg candidate = algs->size[i].alg;
19137 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19139 if (candidate != libcall && candidate
19140 && ALG_USABLE_P (candidate))
19141 max = algs->size[i].max;
19143 /* If there aren't any usable algorithms, then recursing on
19144 smaller sizes isn't going to find anything. Just return the
19145 simple byte-at-a-time copy loop. */
19146 if (!any_alg_usable_p)
19148 /* Pick something reasonable. */
19149 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19150 *dynamic_check = 128;
19151 return loop_1_byte;
19155 alg = decide_alg (count, max / 2, memset, dynamic_check);
19156 gcc_assert (*dynamic_check == -1);
19157 gcc_assert (alg != libcall);
19158 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19159 *dynamic_check = max;
19162 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19163 #undef ALG_USABLE_P
19166 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19167 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19169 decide_alignment (int align,
19170 enum stringop_alg alg,
19173 int desired_align = 0;
19177 gcc_unreachable ();
19179 case unrolled_loop:
19180 desired_align = GET_MODE_SIZE (Pmode);
19182 case rep_prefix_8_byte:
19185 case rep_prefix_4_byte:
/* PentiumPro has special logic triggering for 8-byte aligned blocks,
   copying a whole cache line at once.  */
19188 if (TARGET_PENTIUMPRO)
19193 case rep_prefix_1_byte:
/* PentiumPro has special logic triggering for 8-byte aligned blocks,
   copying a whole cache line at once.  */
19196 if (TARGET_PENTIUMPRO)
19210 if (desired_align < align)
19211 desired_align = align;
19212 if (expected_size != -1 && expected_size < 4)
19213 desired_align = align;
19214 return desired_align;
19217 /* Return the smallest power of 2 greater than VAL. */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: a conditional that jumps up to the epilogues for
      small blocks that can be handled by the epilogue alone.  This is
      faster but also needed for correctness, since the prologue assumes
      the block is larger than the desired alignment.
19237 Optional dynamic check for size and libcall for large
19238 blocks is emitted here too, with -minline-stringops-dynamically.
19240 2) Prologue: copy first few bytes in order to get destination aligned
19241 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19243 We emit either a jump tree on power of two sized blocks, or a byte loop.
19245 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19246 with specified algorithm.
19248 4) Epilogue: code copying tail of the block that is too small to be
19249 handled by main body (or up to size guarded by prologue guard). */
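/* As a concrete sketch (illustrative only), a 4-byte rep-prefixed
   memcpy of N bytes into an unaligned destination expands roughly to:

       if (N < epilogue_size_needed) goto epilogue;
       copy single bytes until dest is 4-byte aligned, N -= copied;
       rep movsl with %ecx = N / 4;
     epilogue:
       copy the remaining N & 3 bytes.  */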
19252 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19253 rtx expected_align_exp, rtx expected_size_exp)
19259 rtx jump_around_label = NULL;
19260 HOST_WIDE_INT align = 1;
19261 unsigned HOST_WIDE_INT count = 0;
19262 HOST_WIDE_INT expected_size = -1;
19263 int size_needed = 0, epilogue_size_needed;
19264 int desired_align = 0, align_bytes = 0;
19265 enum stringop_alg alg;
19267 bool need_zero_guard = false;
19269 if (CONST_INT_P (align_exp))
19270 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
19272 if (CONST_INT_P (expected_align_exp)
19273 && INTVAL (expected_align_exp) > align)
19274 align = INTVAL (expected_align_exp);
19275 /* ALIGN is the minimum of destination and source alignment, but we care here
19276 just about destination alignment. */
19277 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19278 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19280 if (CONST_INT_P (count_exp))
19281 count = expected_size = INTVAL (count_exp);
19282 if (CONST_INT_P (expected_size_exp) && count == 0)
19283 expected_size = INTVAL (expected_size_exp);
19285 /* Make sure we don't need to care about overflow later on. */
19286 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19289 /* Step 0: Decide on preferred algorithm, desired alignment and
19290 size of chunks to be copied by main loop. */
19292 alg = decide_alg (count, expected_size, false, &dynamic_check);
19293 desired_align = decide_alignment (align, alg, expected_size);
19295 if (!TARGET_ALIGN_STRINGOPS)
19296 align = desired_align;
19298 if (alg == libcall)
19300 gcc_assert (alg != no_stringop);
19302 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19303 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19304 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19309 gcc_unreachable ();
19311 need_zero_guard = true;
19312 size_needed = GET_MODE_SIZE (Pmode);
19314 case unrolled_loop:
19315 need_zero_guard = true;
19316 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19318 case rep_prefix_8_byte:
19321 case rep_prefix_4_byte:
19324 case rep_prefix_1_byte:
19328 need_zero_guard = true;
19333 epilogue_size_needed = size_needed;
19335 /* Step 1: Prologue guard. */
/* The alignment code needs the count in a register.  */
19338 if (CONST_INT_P (count_exp) && desired_align > align)
19340 if (INTVAL (count_exp) > desired_align
19341 && INTVAL (count_exp) > size_needed)
19344 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19345 if (align_bytes <= 0)
19348 align_bytes = desired_align - align_bytes;
19350 if (align_bytes == 0)
19351 count_exp = force_reg (counter_mode (count_exp), count_exp);
19353 gcc_assert (desired_align >= 1 && align >= 1);
19355 /* Ensure that alignment prologue won't copy past end of block. */
19356 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19358 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
19361 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19365 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* If the main algorithm works on QImode, no epilogue is needed.
   For small sizes just don't align anything.  */
19369 if (size_needed == 1)
19370 desired_align = align;
19377 label = gen_label_rtx ();
19378 emit_cmp_and_jump_insns (count_exp,
19379 GEN_INT (epilogue_size_needed),
19380 LTU, 0, counter_mode (count_exp), 1, label);
19381 if (expected_size == -1 || expected_size < epilogue_size_needed)
19382 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19384 predict_jump (REG_BR_PROB_BASE * 20 / 100);
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
19390 if (dynamic_check != -1)
19392 if (CONST_INT_P (count_exp))
19394 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19396 emit_block_move_via_libcall (dst, src, count_exp, false);
19397 count_exp = const0_rtx;
19403 rtx hot_label = gen_label_rtx ();
19404 jump_around_label = gen_label_rtx ();
19405 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19406 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19407 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19408 emit_block_move_via_libcall (dst, src, count_exp, false);
19409 emit_jump (jump_around_label);
19410 emit_label (hot_label);
19414 /* Step 2: Alignment prologue. */
19416 if (desired_align > align)
19418 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in the aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
19424 src = change_address (src, BLKmode, srcreg);
19425 dst = change_address (dst, BLKmode, destreg);
19426 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19431 /* If we know how many bytes need to be stored before dst is
19432 sufficiently aligned, maintain aliasing info accurately. */
19433 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19434 desired_align, align_bytes);
19435 count_exp = plus_constant (count_exp, -align_bytes);
19436 count -= align_bytes;
19438 if (need_zero_guard
19439 && (count < (unsigned HOST_WIDE_INT) size_needed
19440 || (align_bytes == 0
19441 && count < ((unsigned HOST_WIDE_INT) size_needed
19442 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   operate.  */
19446 gcc_assert (size_needed > 1);
19447 if (label == NULL_RTX)
19448 label = gen_label_rtx ();
19449 emit_cmp_and_jump_insns (count_exp,
19450 GEN_INT (size_needed),
19451 LTU, 0, counter_mode (count_exp), 1, label);
19452 if (expected_size == -1
19453 || expected_size < (desired_align - align) / 2 + size_needed)
19454 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19456 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19459 if (label && size_needed == 1)
19461 emit_label (label);
19462 LABEL_NUSES (label) = 1;
19464 epilogue_size_needed = 1;
19466 else if (label == NULL_RTX)
19467 epilogue_size_needed = size_needed;
19469 /* Step 3: Main loop. */
19475 gcc_unreachable ();
19477 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19478 count_exp, QImode, 1, expected_size);
19481 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19482 count_exp, Pmode, 1, expected_size);
19484 case unrolled_loop:
19485 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
19486 registers for 4 temporaries anyway. */
19487 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19488 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19491 case rep_prefix_8_byte:
19492 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19495 case rep_prefix_4_byte:
19496 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19499 case rep_prefix_1_byte:
19500 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
/* Properly adjust the offsets of the src and dest memory for aliasing.  */
19505 if (CONST_INT_P (count_exp))
19507 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19508 (count / size_needed) * size_needed);
19509 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19510 (count / size_needed) * size_needed);
19514 src = change_address (src, BLKmode, srcreg);
19515 dst = change_address (dst, BLKmode, destreg);
19518 /* Step 4: Epilogue to copy the remaining bytes. */
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
19527 if (size_needed < epilogue_size_needed)
19530 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19531 GEN_INT (size_needed - 1), count_exp, 1,
19533 if (tmp != count_exp)
19534 emit_move_insn (count_exp, tmp);
19536 emit_label (label);
19537 LABEL_NUSES (label) = 1;
19540 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19541 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19542 epilogue_size_needed);
19543 if (jump_around_label)
19544 emit_label (jump_around_label);
/* Helper function for memset.  For a QImode value 0xXY, produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x01010101, but we can do slightly better than synth_mult
   by unwinding the sequence by hand on CPUs with slow multiply.  */
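/* Worked SImode example for VAL = 0xab (illustrative):

       reg  = 0x000000ab
       reg |= reg << 8;		yielding 0x0000abab
       reg |= reg << 16;	yielding 0xabababab

   i.e. the by-hand unwinding of reg * 0x01010101; DImode adds one more
   shift-and-or step by 32.  */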
19554 promote_duplicated_reg (enum machine_mode mode, rtx val)
19556 enum machine_mode valmode = GET_MODE (val);
19558 int nops = mode == DImode ? 3 : 2;
19560 gcc_assert (mode == SImode || mode == DImode);
19561 if (val == const0_rtx)
19562 return copy_to_mode_reg (mode, const0_rtx);
19563 if (CONST_INT_P (val))
HOST_WIDE_INT v = INTVAL (val) & 255;

v |= v << 8;
v |= v << 16;
if (mode == DImode)
19570 v |= (v << 16) << 16;
19571 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19574 if (valmode == VOIDmode)
19576 if (valmode != QImode)
19577 val = gen_lowpart (QImode, val);
19578 if (mode == QImode)
19580 if (!TARGET_PARTIAL_REG_STALL)
19582 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19583 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19584 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19585 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19587 rtx reg = convert_modes (mode, QImode, val, true);
19588 tmp = promote_duplicated_reg (mode, const1_rtx);
19589 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19594 rtx reg = convert_modes (mode, QImode, val, true);
19596 if (!TARGET_PARTIAL_REG_STALL)
19597 if (mode == SImode)
19598 emit_insn (gen_movsi_insv_1 (reg, reg));
19600 emit_insn (gen_movdi_insv_1 (reg, reg));
19603 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19604 NULL, 1, OPTAB_DIRECT);
19606 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19608 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19609 NULL, 1, OPTAB_DIRECT);
19610 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19611 if (mode == SImode)
19613 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19614 NULL, 1, OPTAB_DIRECT);
19615 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
/* Duplicate value VAL using promote_duplicated_reg into the maximal
   size that will be needed by the main loop copying SIZE_NEEDED chunks
   and by the prologue getting the alignment from ALIGN up to
   DESIRED_ALIGN.  */
19624 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19629 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19630 promoted_val = promote_duplicated_reg (DImode, val);
19631 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19632 promoted_val = promote_duplicated_reg (SImode, val);
19633 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19634 promoted_val = promote_duplicated_reg (HImode, val);
19636 promoted_val = val;
19638 return promoted_val;
19641 /* Expand string clear operation (bzero). Use i386 string operations when
19642 profitable. See expand_movmem comment for explanation of individual
19643 steps performed. */
19645 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19646 rtx expected_align_exp, rtx expected_size_exp)
19651 rtx jump_around_label = NULL;
19652 HOST_WIDE_INT align = 1;
19653 unsigned HOST_WIDE_INT count = 0;
19654 HOST_WIDE_INT expected_size = -1;
19655 int size_needed = 0, epilogue_size_needed;
19656 int desired_align = 0, align_bytes = 0;
19657 enum stringop_alg alg;
19658 rtx promoted_val = NULL;
19659 bool force_loopy_epilogue = false;
19661 bool need_zero_guard = false;
19663 if (CONST_INT_P (align_exp))
19664 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
19666 if (CONST_INT_P (expected_align_exp)
19667 && INTVAL (expected_align_exp) > align)
19668 align = INTVAL (expected_align_exp);
19669 if (CONST_INT_P (count_exp))
19670 count = expected_size = INTVAL (count_exp);
19671 if (CONST_INT_P (expected_size_exp) && count == 0)
19672 expected_size = INTVAL (expected_size_exp);
19674 /* Make sure we don't need to care about overflow later on. */
19675 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19678 /* Step 0: Decide on preferred algorithm, desired alignment and
19679 size of chunks to be copied by main loop. */
19681 alg = decide_alg (count, expected_size, true, &dynamic_check);
19682 desired_align = decide_alignment (align, alg, expected_size);
19684 if (!TARGET_ALIGN_STRINGOPS)
19685 align = desired_align;
19687 if (alg == libcall)
19689 gcc_assert (alg != no_stringop);
19691 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19692 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19697 gcc_unreachable ();
19699 need_zero_guard = true;
19700 size_needed = GET_MODE_SIZE (Pmode);
19702 case unrolled_loop:
19703 need_zero_guard = true;
19704 size_needed = GET_MODE_SIZE (Pmode) * 4;
19706 case rep_prefix_8_byte:
19709 case rep_prefix_4_byte:
19712 case rep_prefix_1_byte:
19716 need_zero_guard = true;
19720 epilogue_size_needed = size_needed;
19722 /* Step 1: Prologue guard. */
19724 /* Alignment code needs count to be in register. */
19725 if (CONST_INT_P (count_exp) && desired_align > align)
19727 if (INTVAL (count_exp) > desired_align
19728 && INTVAL (count_exp) > size_needed)
19731 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19732 if (align_bytes <= 0)
19735 align_bytes = desired_align - align_bytes;
19737 if (align_bytes == 0)
19739 enum machine_mode mode = SImode;
19740 if (TARGET_64BIT && (count & ~0xffffffff))
19742 count_exp = force_reg (mode, count_exp);
/* Do the cheap promotion to allow better CSE across the
   main loop and epilogue (i.e. one load of the big constant in the
   front of all code).  */
19748 if (CONST_INT_P (val_exp))
19749 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19750 desired_align, align);
19751 /* Ensure that alignment prologue won't copy past end of block. */
19752 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19754 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19755 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
Make sure it is a power of 2.  */
19757 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
/* To improve performance of small blocks, we jump around the VAL
   promoting mode.  This means that if the promoted VAL is not constant,
   we might not use it in the epilogue and have to use a byte
   loop instead.  */
19763 if (epilogue_size_needed > 2 && !promoted_val)
19764 force_loopy_epilogue = true;
19767 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
/* If the main algorithm works on QImode, no epilogue is needed.
   For small sizes just don't align anything.  */
19771 if (size_needed == 1)
19772 desired_align = align;
19779 label = gen_label_rtx ();
19780 emit_cmp_and_jump_insns (count_exp,
19781 GEN_INT (epilogue_size_needed),
19782 LTU, 0, counter_mode (count_exp), 1, label);
19783 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19784 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19786 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19789 if (dynamic_check != -1)
19791 rtx hot_label = gen_label_rtx ();
19792 jump_around_label = gen_label_rtx ();
19793 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19794 LEU, 0, counter_mode (count_exp), 1, hot_label);
19795 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19796 set_storage_via_libcall (dst, count_exp, val_exp, false);
19797 emit_jump (jump_around_label);
19798 emit_label (hot_label);
19801 /* Step 2: Alignment prologue. */
19803 /* Do the expensive promotion once we branched off the small blocks. */
19805 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19806 desired_align, align);
19807 gcc_assert (desired_align >= 1 && align >= 1);
19809 if (desired_align > align)
19811 if (align_bytes == 0)
/* Except for the first move in the epilogue, we no longer know
   the constant offset in the aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
19817 dst = change_address (dst, BLKmode, destreg);
19818 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19823 /* If we know how many bytes need to be stored before dst is
19824 sufficiently aligned, maintain aliasing info accurately. */
19825 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19826 desired_align, align_bytes);
19827 count_exp = plus_constant (count_exp, -align_bytes);
19828 count -= align_bytes;
19830 if (need_zero_guard
19831 && (count < (unsigned HOST_WIDE_INT) size_needed
19832 || (align_bytes == 0
19833 && count < ((unsigned HOST_WIDE_INT) size_needed
19834 + desired_align - align))))
/* It is possible that we copied enough so the main loop will not
   operate.  */
19838 gcc_assert (size_needed > 1);
19839 if (label == NULL_RTX)
19840 label = gen_label_rtx ();
19841 emit_cmp_and_jump_insns (count_exp,
19842 GEN_INT (size_needed),
19843 LTU, 0, counter_mode (count_exp), 1, label);
19844 if (expected_size == -1
19845 || expected_size < (desired_align - align) / 2 + size_needed)
19846 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19848 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19851 if (label && size_needed == 1)
19853 emit_label (label);
19854 LABEL_NUSES (label) = 1;
19856 promoted_val = val_exp;
19857 epilogue_size_needed = 1;
19859 else if (label == NULL_RTX)
19860 epilogue_size_needed = size_needed;
19862 /* Step 3: Main loop. */
19868 gcc_unreachable ();
19870 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19871 count_exp, QImode, 1, expected_size);
19874 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19875 count_exp, Pmode, 1, expected_size);
19877 case unrolled_loop:
19878 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19879 count_exp, Pmode, 4, expected_size);
19881 case rep_prefix_8_byte:
19882 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19885 case rep_prefix_4_byte:
19886 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19889 case rep_prefix_1_byte:
19890 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19894 /* Properly adjust the offset of the destination memory for aliasing.  */
19895 if (CONST_INT_P (count_exp))
19896 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19897 (count / size_needed) * size_needed);
19899 dst = change_address (dst, BLKmode, destreg);
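/* Worked example (added annotation): for a constant count of 37 with
   SIZE_NEEDED == 16, the main loop stores (37 / 16) * 16 = 32 bytes,
   so the known destination offset after the loop is 32 and the
   epilogue handles the remaining 5 bytes.  */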
19901 /* Step 4: Epilogue to copy the remaining bytes. */
19905 /* When the main loop is done, COUNT_EXP might hold the original count,
19906 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19907 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19908 bytes.  Compensate if needed.  */
19910 if (size_needed < epilogue_size_needed)
19913 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19914 GEN_INT (size_needed - 1), count_exp, 1,
19916 if (tmp != count_exp)
19917 emit_move_insn (count_exp, tmp);
19919 emit_label (label);
19920 LABEL_NUSES (label) = 1;
19923 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19925 if (force_loopy_epilogue)
19926 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19927 epilogue_size_needed);
19929 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19930 epilogue_size_needed);
19932 if (jump_around_label)
19933 emit_label (jump_around_label);
19937 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb
19940 out = result, initialized with the start address
19941 align_rtx = alignment of the address.
19942 scratch = scratch register, initialized with the start address when
19943 not aligned, otherwise undefined
19945 This is just the body. It needs the initializations mentioned above and
19946 some address computing at the end. These things are done in i386.md. */
19949 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19953 rtx align_2_label = NULL_RTX;
19954 rtx align_3_label = NULL_RTX;
19955 rtx align_4_label = gen_label_rtx ();
19956 rtx end_0_label = gen_label_rtx ();
19958 rtx tmpreg = gen_reg_rtx (SImode);
19959 rtx scratch = gen_reg_rtx (SImode);
19963 if (CONST_INT_P (align_rtx))
19964 align = INTVAL (align_rtx);
19966 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19968 /* Is there a known alignment and is it less than 4? */
19971 rtx scratch1 = gen_reg_rtx (Pmode);
19972 emit_move_insn (scratch1, out);
19973 /* Is there a known alignment and is it not 2? */
19976 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19977 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19979 /* Leave just the 3 lower bits. */
19980 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19981 NULL_RTX, 0, OPTAB_WIDEN);
19983 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19984 Pmode, 1, align_4_label);
19985 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19986 Pmode, 1, align_2_label);
19987 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19988 Pmode, 1, align_3_label);
19992 /* Since the alignment is 2, we have to check 2 or 0 bytes;
19993 check whether it is aligned to a 4-byte boundary.  */
19995 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19996 NULL_RTX, 0, OPTAB_WIDEN);
19998 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19999 Pmode, 1, align_4_label);
20002 mem = change_address (src, QImode, out);
20004 /* Now compare the bytes. */
20006 /* Compare the first n unaligned bytes one byte at a time.  */
20007 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20008 QImode, 1, end_0_label);
20010 /* Increment the address. */
20011 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20013 /* Not needed with an alignment of 2 */
20016 emit_label (align_2_label);
20018 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20021 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20023 emit_label (align_3_label);
20026 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20029 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20032 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
20033 align this loop: it only makes the program larger and does not help it converge.  */
20035 emit_label (align_4_label);
20037 mem = change_address (src, SImode, out);
20038 emit_move_insn (scratch, mem);
20039 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20041 /* This formula yields a nonzero result iff one of the bytes is zero.
20042 This saves three branches inside the loop and many cycles.  */
20044 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20045 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20046 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20047 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20048 gen_int_mode (0x80808080, SImode)));
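/* Worked example (added annotation) of the zero-byte test
   (v - 0x01010101) & ~v & 0x80808080 computed above: for
   v = 0x61620063 (bytes 0x63, 0x00, 0x62, 0x61 from the lowest),
   v - 0x01010101 = 0x6060ff62 and ~v = 0x9e9dff9c, so the final AND
   yields 0x00008000 -- nonzero exactly because byte 1 is zero.  */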
20049 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20054 rtx reg = gen_reg_rtx (SImode);
20055 rtx reg2 = gen_reg_rtx (Pmode);
20056 emit_move_insn (reg, tmpreg);
20057 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20059 /* If zero is not in the first two bytes, move two bytes forward. */
20060 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20061 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20062 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20063 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20064 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20067 /* Emit the lea manually to avoid clobbering the flags.  */
20068 emit_insn (gen_rtx_SET (SImode, reg2,
20069 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20071 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20072 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20073 emit_insn (gen_rtx_SET (VOIDmode, out,
20074 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20080 rtx end_2_label = gen_label_rtx ();
20081 /* Is zero in the first two bytes? */
20083 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20084 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20085 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20086 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20087 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20089 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20090 JUMP_LABEL (tmp) = end_2_label;
20092 /* Not in the first two. Move two bytes forward. */
20093 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20094 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20096 emit_label (end_2_label);
20100 /* Avoid branch in fixing the byte. */
20101 tmpreg = gen_lowpart (QImode, tmpreg);
20102 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20103 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20104 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20105 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
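/* Added annotation: at this point OUT is 3 or 4 bytes past the
   terminating zero.  TMPREG's low byte has bit 7 set exactly when the
   zero is the lower of the two remaining candidate bytes; doubling it
   with the ADD copies that bit into the carry flag, so the SBB above
   computes OUT - 3 - CF, i.e. OUT - 4 or OUT - 3, landing precisely on
   the zero byte without a branch.  */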
20107 emit_label (end_0_label);
20110 /* Expand strlen. */
20113 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20115 rtx addr, scratch1, scratch2, scratch3, scratch4;
20117 /* The generic case of the strlen expander is long.  Avoid expanding it
20118 unless TARGET_INLINE_ALL_STRINGOPS.  */
20120 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20121 && !TARGET_INLINE_ALL_STRINGOPS
20122 && !optimize_insn_for_size_p ()
20123 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20126 addr = force_reg (Pmode, XEXP (src, 0));
20127 scratch1 = gen_reg_rtx (Pmode);
20129 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20130 && !optimize_insn_for_size_p ())
20132 /* Well it seems that some optimizer does not combine a call like
20133 foo(strlen(bar), strlen(bar));
20134 when the move and the subtraction are done here.  It does calculate
20135 the length just once when these instructions are done inside of
20136 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
20137 often used and I use one fewer register for the lifetime of
20138 output_strlen_unroll() this is better. */
20140 emit_move_insn (out, addr);
20142 ix86_expand_strlensi_unroll_1 (out, src, align);
20144 /* strlensi_unroll_1 returns the address of the zero at the end of
20145 the string, like memchr(), so compute the length by subtracting
20146 the start address. */
20147 emit_insn (ix86_gen_sub3 (out, out, addr));
20153 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20154 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20157 scratch2 = gen_reg_rtx (Pmode);
20158 scratch3 = gen_reg_rtx (Pmode);
20159 scratch4 = force_reg (Pmode, constm1_rtx);
20161 emit_move_insn (scratch3, addr);
20162 eoschar = force_reg (QImode, eoschar);
20164 src = replace_equiv_address_nv (src, scratch3);
20166 /* If .md starts supporting :P, this can be done in .md. */
20167 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20168 scratch4), UNSPEC_SCAS);
20169 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20170 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20171 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
20176 /* For a given symbol (function), construct code to compute the address of its PLT
20177 entry in the large x86-64 PIC model.  */
20179 construct_plt_address (rtx symbol)
20181 rtx tmp = gen_reg_rtx (Pmode);
20182 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20184 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20185 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20187 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20188 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20193 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20195 rtx pop, int sibcall)
20197 rtx use = NULL, call;
20199 if (pop == const0_rtx)
20201 gcc_assert (!TARGET_64BIT || !pop);
20203 if (TARGET_MACHO && !TARGET_64BIT)
20206 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20207 fnaddr = machopic_indirect_call_target (fnaddr);
20212 /* Static functions and indirect calls don't need the pic register. */
20213 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20214 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20215 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20216 use_reg (&use, pic_offset_table_rtx);
20219 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20221 rtx al = gen_rtx_REG (QImode, AX_REG);
20222 emit_move_insn (al, callarg2);
20223 use_reg (&use, al);
20226 if (ix86_cmodel == CM_LARGE_PIC
20228 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20229 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20230 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20232 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20233 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20235 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20236 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20239 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20241 call = gen_rtx_SET (VOIDmode, retval, call);
20244 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20245 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20246 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20249 && ix86_cfun_abi () == MS_ABI
20250 && (!callarg2 || INTVAL (callarg2) != -2))
20252 /* We need to represent that SI and DI registers are clobbered by the call.  */
20254 static int clobbered_registers[] = {
20255 XMM6_REG, XMM7_REG, XMM8_REG,
20256 XMM9_REG, XMM10_REG, XMM11_REG,
20257 XMM12_REG, XMM13_REG, XMM14_REG,
20258 XMM15_REG, SI_REG, DI_REG
20261 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20262 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20263 UNSPEC_MS_TO_SYSV_CALL);
20267 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20268 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20271 (SSE_REGNO_P (clobbered_registers[i])
20273 clobbered_registers[i]));
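/* Added annotation: under the Microsoft x64 ABI, XMM6-XMM15 as well as
   RSI and RDI are callee-saved, while under the System V ABI they are
   call-clobbered; an MS_ABI function calling SysV code therefore has to
   assume these registers are clobbered, which the CLOBBERs built above
   make explicit to the register allocator.  */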
20275 call = gen_rtx_PARALLEL (VOIDmode,
20276 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20280 call = emit_call_insn (call);
20282 CALL_INSN_FUNCTION_USAGE (call) = use;
20286 /* Clear stack slot assignments remembered from previous functions.
20287 This is called from INIT_EXPANDERS once before RTL is emitted for each function.  */
20290 static struct machine_function *
20291 ix86_init_machine_status (void)
20293 struct machine_function *f;
20295 f = ggc_alloc_cleared_machine_function ();
20296 f->use_fast_prologue_epilogue_nregs = -1;
20297 f->tls_descriptor_call_expanded_p = 0;
20298 f->call_abi = ix86_abi;
20303 /* Return a MEM corresponding to a stack slot with mode MODE.
20304 Allocate a new slot if necessary.
20306 The RTL for a function can have several slots available: N is
20307 which slot to use. */
20310 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20312 struct stack_local_entry *s;
20314 gcc_assert (n < MAX_386_STACK_LOCALS);
20316 /* Virtual slot is valid only before vregs are instantiated. */
20317 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20319 for (s = ix86_stack_locals; s; s = s->next)
20320 if (s->mode == mode && s->n == n)
20321 return copy_rtx (s->rtl);
20323 s = ggc_alloc_stack_local_entry ();
20326 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20328 s->next = ix86_stack_locals;
20329 ix86_stack_locals = s;
20333 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20335 static GTY(()) rtx ix86_tls_symbol;
20337 ix86_tls_get_addr (void)
20340 if (!ix86_tls_symbol)
20342 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20343 (TARGET_ANY_GNU_TLS
20345 ? "___tls_get_addr"
20346 : "__tls_get_addr");
20349 return ix86_tls_symbol;
20352 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20354 static GTY(()) rtx ix86_tls_module_base_symbol;
20356 ix86_tls_module_base (void)
20359 if (!ix86_tls_module_base_symbol)
20361 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20362 "_TLS_MODULE_BASE_");
20363 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20364 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20367 return ix86_tls_module_base_symbol;
20370 /* Calculate the length of the memory address in the instruction
20371 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20374 memory_address_length (rtx addr)
20376 struct ix86_address parts;
20377 rtx base, index, disp;
20381 if (GET_CODE (addr) == PRE_DEC
20382 || GET_CODE (addr) == POST_INC
20383 || GET_CODE (addr) == PRE_MODIFY
20384 || GET_CODE (addr) == POST_MODIFY)
20387 ok = ix86_decompose_address (addr, &parts);
20390 if (parts.base && GET_CODE (parts.base) == SUBREG)
20391 parts.base = SUBREG_REG (parts.base);
20392 if (parts.index && GET_CODE (parts.index) == SUBREG)
20393 parts.index = SUBREG_REG (parts.index);
20396 index = parts.index;
20400 /* Rule of thumb:
20401 - esp as the base always wants an index,
20402 - ebp as the base always wants a displacement,
20403 - r12 as the base always wants an index,
20404 - r13 as the base always wants a displacement. */
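/* Added annotation, illustrating the rules above with concrete
   encodings: "movl (%esp), %eax" must be encoded as 8b 04 24, spending
   one extra byte on a SIB byte, and "movl (%ebp), %eax" as 8b 45 00,
   spending one extra byte on a zero disp8, so both register-indirect
   forms cost one address byte here instead of zero.  */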
20406 /* Register Indirect. */
20407 if (base && !index && !disp)
20409 /* esp (for its index) and ebp (for its displacement) need
20410 the two-byte modrm form.  Similarly for r12 and r13 in 64-bit mode.  */
20413 && (addr == arg_pointer_rtx
20414 || addr == frame_pointer_rtx
20415 || REGNO (addr) == SP_REG
20416 || REGNO (addr) == BP_REG
20417 || REGNO (addr) == R12_REG
20418 || REGNO (addr) == R13_REG))
20422 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20423 is not disp32, but disp32(%rip), so for disp32
20424 SIB byte is needed, unless print_operand_address
20425 optimizes it into disp32(%rip) or (%rip) is implied by UNSPEC.  */
20427 else if (disp && !base && !index)
20434 if (GET_CODE (disp) == CONST)
20435 symbol = XEXP (disp, 0);
20436 if (GET_CODE (symbol) == PLUS
20437 && CONST_INT_P (XEXP (symbol, 1)))
20438 symbol = XEXP (symbol, 0);
20440 if (GET_CODE (symbol) != LABEL_REF
20441 && (GET_CODE (symbol) != SYMBOL_REF
20442 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20443 && (GET_CODE (symbol) != UNSPEC
20444 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20445 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20452 /* Find the length of the displacement constant. */
20455 if (base && satisfies_constraint_K (disp))
20460 /* ebp always wants a displacement. Similarly r13. */
20461 else if (base && REG_P (base)
20462 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20465 /* An index requires the two-byte modrm form.... */
20467 /* ...like esp (or r12), which always wants an index. */
20468 || base == arg_pointer_rtx
20469 || base == frame_pointer_rtx
20470 || (base && REG_P (base)
20471 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20488 /* Compute default value for "length_immediate" attribute. When SHORTFORM
20489 is set, expect that the insn has an 8-bit immediate alternative.  */
20491 ix86_attr_length_immediate_default (rtx insn, int shortform)
20495 extract_insn_cached (insn);
20496 for (i = recog_data.n_operands - 1; i >= 0; --i)
20497 if (CONSTANT_P (recog_data.operand[i]))
20499 enum attr_mode mode = get_attr_mode (insn);
20502 if (shortform && CONST_INT_P (recog_data.operand[i]))
20504 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20511 ival = trunc_int_for_mode (ival, HImode);
20514 ival = trunc_int_for_mode (ival, SImode);
20519 if (IN_RANGE (ival, -128, 127))
20536 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
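/* Added annotation: e.g. "addq $1000, %rax" still carries a 4-byte
   immediate (the hardware sign-extends it to 64 bits); only the movabs
   form of mov takes a full 8-byte immediate.  */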
20541 fatal_insn ("unknown insn mode", insn);
20546 /* Compute default value for "length_address" attribute. */
20548 ix86_attr_length_address_default (rtx insn)
20552 if (get_attr_type (insn) == TYPE_LEA)
20554 rtx set = PATTERN (insn), addr;
20556 if (GET_CODE (set) == PARALLEL)
20557 set = XVECEXP (set, 0, 0);
20559 gcc_assert (GET_CODE (set) == SET);
20561 addr = SET_SRC (set);
20562 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20564 if (GET_CODE (addr) == ZERO_EXTEND)
20565 addr = XEXP (addr, 0);
20566 if (GET_CODE (addr) == SUBREG)
20567 addr = SUBREG_REG (addr);
20570 return memory_address_length (addr);
20573 extract_insn_cached (insn);
20574 for (i = recog_data.n_operands - 1; i >= 0; --i)
20575 if (MEM_P (recog_data.operand[i]))
20577 constrain_operands_cached (reload_completed);
20578 if (which_alternative != -1)
20580 const char *constraints = recog_data.constraints[i];
20581 int alt = which_alternative;
20583 while (*constraints == '=' || *constraints == '+')
20586 while (*constraints++ != ',')
20588 /* Skip ignored operands. */
20589 if (*constraints == 'X')
20592 return memory_address_length (XEXP (recog_data.operand[i], 0));
20597 /* Compute default value for "length_vex" attribute. It includes
20598 the 2- or 3-byte VEX prefix and 1 opcode byte.  */
20601 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20606 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W bit
20607 requires the 3-byte VEX prefix.  */
20608 if (!has_0f_opcode || has_vex_w)
20611 /* We can always use the 2-byte VEX prefix in 32-bit mode.  */
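/* Added annotation: the 2-byte (0xc5) VEX prefix can express only the
   REX.R bit, so e.g. "vaddps %xmm1, %xmm2, %xmm3" fits in it, while
   REX.W, REX.X or REX.B -- a DImode general register operand, or an
   extended register used as index or base -- forces the 3-byte (0xc4)
   form, as checked below.  */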
20615 extract_insn_cached (insn);
20617 for (i = recog_data.n_operands - 1; i >= 0; --i)
20618 if (REG_P (recog_data.operand[i]))
20620 /* The REX.W bit requires the 3-byte VEX prefix.  */
20621 if (GET_MODE (recog_data.operand[i]) == DImode
20622 && GENERAL_REG_P (recog_data.operand[i]))
20627 /* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
20628 if (MEM_P (recog_data.operand[i])
20629 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
20636 /* Return the maximum number of instructions a cpu can issue. */
20639 ix86_issue_rate (void)
20643 case PROCESSOR_PENTIUM:
20644 case PROCESSOR_ATOM:
20648 case PROCESSOR_PENTIUMPRO:
20649 case PROCESSOR_PENTIUM4:
20650 case PROCESSOR_ATHLON:
20652 case PROCESSOR_AMDFAM10:
20653 case PROCESSOR_NOCONA:
20654 case PROCESSOR_GENERIC32:
20655 case PROCESSOR_GENERIC64:
20656 case PROCESSOR_BDVER1:
20659 case PROCESSOR_CORE2:
20667 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
20668 by DEP_INSN and nothing else set by DEP_INSN.  */
20671 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20675 /* Simplify the test for uninteresting insns. */
20676 if (insn_type != TYPE_SETCC
20677 && insn_type != TYPE_ICMOV
20678 && insn_type != TYPE_FCMOV
20679 && insn_type != TYPE_IBR)
20682 if ((set = single_set (dep_insn)) != 0)
20684 set = SET_DEST (set);
20687 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20688 && XVECLEN (PATTERN (dep_insn), 0) == 2
20689 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20690 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20692 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20693 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20698 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20701 /* This test is true if the dependent insn reads the flags but
20702 not any other potentially set register. */
20703 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20706 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20712 /* Return true iff USE_INSN has a memory address with operands set by SET_INSN.  */
20716 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20719 extract_insn_cached (use_insn);
20720 for (i = recog_data.n_operands - 1; i >= 0; --i)
20721 if (MEM_P (recog_data.operand[i]))
20723 rtx addr = XEXP (recog_data.operand[i], 0);
20724 return modified_in_p (addr, set_insn) != 0;
20730 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20732 enum attr_type insn_type, dep_insn_type;
20733 enum attr_memory memory;
20735 int dep_insn_code_number;
20737 /* Anti and output dependencies have zero cost on all CPUs. */
20738 if (REG_NOTE_KIND (link) != 0)
20741 dep_insn_code_number = recog_memoized (dep_insn);
20743 /* If we can't recognize the insns, we can't really do anything. */
20744 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20747 insn_type = get_attr_type (insn);
20748 dep_insn_type = get_attr_type (dep_insn);
20752 case PROCESSOR_PENTIUM:
20753 /* Address Generation Interlock adds a cycle of latency. */
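/* Added annotation: e.g. on Pentium "movl %eax, %ebx" immediately
   followed by "movl (%ebx), %ecx" stalls for a cycle, because %ebx is
   needed for address generation before its write completes.  */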
20754 if (insn_type == TYPE_LEA)
20756 rtx addr = PATTERN (insn);
20758 if (GET_CODE (addr) == PARALLEL)
20759 addr = XVECEXP (addr, 0, 0);
20761 gcc_assert (GET_CODE (addr) == SET);
20763 addr = SET_SRC (addr);
20764 if (modified_in_p (addr, dep_insn))
20767 else if (ix86_agi_dependent (dep_insn, insn))
20770 /* ??? Compares pair with jump/setcc. */
20771 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20774 /* Floating point stores require the value to be ready one cycle earlier.  */
20775 if (insn_type == TYPE_FMOV
20776 && get_attr_memory (insn) == MEMORY_STORE
20777 && !ix86_agi_dependent (dep_insn, insn))
20781 case PROCESSOR_PENTIUMPRO:
20782 memory = get_attr_memory (insn);
20784 /* INT->FP conversion is expensive. */
20785 if (get_attr_fp_int_src (dep_insn))
20788 /* There is one extra cycle of latency between an FP op and a store.  */
20789 if (insn_type == TYPE_FMOV
20790 && (set = single_set (dep_insn)) != NULL_RTX
20791 && (set2 = single_set (insn)) != NULL_RTX
20792 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20793 && MEM_P (SET_DEST (set2)))
20796 /* Show the ability of the reorder buffer to hide the latency of a load
20797 by executing it in parallel with the previous instruction when the
20798 previous instruction is not needed to compute the address.  */
20799 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20800 && !ix86_agi_dependent (dep_insn, insn))
20802 /* Claim moves to take one cycle, as the core can issue one load
20803 at a time and the next load can start a cycle later.  */
20804 if (dep_insn_type == TYPE_IMOV
20805 || dep_insn_type == TYPE_FMOV)
20813 memory = get_attr_memory (insn);
20815 /* The esp dependency is resolved before the instruction is really finished.  */
20817 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20818 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20821 /* INT->FP conversion is expensive. */
20822 if (get_attr_fp_int_src (dep_insn))
20825 /* Show the ability of the reorder buffer to hide the latency of a load
20826 by executing it in parallel with the previous instruction when the
20827 previous instruction is not needed to compute the address.  */
20828 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20829 && !ix86_agi_dependent (dep_insn, insn))
20831 /* Claim moves to take one cycle, as the core can issue one load
20832 at a time and the next load can start a cycle later.  */
20833 if (dep_insn_type == TYPE_IMOV
20834 || dep_insn_type == TYPE_FMOV)
20843 case PROCESSOR_ATHLON:
20845 case PROCESSOR_AMDFAM10:
20846 case PROCESSOR_BDVER1:
20847 case PROCESSOR_ATOM:
20848 case PROCESSOR_GENERIC32:
20849 case PROCESSOR_GENERIC64:
20850 memory = get_attr_memory (insn);
20852 /* Show the ability of the reorder buffer to hide the latency of a load
20853 by executing it in parallel with the previous instruction when the
20854 previous instruction is not needed to compute the address.  */
20855 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20856 && !ix86_agi_dependent (dep_insn, insn))
20858 enum attr_unit unit = get_attr_unit (insn);
20861 /* Because of the difference between the length of the integer and
20862 floating unit pipeline preparation stages, the memory operands
20863 for floating point are cheaper.
20865 ??? For Athlon the difference is most probably 2.  */
20866 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20869 loadcost = TARGET_ATHLON ? 2 : 0;
20871 if (cost >= loadcost)
20884 /* How many alternative schedules to try. This should be as wide as the
20885 scheduling freedom in the DFA, but no wider. Making this value too
20886 large results in extra work for the scheduler.  */
20889 ia32_multipass_dfa_lookahead (void)
20893 case PROCESSOR_PENTIUM:
20896 case PROCESSOR_PENTIUMPRO:
20906 /* Compute the alignment given to a constant that is being placed in memory.
20907 EXP is the constant and ALIGN is the alignment that the object would ordinarily have.
20909 The value of this function is used instead of that alignment to align the object.  */
20913 ix86_constant_alignment (tree exp, int align)
20915 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20916 || TREE_CODE (exp) == INTEGER_CST)
20918 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20920 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20923 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20924 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20925 return BITS_PER_WORD;
20930 /* Compute the alignment for a static variable.
20931 TYPE is the data type, and ALIGN is the alignment that
20932 the object would ordinarily have. The value of this function is used
20933 instead of that alignment to align the object. */
20936 ix86_data_alignment (tree type, int align)
20938 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20940 if (AGGREGATE_TYPE_P (type)
20941 && TYPE_SIZE (type)
20942 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20943 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20944 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20945 && align < max_align)
20948 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
20949 to a 16-byte boundary.  */
20952 if (AGGREGATE_TYPE_P (type)
20953 && TYPE_SIZE (type)
20954 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20955 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20956 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20960 if (TREE_CODE (type) == ARRAY_TYPE)
20962 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20964 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20967 else if (TREE_CODE (type) == COMPLEX_TYPE)
20970 if (TYPE_MODE (type) == DCmode && align < 64)
20972 if ((TYPE_MODE (type) == XCmode
20973 || TYPE_MODE (type) == TCmode) && align < 128)
20976 else if ((TREE_CODE (type) == RECORD_TYPE
20977 || TREE_CODE (type) == UNION_TYPE
20978 || TREE_CODE (type) == QUAL_UNION_TYPE)
20979 && TYPE_FIELDS (type))
20981 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20983 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20986 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20987 || TREE_CODE (type) == INTEGER_TYPE)
20989 if (TYPE_MODE (type) == DFmode && align < 64)
20991 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20998 /* Compute the alignment for a local variable or a stack slot. EXP is
20999 the data type or decl itself, MODE is the widest mode available and
21000 ALIGN is the alignment that the object would ordinarily have. The
21001 value of this macro is used instead of that alignment to align the
21005 ix86_local_alignment (tree exp, enum machine_mode mode,
21006 unsigned int align)
21010 if (exp && DECL_P (exp))
21012 type = TREE_TYPE (exp);
21021 /* Don't do dynamic stack realignment for long long objects with
21022 -mpreferred-stack-boundary=2. */
21025 && ix86_preferred_stack_boundary < 64
21026 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21027 && (!type || !TYPE_USER_ALIGN (type))
21028 && (!decl || !DECL_USER_ALIGN (decl)))
21031 /* If TYPE is NULL, we are allocating a stack slot for caller-save
21032 register in MODE.  We will return the largest alignment of XF and DF.  */
21036 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21037 align = GET_MODE_ALIGNMENT (DFmode);
21041 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
21042 to a 16-byte boundary.  The exact wording is:
21044 An array uses the same alignment as its elements, except that a local or
21045 global array variable of length at least 16 bytes or
21046 a C99 variable-length array variable always has alignment of at least 16 bytes.
21048 This was added to allow use of aligned SSE instructions on arrays.  This
21049 rule is meant for static storage (where the compiler cannot do the analysis
21050 by itself).  We follow it for automatic variables only when convenient.
21051 We fully control everything in the function being compiled, and functions
21052 from other units cannot rely on the alignment.
21054 Exclude the va_list type.  It is the common case of a local array where
21055 we cannot benefit from the alignment.  */
21056 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21059 if (AGGREGATE_TYPE_P (type)
21060 && (TYPE_MAIN_VARIANT (type)
21061 != TYPE_MAIN_VARIANT (va_list_type_node))
21062 && TYPE_SIZE (type)
21063 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21064 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21065 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21068 if (TREE_CODE (type) == ARRAY_TYPE)
21070 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21072 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21075 else if (TREE_CODE (type) == COMPLEX_TYPE)
21077 if (TYPE_MODE (type) == DCmode && align < 64)
21079 if ((TYPE_MODE (type) == XCmode
21080 || TYPE_MODE (type) == TCmode) && align < 128)
21083 else if ((TREE_CODE (type) == RECORD_TYPE
21084 || TREE_CODE (type) == UNION_TYPE
21085 || TREE_CODE (type) == QUAL_UNION_TYPE)
21086 && TYPE_FIELDS (type))
21088 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21090 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21093 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21094 || TREE_CODE (type) == INTEGER_TYPE)
21097 if (TYPE_MODE (type) == DFmode && align < 64)
21099 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21105 /* Compute the minimum required alignment for dynamic stack realignment
21106 purposes for a local variable, parameter or a stack slot. EXP is
21107 the data type or decl itself, MODE is its mode and ALIGN is the
21108 alignment that the object would ordinarily have. */
21111 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21112 unsigned int align)
21116 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21119 if (exp && DECL_P (exp))
21121 type = TREE_TYPE (exp);
21130 /* Don't do dynamic stack realignment for long long objects with
21131 -mpreferred-stack-boundary=2. */
21132 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21133 && (!type || !TYPE_USER_ALIGN (type))
21134 && (!decl || !DECL_USER_ALIGN (decl)))
21140 /* Find a location for the static chain incoming to a nested function.
21141 This is a register, unless all free registers are used by arguments. */
21144 ix86_static_chain (const_tree fndecl, bool incoming_p)
21148 if (!DECL_STATIC_CHAIN (fndecl))
21153 /* We always use R10 in 64-bit mode. */
21159 /* By default in 32-bit mode we use ECX to pass the static chain. */
21162 fntype = TREE_TYPE (fndecl);
21163 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21165 /* Fastcall functions use ecx/edx for arguments, which leaves
21166 us with EAX for the static chain. */
21169 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21171 /* Thiscall functions use ecx for arguments, which leaves
21172 us with EAX for the static chain. */
21175 else if (ix86_function_regparm (fntype, fndecl) == 3)
21177 /* For regparm 3, we have no free call-clobbered registers in
21178 which to store the static chain. In order to implement this,
21179 we have the trampoline push the static chain to the stack.
21180 However, we can't push a value below the return address when
21181 we call the nested function directly, so we have to use an
21182 alternate entry point. For this we use ESI, and have the
21183 alternate entry point push ESI, so that things appear the
21184 same once we're executing the nested function. */
21187 if (fndecl == current_function_decl)
21188 ix86_static_chain_on_stack = true;
21189 return gen_frame_mem (SImode,
21190 plus_constant (arg_pointer_rtx, -8));
21196 return gen_rtx_REG (Pmode, regno);
21199 /* Emit RTL insns to initialize the variable parts of a trampoline.
21200 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21201 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21202 to be passed to the target function. */
21205 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21209 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21216 /* Depending on the static chain location, either load a register
21217 with a constant, or push the constant to the stack. All of the
21218 instructions are the same size. */
21219 chain = ix86_static_chain (fndecl, true);
21222 if (REGNO (chain) == CX_REG)
21224 else if (REGNO (chain) == AX_REG)
21227 gcc_unreachable ();
21232 mem = adjust_address (m_tramp, QImode, 0);
21233 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21235 mem = adjust_address (m_tramp, SImode, 1);
21236 emit_move_insn (mem, chain_value);
21238 /* Compute offset from the end of the jmp to the target function.
21239 In the case in which the trampoline stores the static chain on
21240 the stack, we need to skip the first insn which pushes the
21241 (call-saved) register static chain; this push is 1 byte. */
21242 disp = expand_binop (SImode, sub_optab, fnaddr,
21243 plus_constant (XEXP (m_tramp, 0),
21244 MEM_P (chain) ? 9 : 10),
21245 NULL_RTX, 1, OPTAB_DIRECT);
21247 mem = adjust_address (m_tramp, QImode, 5);
21248 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21250 mem = adjust_address (m_tramp, SImode, 6);
21251 emit_move_insn (mem, disp);
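/* Added annotation: the 32-bit trampoline written above is 10 bytes:
   byte 0 the mov/push opcode, bytes 1-4 the static chain value, byte 5
   the 0xe9 jmp opcode, bytes 6-9 the rel32 displacement.  jmp rel32 is
   relative to the end of the instruction at tramp + 10, so using
   tramp + 9 in the push case makes the jump land at FNADDR + 1,
   skipping the 1-byte push mentioned above.  */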
21257 /* Load the function address into r11.  Try to load the address using
21258 the shorter movl instead of movabs.  We may want to support
21259 movq for kernel mode, but the kernel does not use trampolines at the moment.  */
21261 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21263 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21265 mem = adjust_address (m_tramp, HImode, offset);
21266 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21268 mem = adjust_address (m_tramp, SImode, offset + 2);
21269 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21274 mem = adjust_address (m_tramp, HImode, offset);
21275 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21277 mem = adjust_address (m_tramp, DImode, offset + 2);
21278 emit_move_insn (mem, fnaddr);
21282 /* Load static chain using movabs to r10. */
21283 mem = adjust_address (m_tramp, HImode, offset);
21284 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21286 mem = adjust_address (m_tramp, DImode, offset + 2);
21287 emit_move_insn (mem, chain_value);
21290 /* Jump to r11; the last (unused) byte is a nop, only there to
21291 pad the write out to a single 32-bit store. */
21292 mem = adjust_address (m_tramp, SImode, offset);
21293 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
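/* Added annotation: the resulting 64-bit trampoline is therefore
   "movabs $fnaddr, %r11" (49 bb + imm64, or the 6-byte 41 bb + imm32
   movl form when the address fits in 32 bits), "movabs $chain, %r10"
   (49 ba + imm64), and "rex.wb jmp *%r11" (49 ff e3) padded with a nop
   (90) -- the 0x90e3ff49 word above in little-endian byte order.  */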
21296 gcc_assert (offset <= TRAMPOLINE_SIZE);
21299 #ifdef ENABLE_EXECUTE_STACK
21300 #ifdef CHECK_EXECUTE_STACK_ENABLED
21301 if (CHECK_EXECUTE_STACK_ENABLED)
21303 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21304 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21308 /* The following file contains several enumerations and data structures
21309 built from the definitions in i386-builtin-types.def. */
21311 #include "i386-builtin-types.inc"
21313 /* Table for the ix86 builtin non-function types. */
21314 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21316 /* Retrieve an element from the above table, building some of
21317 the types lazily. */
21320 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21322 unsigned int index;
21325 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21327 type = ix86_builtin_type_tab[(int) tcode];
21331 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21332 if (tcode <= IX86_BT_LAST_VECT)
21334 enum machine_mode mode;
21336 index = tcode - IX86_BT_LAST_PRIM - 1;
21337 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21338 mode = ix86_builtin_type_vect_mode[index];
21340 type = build_vector_type_for_mode (itype, mode);
21346 index = tcode - IX86_BT_LAST_VECT - 1;
21347 if (tcode <= IX86_BT_LAST_PTR)
21348 quals = TYPE_UNQUALIFIED;
21350 quals = TYPE_QUAL_CONST;
21352 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21353 if (quals != TYPE_UNQUALIFIED)
21354 itype = build_qualified_type (itype, quals);
21356 type = build_pointer_type (itype);
21359 ix86_builtin_type_tab[(int) tcode] = type;
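/* Added annotation with a walk-through (enumerator name used only as an
   example): asking for a vector code such as IX86_BT_V4SF looks up its
   base scalar type and machine mode in the two parallel arrays and
   builds the vector type on the spot; asking for a (const) pointer code
   wraps the pointed-to type the same lazy way.  Either result is
   memoized above, so each type is constructed at most once.  */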
21363 /* Table for the ix86 builtin function types. */
21364 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21366 /* Retrieve an element from the above table, building some of
21367 the types lazily. */
21370 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21374 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21376 type = ix86_builtin_func_type_tab[(int) tcode];
21380 if (tcode <= IX86_BT_LAST_FUNC)
21382 unsigned start = ix86_builtin_func_start[(int) tcode];
21383 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21384 tree rtype, atype, args = void_list_node;
21387 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21388 for (i = after - 1; i > start; --i)
21390 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21391 args = tree_cons (NULL, atype, args);
21394 type = build_function_type (rtype, args);
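/* Added annotation: e.g. for a descriptor listing { V4SF, V4SF, V4SF },
   the first slot is the return type and the loop conses the remaining
   argument types back to front onto void_list_node, yielding the
   function type V4SF (V4SF, V4SF) with the arguments in source order.  */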
21398 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21399 enum ix86_builtin_func_type icode;
21401 icode = ix86_builtin_func_alias_base[index];
21402 type = ix86_get_builtin_func_type (icode);
21405 ix86_builtin_func_type_tab[(int) tcode] = type;
21410 /* Codes for all the SSE/MMX builtins. */
21413 IX86_BUILTIN_ADDPS,
21414 IX86_BUILTIN_ADDSS,
21415 IX86_BUILTIN_DIVPS,
21416 IX86_BUILTIN_DIVSS,
21417 IX86_BUILTIN_MULPS,
21418 IX86_BUILTIN_MULSS,
21419 IX86_BUILTIN_SUBPS,
21420 IX86_BUILTIN_SUBSS,
21422 IX86_BUILTIN_CMPEQPS,
21423 IX86_BUILTIN_CMPLTPS,
21424 IX86_BUILTIN_CMPLEPS,
21425 IX86_BUILTIN_CMPGTPS,
21426 IX86_BUILTIN_CMPGEPS,
21427 IX86_BUILTIN_CMPNEQPS,
21428 IX86_BUILTIN_CMPNLTPS,
21429 IX86_BUILTIN_CMPNLEPS,
21430 IX86_BUILTIN_CMPNGTPS,
21431 IX86_BUILTIN_CMPNGEPS,
21432 IX86_BUILTIN_CMPORDPS,
21433 IX86_BUILTIN_CMPUNORDPS,
21434 IX86_BUILTIN_CMPEQSS,
21435 IX86_BUILTIN_CMPLTSS,
21436 IX86_BUILTIN_CMPLESS,
21437 IX86_BUILTIN_CMPNEQSS,
21438 IX86_BUILTIN_CMPNLTSS,
21439 IX86_BUILTIN_CMPNLESS,
21440 IX86_BUILTIN_CMPNGTSS,
21441 IX86_BUILTIN_CMPNGESS,
21442 IX86_BUILTIN_CMPORDSS,
21443 IX86_BUILTIN_CMPUNORDSS,
21445 IX86_BUILTIN_COMIEQSS,
21446 IX86_BUILTIN_COMILTSS,
21447 IX86_BUILTIN_COMILESS,
21448 IX86_BUILTIN_COMIGTSS,
21449 IX86_BUILTIN_COMIGESS,
21450 IX86_BUILTIN_COMINEQSS,
21451 IX86_BUILTIN_UCOMIEQSS,
21452 IX86_BUILTIN_UCOMILTSS,
21453 IX86_BUILTIN_UCOMILESS,
21454 IX86_BUILTIN_UCOMIGTSS,
21455 IX86_BUILTIN_UCOMIGESS,
21456 IX86_BUILTIN_UCOMINEQSS,
21458 IX86_BUILTIN_CVTPI2PS,
21459 IX86_BUILTIN_CVTPS2PI,
21460 IX86_BUILTIN_CVTSI2SS,
21461 IX86_BUILTIN_CVTSI642SS,
21462 IX86_BUILTIN_CVTSS2SI,
21463 IX86_BUILTIN_CVTSS2SI64,
21464 IX86_BUILTIN_CVTTPS2PI,
21465 IX86_BUILTIN_CVTTSS2SI,
21466 IX86_BUILTIN_CVTTSS2SI64,
21468 IX86_BUILTIN_MAXPS,
21469 IX86_BUILTIN_MAXSS,
21470 IX86_BUILTIN_MINPS,
21471 IX86_BUILTIN_MINSS,
21473 IX86_BUILTIN_LOADUPS,
21474 IX86_BUILTIN_STOREUPS,
21475 IX86_BUILTIN_MOVSS,
21477 IX86_BUILTIN_MOVHLPS,
21478 IX86_BUILTIN_MOVLHPS,
21479 IX86_BUILTIN_LOADHPS,
21480 IX86_BUILTIN_LOADLPS,
21481 IX86_BUILTIN_STOREHPS,
21482 IX86_BUILTIN_STORELPS,
21484 IX86_BUILTIN_MASKMOVQ,
21485 IX86_BUILTIN_MOVMSKPS,
21486 IX86_BUILTIN_PMOVMSKB,
21488 IX86_BUILTIN_MOVNTPS,
21489 IX86_BUILTIN_MOVNTQ,
21491 IX86_BUILTIN_LOADDQU,
21492 IX86_BUILTIN_STOREDQU,
21494 IX86_BUILTIN_PACKSSWB,
21495 IX86_BUILTIN_PACKSSDW,
21496 IX86_BUILTIN_PACKUSWB,
21498 IX86_BUILTIN_PADDB,
21499 IX86_BUILTIN_PADDW,
21500 IX86_BUILTIN_PADDD,
21501 IX86_BUILTIN_PADDQ,
21502 IX86_BUILTIN_PADDSB,
21503 IX86_BUILTIN_PADDSW,
21504 IX86_BUILTIN_PADDUSB,
21505 IX86_BUILTIN_PADDUSW,
21506 IX86_BUILTIN_PSUBB,
21507 IX86_BUILTIN_PSUBW,
21508 IX86_BUILTIN_PSUBD,
21509 IX86_BUILTIN_PSUBQ,
21510 IX86_BUILTIN_PSUBSB,
21511 IX86_BUILTIN_PSUBSW,
21512 IX86_BUILTIN_PSUBUSB,
21513 IX86_BUILTIN_PSUBUSW,
21516 IX86_BUILTIN_PANDN,
21520 IX86_BUILTIN_PAVGB,
21521 IX86_BUILTIN_PAVGW,
21523 IX86_BUILTIN_PCMPEQB,
21524 IX86_BUILTIN_PCMPEQW,
21525 IX86_BUILTIN_PCMPEQD,
21526 IX86_BUILTIN_PCMPGTB,
21527 IX86_BUILTIN_PCMPGTW,
21528 IX86_BUILTIN_PCMPGTD,
21530 IX86_BUILTIN_PMADDWD,
21532 IX86_BUILTIN_PMAXSW,
21533 IX86_BUILTIN_PMAXUB,
21534 IX86_BUILTIN_PMINSW,
21535 IX86_BUILTIN_PMINUB,
21537 IX86_BUILTIN_PMULHUW,
21538 IX86_BUILTIN_PMULHW,
21539 IX86_BUILTIN_PMULLW,
21541 IX86_BUILTIN_PSADBW,
21542 IX86_BUILTIN_PSHUFW,
21544 IX86_BUILTIN_PSLLW,
21545 IX86_BUILTIN_PSLLD,
21546 IX86_BUILTIN_PSLLQ,
21547 IX86_BUILTIN_PSRAW,
21548 IX86_BUILTIN_PSRAD,
21549 IX86_BUILTIN_PSRLW,
21550 IX86_BUILTIN_PSRLD,
21551 IX86_BUILTIN_PSRLQ,
21552 IX86_BUILTIN_PSLLWI,
21553 IX86_BUILTIN_PSLLDI,
21554 IX86_BUILTIN_PSLLQI,
21555 IX86_BUILTIN_PSRAWI,
21556 IX86_BUILTIN_PSRADI,
21557 IX86_BUILTIN_PSRLWI,
21558 IX86_BUILTIN_PSRLDI,
21559 IX86_BUILTIN_PSRLQI,
21561 IX86_BUILTIN_PUNPCKHBW,
21562 IX86_BUILTIN_PUNPCKHWD,
21563 IX86_BUILTIN_PUNPCKHDQ,
21564 IX86_BUILTIN_PUNPCKLBW,
21565 IX86_BUILTIN_PUNPCKLWD,
21566 IX86_BUILTIN_PUNPCKLDQ,
21568 IX86_BUILTIN_SHUFPS,
21570 IX86_BUILTIN_RCPPS,
21571 IX86_BUILTIN_RCPSS,
21572 IX86_BUILTIN_RSQRTPS,
21573 IX86_BUILTIN_RSQRTPS_NR,
21574 IX86_BUILTIN_RSQRTSS,
21575 IX86_BUILTIN_RSQRTF,
21576 IX86_BUILTIN_SQRTPS,
21577 IX86_BUILTIN_SQRTPS_NR,
21578 IX86_BUILTIN_SQRTSS,
21580 IX86_BUILTIN_UNPCKHPS,
21581 IX86_BUILTIN_UNPCKLPS,
21583 IX86_BUILTIN_ANDPS,
21584 IX86_BUILTIN_ANDNPS,
21586 IX86_BUILTIN_XORPS,
21589 IX86_BUILTIN_LDMXCSR,
21590 IX86_BUILTIN_STMXCSR,
21591 IX86_BUILTIN_SFENCE,
21593 /* 3DNow! Original */
21594 IX86_BUILTIN_FEMMS,
21595 IX86_BUILTIN_PAVGUSB,
21596 IX86_BUILTIN_PF2ID,
21597 IX86_BUILTIN_PFACC,
21598 IX86_BUILTIN_PFADD,
21599 IX86_BUILTIN_PFCMPEQ,
21600 IX86_BUILTIN_PFCMPGE,
21601 IX86_BUILTIN_PFCMPGT,
21602 IX86_BUILTIN_PFMAX,
21603 IX86_BUILTIN_PFMIN,
21604 IX86_BUILTIN_PFMUL,
21605 IX86_BUILTIN_PFRCP,
21606 IX86_BUILTIN_PFRCPIT1,
21607 IX86_BUILTIN_PFRCPIT2,
21608 IX86_BUILTIN_PFRSQIT1,
21609 IX86_BUILTIN_PFRSQRT,
21610 IX86_BUILTIN_PFSUB,
21611 IX86_BUILTIN_PFSUBR,
21612 IX86_BUILTIN_PI2FD,
21613 IX86_BUILTIN_PMULHRW,
21615 /* 3DNow! Athlon Extensions */
21616 IX86_BUILTIN_PF2IW,
21617 IX86_BUILTIN_PFNACC,
21618 IX86_BUILTIN_PFPNACC,
21619 IX86_BUILTIN_PI2FW,
21620 IX86_BUILTIN_PSWAPDSI,
21621 IX86_BUILTIN_PSWAPDSF,
21624 IX86_BUILTIN_ADDPD,
21625 IX86_BUILTIN_ADDSD,
21626 IX86_BUILTIN_DIVPD,
21627 IX86_BUILTIN_DIVSD,
21628 IX86_BUILTIN_MULPD,
21629 IX86_BUILTIN_MULSD,
21630 IX86_BUILTIN_SUBPD,
21631 IX86_BUILTIN_SUBSD,
21633 IX86_BUILTIN_CMPEQPD,
21634 IX86_BUILTIN_CMPLTPD,
21635 IX86_BUILTIN_CMPLEPD,
21636 IX86_BUILTIN_CMPGTPD,
21637 IX86_BUILTIN_CMPGEPD,
21638 IX86_BUILTIN_CMPNEQPD,
21639 IX86_BUILTIN_CMPNLTPD,
21640 IX86_BUILTIN_CMPNLEPD,
21641 IX86_BUILTIN_CMPNGTPD,
21642 IX86_BUILTIN_CMPNGEPD,
21643 IX86_BUILTIN_CMPORDPD,
21644 IX86_BUILTIN_CMPUNORDPD,
21645 IX86_BUILTIN_CMPEQSD,
21646 IX86_BUILTIN_CMPLTSD,
21647 IX86_BUILTIN_CMPLESD,
21648 IX86_BUILTIN_CMPNEQSD,
21649 IX86_BUILTIN_CMPNLTSD,
21650 IX86_BUILTIN_CMPNLESD,
21651 IX86_BUILTIN_CMPORDSD,
21652 IX86_BUILTIN_CMPUNORDSD,
21654 IX86_BUILTIN_COMIEQSD,
21655 IX86_BUILTIN_COMILTSD,
21656 IX86_BUILTIN_COMILESD,
21657 IX86_BUILTIN_COMIGTSD,
21658 IX86_BUILTIN_COMIGESD,
21659 IX86_BUILTIN_COMINEQSD,
21660 IX86_BUILTIN_UCOMIEQSD,
21661 IX86_BUILTIN_UCOMILTSD,
21662 IX86_BUILTIN_UCOMILESD,
21663 IX86_BUILTIN_UCOMIGTSD,
21664 IX86_BUILTIN_UCOMIGESD,
21665 IX86_BUILTIN_UCOMINEQSD,
21667 IX86_BUILTIN_MAXPD,
21668 IX86_BUILTIN_MAXSD,
21669 IX86_BUILTIN_MINPD,
21670 IX86_BUILTIN_MINSD,
21672 IX86_BUILTIN_ANDPD,
21673 IX86_BUILTIN_ANDNPD,
21675 IX86_BUILTIN_XORPD,
21677 IX86_BUILTIN_SQRTPD,
21678 IX86_BUILTIN_SQRTSD,
21680 IX86_BUILTIN_UNPCKHPD,
21681 IX86_BUILTIN_UNPCKLPD,
21683 IX86_BUILTIN_SHUFPD,
21685 IX86_BUILTIN_LOADUPD,
21686 IX86_BUILTIN_STOREUPD,
21687 IX86_BUILTIN_MOVSD,
21689 IX86_BUILTIN_LOADHPD,
21690 IX86_BUILTIN_LOADLPD,
21692 IX86_BUILTIN_CVTDQ2PD,
21693 IX86_BUILTIN_CVTDQ2PS,
21695 IX86_BUILTIN_CVTPD2DQ,
21696 IX86_BUILTIN_CVTPD2PI,
21697 IX86_BUILTIN_CVTPD2PS,
21698 IX86_BUILTIN_CVTTPD2DQ,
21699 IX86_BUILTIN_CVTTPD2PI,
21701 IX86_BUILTIN_CVTPI2PD,
21702 IX86_BUILTIN_CVTSI2SD,
21703 IX86_BUILTIN_CVTSI642SD,
21705 IX86_BUILTIN_CVTSD2SI,
21706 IX86_BUILTIN_CVTSD2SI64,
21707 IX86_BUILTIN_CVTSD2SS,
21708 IX86_BUILTIN_CVTSS2SD,
21709 IX86_BUILTIN_CVTTSD2SI,
21710 IX86_BUILTIN_CVTTSD2SI64,
21712 IX86_BUILTIN_CVTPS2DQ,
21713 IX86_BUILTIN_CVTPS2PD,
21714 IX86_BUILTIN_CVTTPS2DQ,
21716 IX86_BUILTIN_MOVNTI,
21717 IX86_BUILTIN_MOVNTPD,
21718 IX86_BUILTIN_MOVNTDQ,
21720 IX86_BUILTIN_MOVQ128,
21723 IX86_BUILTIN_MASKMOVDQU,
21724 IX86_BUILTIN_MOVMSKPD,
21725 IX86_BUILTIN_PMOVMSKB128,
21727 IX86_BUILTIN_PACKSSWB128,
21728 IX86_BUILTIN_PACKSSDW128,
21729 IX86_BUILTIN_PACKUSWB128,
21731 IX86_BUILTIN_PADDB128,
21732 IX86_BUILTIN_PADDW128,
21733 IX86_BUILTIN_PADDD128,
21734 IX86_BUILTIN_PADDQ128,
21735 IX86_BUILTIN_PADDSB128,
21736 IX86_BUILTIN_PADDSW128,
21737 IX86_BUILTIN_PADDUSB128,
21738 IX86_BUILTIN_PADDUSW128,
21739 IX86_BUILTIN_PSUBB128,
21740 IX86_BUILTIN_PSUBW128,
21741 IX86_BUILTIN_PSUBD128,
21742 IX86_BUILTIN_PSUBQ128,
21743 IX86_BUILTIN_PSUBSB128,
21744 IX86_BUILTIN_PSUBSW128,
21745 IX86_BUILTIN_PSUBUSB128,
21746 IX86_BUILTIN_PSUBUSW128,
21748 IX86_BUILTIN_PAND128,
21749 IX86_BUILTIN_PANDN128,
21750 IX86_BUILTIN_POR128,
21751 IX86_BUILTIN_PXOR128,
21753 IX86_BUILTIN_PAVGB128,
21754 IX86_BUILTIN_PAVGW128,
21756 IX86_BUILTIN_PCMPEQB128,
21757 IX86_BUILTIN_PCMPEQW128,
21758 IX86_BUILTIN_PCMPEQD128,
21759 IX86_BUILTIN_PCMPGTB128,
21760 IX86_BUILTIN_PCMPGTW128,
21761 IX86_BUILTIN_PCMPGTD128,
21763 IX86_BUILTIN_PMADDWD128,
21765 IX86_BUILTIN_PMAXSW128,
21766 IX86_BUILTIN_PMAXUB128,
21767 IX86_BUILTIN_PMINSW128,
21768 IX86_BUILTIN_PMINUB128,
21770 IX86_BUILTIN_PMULUDQ,
21771 IX86_BUILTIN_PMULUDQ128,
21772 IX86_BUILTIN_PMULHUW128,
21773 IX86_BUILTIN_PMULHW128,
21774 IX86_BUILTIN_PMULLW128,
21776 IX86_BUILTIN_PSADBW128,
21777 IX86_BUILTIN_PSHUFHW,
21778 IX86_BUILTIN_PSHUFLW,
21779 IX86_BUILTIN_PSHUFD,
21781 IX86_BUILTIN_PSLLDQI128,
21782 IX86_BUILTIN_PSLLWI128,
21783 IX86_BUILTIN_PSLLDI128,
21784 IX86_BUILTIN_PSLLQI128,
21785 IX86_BUILTIN_PSRAWI128,
21786 IX86_BUILTIN_PSRADI128,
21787 IX86_BUILTIN_PSRLDQI128,
21788 IX86_BUILTIN_PSRLWI128,
21789 IX86_BUILTIN_PSRLDI128,
21790 IX86_BUILTIN_PSRLQI128,
21792 IX86_BUILTIN_PSLLDQ128,
21793 IX86_BUILTIN_PSLLW128,
21794 IX86_BUILTIN_PSLLD128,
21795 IX86_BUILTIN_PSLLQ128,
21796 IX86_BUILTIN_PSRAW128,
21797 IX86_BUILTIN_PSRAD128,
21798 IX86_BUILTIN_PSRLW128,
21799 IX86_BUILTIN_PSRLD128,
21800 IX86_BUILTIN_PSRLQ128,
21802 IX86_BUILTIN_PUNPCKHBW128,
21803 IX86_BUILTIN_PUNPCKHWD128,
21804 IX86_BUILTIN_PUNPCKHDQ128,
21805 IX86_BUILTIN_PUNPCKHQDQ128,
21806 IX86_BUILTIN_PUNPCKLBW128,
21807 IX86_BUILTIN_PUNPCKLWD128,
21808 IX86_BUILTIN_PUNPCKLDQ128,
21809 IX86_BUILTIN_PUNPCKLQDQ128,
21811 IX86_BUILTIN_CLFLUSH,
21812 IX86_BUILTIN_MFENCE,
21813 IX86_BUILTIN_LFENCE,
21815 IX86_BUILTIN_BSRSI,
21816 IX86_BUILTIN_BSRDI,
21817 IX86_BUILTIN_RDPMC,
21818 IX86_BUILTIN_RDTSC,
21819 IX86_BUILTIN_RDTSCP,
21820 IX86_BUILTIN_ROLQI,
21821 IX86_BUILTIN_ROLHI,
21822 IX86_BUILTIN_RORQI,
21823 IX86_BUILTIN_RORHI,
21826 IX86_BUILTIN_ADDSUBPS,
21827 IX86_BUILTIN_HADDPS,
21828 IX86_BUILTIN_HSUBPS,
21829 IX86_BUILTIN_MOVSHDUP,
21830 IX86_BUILTIN_MOVSLDUP,
21831 IX86_BUILTIN_ADDSUBPD,
21832 IX86_BUILTIN_HADDPD,
21833 IX86_BUILTIN_HSUBPD,
21834 IX86_BUILTIN_LDDQU,
21836 IX86_BUILTIN_MONITOR,
21837 IX86_BUILTIN_MWAIT,
21840 IX86_BUILTIN_PHADDW,
21841 IX86_BUILTIN_PHADDD,
21842 IX86_BUILTIN_PHADDSW,
21843 IX86_BUILTIN_PHSUBW,
21844 IX86_BUILTIN_PHSUBD,
21845 IX86_BUILTIN_PHSUBSW,
21846 IX86_BUILTIN_PMADDUBSW,
21847 IX86_BUILTIN_PMULHRSW,
21848 IX86_BUILTIN_PSHUFB,
21849 IX86_BUILTIN_PSIGNB,
21850 IX86_BUILTIN_PSIGNW,
21851 IX86_BUILTIN_PSIGND,
21852 IX86_BUILTIN_PALIGNR,
21853 IX86_BUILTIN_PABSB,
21854 IX86_BUILTIN_PABSW,
21855 IX86_BUILTIN_PABSD,
21857 IX86_BUILTIN_PHADDW128,
21858 IX86_BUILTIN_PHADDD128,
21859 IX86_BUILTIN_PHADDSW128,
21860 IX86_BUILTIN_PHSUBW128,
21861 IX86_BUILTIN_PHSUBD128,
21862 IX86_BUILTIN_PHSUBSW128,
21863 IX86_BUILTIN_PMADDUBSW128,
21864 IX86_BUILTIN_PMULHRSW128,
21865 IX86_BUILTIN_PSHUFB128,
21866 IX86_BUILTIN_PSIGNB128,
21867 IX86_BUILTIN_PSIGNW128,
21868 IX86_BUILTIN_PSIGND128,
21869 IX86_BUILTIN_PALIGNR128,
21870 IX86_BUILTIN_PABSB128,
21871 IX86_BUILTIN_PABSW128,
21872 IX86_BUILTIN_PABSD128,
21874 /* AMDFAM10 - SSE4A New Instructions. */
21875 IX86_BUILTIN_MOVNTSD,
21876 IX86_BUILTIN_MOVNTSS,
21877 IX86_BUILTIN_EXTRQI,
21878 IX86_BUILTIN_EXTRQ,
21879 IX86_BUILTIN_INSERTQI,
21880 IX86_BUILTIN_INSERTQ,
21883 IX86_BUILTIN_BLENDPD,
21884 IX86_BUILTIN_BLENDPS,
21885 IX86_BUILTIN_BLENDVPD,
21886 IX86_BUILTIN_BLENDVPS,
21887 IX86_BUILTIN_PBLENDVB128,
21888 IX86_BUILTIN_PBLENDW128,
21893 IX86_BUILTIN_INSERTPS128,
21895 IX86_BUILTIN_MOVNTDQA,
21896 IX86_BUILTIN_MPSADBW128,
21897 IX86_BUILTIN_PACKUSDW128,
21898 IX86_BUILTIN_PCMPEQQ,
21899 IX86_BUILTIN_PHMINPOSUW128,
21901 IX86_BUILTIN_PMAXSB128,
21902 IX86_BUILTIN_PMAXSD128,
21903 IX86_BUILTIN_PMAXUD128,
21904 IX86_BUILTIN_PMAXUW128,
21906 IX86_BUILTIN_PMINSB128,
21907 IX86_BUILTIN_PMINSD128,
21908 IX86_BUILTIN_PMINUD128,
21909 IX86_BUILTIN_PMINUW128,
21911 IX86_BUILTIN_PMOVSXBW128,
21912 IX86_BUILTIN_PMOVSXBD128,
21913 IX86_BUILTIN_PMOVSXBQ128,
21914 IX86_BUILTIN_PMOVSXWD128,
21915 IX86_BUILTIN_PMOVSXWQ128,
21916 IX86_BUILTIN_PMOVSXDQ128,
21918 IX86_BUILTIN_PMOVZXBW128,
21919 IX86_BUILTIN_PMOVZXBD128,
21920 IX86_BUILTIN_PMOVZXBQ128,
21921 IX86_BUILTIN_PMOVZXWD128,
21922 IX86_BUILTIN_PMOVZXWQ128,
21923 IX86_BUILTIN_PMOVZXDQ128,
21925 IX86_BUILTIN_PMULDQ128,
21926 IX86_BUILTIN_PMULLD128,
21928 IX86_BUILTIN_ROUNDPD,
21929 IX86_BUILTIN_ROUNDPS,
21930 IX86_BUILTIN_ROUNDSD,
21931 IX86_BUILTIN_ROUNDSS,
21933 IX86_BUILTIN_PTESTZ,
21934 IX86_BUILTIN_PTESTC,
21935 IX86_BUILTIN_PTESTNZC,
21937 IX86_BUILTIN_VEC_INIT_V2SI,
21938 IX86_BUILTIN_VEC_INIT_V4HI,
21939 IX86_BUILTIN_VEC_INIT_V8QI,
21940 IX86_BUILTIN_VEC_EXT_V2DF,
21941 IX86_BUILTIN_VEC_EXT_V2DI,
21942 IX86_BUILTIN_VEC_EXT_V4SF,
21943 IX86_BUILTIN_VEC_EXT_V4SI,
21944 IX86_BUILTIN_VEC_EXT_V8HI,
21945 IX86_BUILTIN_VEC_EXT_V2SI,
21946 IX86_BUILTIN_VEC_EXT_V4HI,
21947 IX86_BUILTIN_VEC_EXT_V16QI,
21948 IX86_BUILTIN_VEC_SET_V2DI,
21949 IX86_BUILTIN_VEC_SET_V4SF,
21950 IX86_BUILTIN_VEC_SET_V4SI,
21951 IX86_BUILTIN_VEC_SET_V8HI,
21952 IX86_BUILTIN_VEC_SET_V4HI,
21953 IX86_BUILTIN_VEC_SET_V16QI,
21955 IX86_BUILTIN_VEC_PACK_SFIX,
21958 IX86_BUILTIN_CRC32QI,
21959 IX86_BUILTIN_CRC32HI,
21960 IX86_BUILTIN_CRC32SI,
21961 IX86_BUILTIN_CRC32DI,
21963 IX86_BUILTIN_PCMPESTRI128,
21964 IX86_BUILTIN_PCMPESTRM128,
21965 IX86_BUILTIN_PCMPESTRA128,
21966 IX86_BUILTIN_PCMPESTRC128,
21967 IX86_BUILTIN_PCMPESTRO128,
21968 IX86_BUILTIN_PCMPESTRS128,
21969 IX86_BUILTIN_PCMPESTRZ128,
21970 IX86_BUILTIN_PCMPISTRI128,
21971 IX86_BUILTIN_PCMPISTRM128,
21972 IX86_BUILTIN_PCMPISTRA128,
21973 IX86_BUILTIN_PCMPISTRC128,
21974 IX86_BUILTIN_PCMPISTRO128,
21975 IX86_BUILTIN_PCMPISTRS128,
21976 IX86_BUILTIN_PCMPISTRZ128,
21978 IX86_BUILTIN_PCMPGTQ,
21980 /* AES instructions */
21981 IX86_BUILTIN_AESENC128,
21982 IX86_BUILTIN_AESENCLAST128,
21983 IX86_BUILTIN_AESDEC128,
21984 IX86_BUILTIN_AESDECLAST128,
21985 IX86_BUILTIN_AESIMC128,
21986 IX86_BUILTIN_AESKEYGENASSIST128,
21988 /* PCLMUL instruction */
21989 IX86_BUILTIN_PCLMULQDQ128,
21992 IX86_BUILTIN_ADDPD256,
21993 IX86_BUILTIN_ADDPS256,
21994 IX86_BUILTIN_ADDSUBPD256,
21995 IX86_BUILTIN_ADDSUBPS256,
21996 IX86_BUILTIN_ANDPD256,
21997 IX86_BUILTIN_ANDPS256,
21998 IX86_BUILTIN_ANDNPD256,
21999 IX86_BUILTIN_ANDNPS256,
22000 IX86_BUILTIN_BLENDPD256,
22001 IX86_BUILTIN_BLENDPS256,
22002 IX86_BUILTIN_BLENDVPD256,
22003 IX86_BUILTIN_BLENDVPS256,
22004 IX86_BUILTIN_DIVPD256,
22005 IX86_BUILTIN_DIVPS256,
22006 IX86_BUILTIN_DPPS256,
22007 IX86_BUILTIN_HADDPD256,
22008 IX86_BUILTIN_HADDPS256,
22009 IX86_BUILTIN_HSUBPD256,
22010 IX86_BUILTIN_HSUBPS256,
22011 IX86_BUILTIN_MAXPD256,
22012 IX86_BUILTIN_MAXPS256,
22013 IX86_BUILTIN_MINPD256,
22014 IX86_BUILTIN_MINPS256,
22015 IX86_BUILTIN_MULPD256,
22016 IX86_BUILTIN_MULPS256,
22017 IX86_BUILTIN_ORPD256,
22018 IX86_BUILTIN_ORPS256,
22019 IX86_BUILTIN_SHUFPD256,
22020 IX86_BUILTIN_SHUFPS256,
22021 IX86_BUILTIN_SUBPD256,
22022 IX86_BUILTIN_SUBPS256,
22023 IX86_BUILTIN_XORPD256,
22024 IX86_BUILTIN_XORPS256,
22025 IX86_BUILTIN_CMPSD,
22026 IX86_BUILTIN_CMPSS,
22027 IX86_BUILTIN_CMPPD,
22028 IX86_BUILTIN_CMPPS,
22029 IX86_BUILTIN_CMPPD256,
22030 IX86_BUILTIN_CMPPS256,
22031 IX86_BUILTIN_CVTDQ2PD256,
22032 IX86_BUILTIN_CVTDQ2PS256,
22033 IX86_BUILTIN_CVTPD2PS256,
22034 IX86_BUILTIN_CVTPS2DQ256,
22035 IX86_BUILTIN_CVTPS2PD256,
22036 IX86_BUILTIN_CVTTPD2DQ256,
22037 IX86_BUILTIN_CVTPD2DQ256,
22038 IX86_BUILTIN_CVTTPS2DQ256,
22039 IX86_BUILTIN_EXTRACTF128PD256,
22040 IX86_BUILTIN_EXTRACTF128PS256,
22041 IX86_BUILTIN_EXTRACTF128SI256,
22042 IX86_BUILTIN_VZEROALL,
22043 IX86_BUILTIN_VZEROUPPER,
22044 IX86_BUILTIN_VPERMILVARPD,
22045 IX86_BUILTIN_VPERMILVARPS,
22046 IX86_BUILTIN_VPERMILVARPD256,
22047 IX86_BUILTIN_VPERMILVARPS256,
22048 IX86_BUILTIN_VPERMILPD,
22049 IX86_BUILTIN_VPERMILPS,
22050 IX86_BUILTIN_VPERMILPD256,
22051 IX86_BUILTIN_VPERMILPS256,
22052 IX86_BUILTIN_VPERMIL2PD,
22053 IX86_BUILTIN_VPERMIL2PS,
22054 IX86_BUILTIN_VPERMIL2PD256,
22055 IX86_BUILTIN_VPERMIL2PS256,
22056 IX86_BUILTIN_VPERM2F128PD256,
22057 IX86_BUILTIN_VPERM2F128PS256,
22058 IX86_BUILTIN_VPERM2F128SI256,
22059 IX86_BUILTIN_VBROADCASTSS,
22060 IX86_BUILTIN_VBROADCASTSD256,
22061 IX86_BUILTIN_VBROADCASTSS256,
22062 IX86_BUILTIN_VBROADCASTPD256,
22063 IX86_BUILTIN_VBROADCASTPS256,
22064 IX86_BUILTIN_VINSERTF128PD256,
22065 IX86_BUILTIN_VINSERTF128PS256,
22066 IX86_BUILTIN_VINSERTF128SI256,
22067 IX86_BUILTIN_LOADUPD256,
22068 IX86_BUILTIN_LOADUPS256,
22069 IX86_BUILTIN_STOREUPD256,
22070 IX86_BUILTIN_STOREUPS256,
22071 IX86_BUILTIN_LDDQU256,
22072 IX86_BUILTIN_MOVNTDQ256,
22073 IX86_BUILTIN_MOVNTPD256,
22074 IX86_BUILTIN_MOVNTPS256,
22075 IX86_BUILTIN_LOADDQU256,
22076 IX86_BUILTIN_STOREDQU256,
22077 IX86_BUILTIN_MASKLOADPD,
22078 IX86_BUILTIN_MASKLOADPS,
22079 IX86_BUILTIN_MASKSTOREPD,
22080 IX86_BUILTIN_MASKSTOREPS,
22081 IX86_BUILTIN_MASKLOADPD256,
22082 IX86_BUILTIN_MASKLOADPS256,
22083 IX86_BUILTIN_MASKSTOREPD256,
22084 IX86_BUILTIN_MASKSTOREPS256,
22085 IX86_BUILTIN_MOVSHDUP256,
22086 IX86_BUILTIN_MOVSLDUP256,
22087 IX86_BUILTIN_MOVDDUP256,
22089 IX86_BUILTIN_SQRTPD256,
22090 IX86_BUILTIN_SQRTPS256,
22091 IX86_BUILTIN_SQRTPS_NR256,
22092 IX86_BUILTIN_RSQRTPS256,
22093 IX86_BUILTIN_RSQRTPS_NR256,
22095 IX86_BUILTIN_RCPPS256,
22097 IX86_BUILTIN_ROUNDPD256,
22098 IX86_BUILTIN_ROUNDPS256,
22100 IX86_BUILTIN_UNPCKHPD256,
22101 IX86_BUILTIN_UNPCKLPD256,
22102 IX86_BUILTIN_UNPCKHPS256,
22103 IX86_BUILTIN_UNPCKLPS256,
22105 IX86_BUILTIN_SI256_SI,
22106 IX86_BUILTIN_PS256_PS,
22107 IX86_BUILTIN_PD256_PD,
22108 IX86_BUILTIN_SI_SI256,
22109 IX86_BUILTIN_PS_PS256,
22110 IX86_BUILTIN_PD_PD256,
22112 IX86_BUILTIN_VTESTZPD,
22113 IX86_BUILTIN_VTESTCPD,
22114 IX86_BUILTIN_VTESTNZCPD,
22115 IX86_BUILTIN_VTESTZPS,
22116 IX86_BUILTIN_VTESTCPS,
22117 IX86_BUILTIN_VTESTNZCPS,
22118 IX86_BUILTIN_VTESTZPD256,
22119 IX86_BUILTIN_VTESTCPD256,
22120 IX86_BUILTIN_VTESTNZCPD256,
22121 IX86_BUILTIN_VTESTZPS256,
22122 IX86_BUILTIN_VTESTCPS256,
22123 IX86_BUILTIN_VTESTNZCPS256,
22124 IX86_BUILTIN_PTESTZ256,
22125 IX86_BUILTIN_PTESTC256,
22126 IX86_BUILTIN_PTESTNZC256,
22128 IX86_BUILTIN_MOVMSKPD256,
22129 IX86_BUILTIN_MOVMSKPS256,
22131 /* TFmode support builtins. */
22133 IX86_BUILTIN_HUGE_VALQ,
22134 IX86_BUILTIN_FABSQ,
22135 IX86_BUILTIN_COPYSIGNQ,
22137 /* Vectorizer support builtins. */
22138 IX86_BUILTIN_CPYSGNPS,
22139 IX86_BUILTIN_CPYSGNPD,
22141 IX86_BUILTIN_CVTUDQ2PS,
22143 IX86_BUILTIN_VEC_PERM_V2DF,
22144 IX86_BUILTIN_VEC_PERM_V4SF,
22145 IX86_BUILTIN_VEC_PERM_V2DI,
22146 IX86_BUILTIN_VEC_PERM_V4SI,
22147 IX86_BUILTIN_VEC_PERM_V8HI,
22148 IX86_BUILTIN_VEC_PERM_V16QI,
22149 IX86_BUILTIN_VEC_PERM_V2DI_U,
22150 IX86_BUILTIN_VEC_PERM_V4SI_U,
22151 IX86_BUILTIN_VEC_PERM_V8HI_U,
22152 IX86_BUILTIN_VEC_PERM_V16QI_U,
22153 IX86_BUILTIN_VEC_PERM_V4DF,
22154 IX86_BUILTIN_VEC_PERM_V8SF,
22156 /* FMA4 and XOP instructions. */
22157 IX86_BUILTIN_VFMADDSS,
22158 IX86_BUILTIN_VFMADDSD,
22159 IX86_BUILTIN_VFMADDPS,
22160 IX86_BUILTIN_VFMADDPD,
22161 IX86_BUILTIN_VFMSUBSS,
22162 IX86_BUILTIN_VFMSUBSD,
22163 IX86_BUILTIN_VFMSUBPS,
22164 IX86_BUILTIN_VFMSUBPD,
22165 IX86_BUILTIN_VFMADDSUBPS,
22166 IX86_BUILTIN_VFMADDSUBPD,
22167 IX86_BUILTIN_VFMSUBADDPS,
22168 IX86_BUILTIN_VFMSUBADDPD,
22169 IX86_BUILTIN_VFNMADDSS,
22170 IX86_BUILTIN_VFNMADDSD,
22171 IX86_BUILTIN_VFNMADDPS,
22172 IX86_BUILTIN_VFNMADDPD,
22173 IX86_BUILTIN_VFNMSUBSS,
22174 IX86_BUILTIN_VFNMSUBSD,
22175 IX86_BUILTIN_VFNMSUBPS,
22176 IX86_BUILTIN_VFNMSUBPD,
22177 IX86_BUILTIN_VFMADDPS256,
22178 IX86_BUILTIN_VFMADDPD256,
22179 IX86_BUILTIN_VFMSUBPS256,
22180 IX86_BUILTIN_VFMSUBPD256,
22181 IX86_BUILTIN_VFMADDSUBPS256,
22182 IX86_BUILTIN_VFMADDSUBPD256,
22183 IX86_BUILTIN_VFMSUBADDPS256,
22184 IX86_BUILTIN_VFMSUBADDPD256,
22185 IX86_BUILTIN_VFNMADDPS256,
22186 IX86_BUILTIN_VFNMADDPD256,
22187 IX86_BUILTIN_VFNMSUBPS256,
22188 IX86_BUILTIN_VFNMSUBPD256,
22190 IX86_BUILTIN_VPCMOV,
22191 IX86_BUILTIN_VPCMOV_V2DI,
22192 IX86_BUILTIN_VPCMOV_V4SI,
22193 IX86_BUILTIN_VPCMOV_V8HI,
22194 IX86_BUILTIN_VPCMOV_V16QI,
22195 IX86_BUILTIN_VPCMOV_V4SF,
22196 IX86_BUILTIN_VPCMOV_V2DF,
22197 IX86_BUILTIN_VPCMOV256,
22198 IX86_BUILTIN_VPCMOV_V4DI256,
22199 IX86_BUILTIN_VPCMOV_V8SI256,
22200 IX86_BUILTIN_VPCMOV_V16HI256,
22201 IX86_BUILTIN_VPCMOV_V32QI256,
22202 IX86_BUILTIN_VPCMOV_V8SF256,
22203 IX86_BUILTIN_VPCMOV_V4DF256,
22205 IX86_BUILTIN_VPPERM,
22207 IX86_BUILTIN_VPMACSSWW,
22208 IX86_BUILTIN_VPMACSWW,
22209 IX86_BUILTIN_VPMACSSWD,
22210 IX86_BUILTIN_VPMACSWD,
22211 IX86_BUILTIN_VPMACSSDD,
22212 IX86_BUILTIN_VPMACSDD,
22213 IX86_BUILTIN_VPMACSSDQL,
22214 IX86_BUILTIN_VPMACSSDQH,
22215 IX86_BUILTIN_VPMACSDQL,
22216 IX86_BUILTIN_VPMACSDQH,
22217 IX86_BUILTIN_VPMADCSSWD,
22218 IX86_BUILTIN_VPMADCSWD,
22220 IX86_BUILTIN_VPHADDBW,
22221 IX86_BUILTIN_VPHADDBD,
22222 IX86_BUILTIN_VPHADDBQ,
22223 IX86_BUILTIN_VPHADDWD,
22224 IX86_BUILTIN_VPHADDWQ,
22225 IX86_BUILTIN_VPHADDDQ,
22226 IX86_BUILTIN_VPHADDUBW,
22227 IX86_BUILTIN_VPHADDUBD,
22228 IX86_BUILTIN_VPHADDUBQ,
22229 IX86_BUILTIN_VPHADDUWD,
22230 IX86_BUILTIN_VPHADDUWQ,
22231 IX86_BUILTIN_VPHADDUDQ,
22232 IX86_BUILTIN_VPHSUBBW,
22233 IX86_BUILTIN_VPHSUBWD,
22234 IX86_BUILTIN_VPHSUBDQ,
22236 IX86_BUILTIN_VPROTB,
22237 IX86_BUILTIN_VPROTW,
22238 IX86_BUILTIN_VPROTD,
22239 IX86_BUILTIN_VPROTQ,
22240 IX86_BUILTIN_VPROTB_IMM,
22241 IX86_BUILTIN_VPROTW_IMM,
22242 IX86_BUILTIN_VPROTD_IMM,
22243 IX86_BUILTIN_VPROTQ_IMM,
22245 IX86_BUILTIN_VPSHLB,
22246 IX86_BUILTIN_VPSHLW,
22247 IX86_BUILTIN_VPSHLD,
22248 IX86_BUILTIN_VPSHLQ,
22249 IX86_BUILTIN_VPSHAB,
22250 IX86_BUILTIN_VPSHAW,
22251 IX86_BUILTIN_VPSHAD,
22252 IX86_BUILTIN_VPSHAQ,
22254 IX86_BUILTIN_VFRCZSS,
22255 IX86_BUILTIN_VFRCZSD,
22256 IX86_BUILTIN_VFRCZPS,
22257 IX86_BUILTIN_VFRCZPD,
22258 IX86_BUILTIN_VFRCZPS256,
22259 IX86_BUILTIN_VFRCZPD256,
22261 IX86_BUILTIN_VPCOMEQUB,
22262 IX86_BUILTIN_VPCOMNEUB,
22263 IX86_BUILTIN_VPCOMLTUB,
22264 IX86_BUILTIN_VPCOMLEUB,
22265 IX86_BUILTIN_VPCOMGTUB,
22266 IX86_BUILTIN_VPCOMGEUB,
22267 IX86_BUILTIN_VPCOMFALSEUB,
22268 IX86_BUILTIN_VPCOMTRUEUB,
22270 IX86_BUILTIN_VPCOMEQUW,
22271 IX86_BUILTIN_VPCOMNEUW,
22272 IX86_BUILTIN_VPCOMLTUW,
22273 IX86_BUILTIN_VPCOMLEUW,
22274 IX86_BUILTIN_VPCOMGTUW,
22275 IX86_BUILTIN_VPCOMGEUW,
22276 IX86_BUILTIN_VPCOMFALSEUW,
22277 IX86_BUILTIN_VPCOMTRUEUW,
22279 IX86_BUILTIN_VPCOMEQUD,
22280 IX86_BUILTIN_VPCOMNEUD,
22281 IX86_BUILTIN_VPCOMLTUD,
22282 IX86_BUILTIN_VPCOMLEUD,
22283 IX86_BUILTIN_VPCOMGTUD,
22284 IX86_BUILTIN_VPCOMGEUD,
22285 IX86_BUILTIN_VPCOMFALSEUD,
22286 IX86_BUILTIN_VPCOMTRUEUD,
22288 IX86_BUILTIN_VPCOMEQUQ,
22289 IX86_BUILTIN_VPCOMNEUQ,
22290 IX86_BUILTIN_VPCOMLTUQ,
22291 IX86_BUILTIN_VPCOMLEUQ,
22292 IX86_BUILTIN_VPCOMGTUQ,
22293 IX86_BUILTIN_VPCOMGEUQ,
22294 IX86_BUILTIN_VPCOMFALSEUQ,
22295 IX86_BUILTIN_VPCOMTRUEUQ,
22297 IX86_BUILTIN_VPCOMEQB,
22298 IX86_BUILTIN_VPCOMNEB,
22299 IX86_BUILTIN_VPCOMLTB,
22300 IX86_BUILTIN_VPCOMLEB,
22301 IX86_BUILTIN_VPCOMGTB,
22302 IX86_BUILTIN_VPCOMGEB,
22303 IX86_BUILTIN_VPCOMFALSEB,
22304 IX86_BUILTIN_VPCOMTRUEB,
22306 IX86_BUILTIN_VPCOMEQW,
22307 IX86_BUILTIN_VPCOMNEW,
22308 IX86_BUILTIN_VPCOMLTW,
22309 IX86_BUILTIN_VPCOMLEW,
22310 IX86_BUILTIN_VPCOMGTW,
22311 IX86_BUILTIN_VPCOMGEW,
22312 IX86_BUILTIN_VPCOMFALSEW,
22313 IX86_BUILTIN_VPCOMTRUEW,
22315 IX86_BUILTIN_VPCOMEQD,
22316 IX86_BUILTIN_VPCOMNED,
22317 IX86_BUILTIN_VPCOMLTD,
22318 IX86_BUILTIN_VPCOMLED,
22319 IX86_BUILTIN_VPCOMGTD,
22320 IX86_BUILTIN_VPCOMGED,
22321 IX86_BUILTIN_VPCOMFALSED,
22322 IX86_BUILTIN_VPCOMTRUED,
22324 IX86_BUILTIN_VPCOMEQQ,
22325 IX86_BUILTIN_VPCOMNEQ,
22326 IX86_BUILTIN_VPCOMLTQ,
22327 IX86_BUILTIN_VPCOMLEQ,
22328 IX86_BUILTIN_VPCOMGTQ,
22329 IX86_BUILTIN_VPCOMGEQ,
22330 IX86_BUILTIN_VPCOMFALSEQ,
22331 IX86_BUILTIN_VPCOMTRUEQ,
22333 /* LWP instructions. */
22334 IX86_BUILTIN_LLWPCB,
22335 IX86_BUILTIN_SLWPCB,
22336 IX86_BUILTIN_LWPVAL32,
22337 IX86_BUILTIN_LWPVAL64,
22338 IX86_BUILTIN_LWPINS32,
22339 IX86_BUILTIN_LWPINS64,
22343 /* FSGSBASE instructions. */
22344 IX86_BUILTIN_RDFSBASE32,
22345 IX86_BUILTIN_RDFSBASE64,
22346 IX86_BUILTIN_RDGSBASE32,
22347 IX86_BUILTIN_RDGSBASE64,
22348 IX86_BUILTIN_WRFSBASE32,
22349 IX86_BUILTIN_WRFSBASE64,
22350 IX86_BUILTIN_WRGSBASE32,
22351 IX86_BUILTIN_WRGSBASE64,
22353 /* RDRND instructions. */
22354 IX86_BUILTIN_RDRAND16,
22355 IX86_BUILTIN_RDRAND32,
22356 IX86_BUILTIN_RDRAND64,
22358 /* F16C instructions. */
22359 IX86_BUILTIN_CVTPH2PS,
22360 IX86_BUILTIN_CVTPH2PS256,
22361 IX86_BUILTIN_CVTPS2PH,
22362 IX86_BUILTIN_CVTPS2PH256,
22364 IX86_BUILTIN_MAX
22365 };
22367 /* Table for the ix86 builtin decls. */
22368 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
22370 /* Table of all of the builtin functions that are possible with different ISAs
22371 but are waiting to be built until a function is declared to use that
22372 ISA.  */
22373 struct builtin_isa {
22374 const char *name; /* function name */
22375 enum ix86_builtin_func_type tcode; /* type to use in the declaration */
22376 int isa; /* isa_flags this builtin is defined for */
22377 bool const_p; /* true if the declaration is constant */
22378 bool set_and_not_built_p; /* deferred: recorded here but not yet built */
22379 };
22381 static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
22384 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
22385 MASK of isa_flags it requires in the ix86_builtins_isa array.  Store the
22386 function decl in the ix86_builtins array.  Return the function decl,
22387 or NULL_TREE if the builtin was not added.
22389 If the front end has a special hook for builtin functions, delay adding
22390 builtin functions that aren't in the current ISA until the ISA is changed
22391 with function specific optimization.  Doing so can save about 300K for the
22392 default compiler.  When the builtin is expanded, check at that time whether
22393 it is valid.
22395 If the front end doesn't have a special hook, record all builtins, even if
22396 they aren't in the current ISA, in case the user uses function specific
22397 options for a different ISA, so that we don't get scope errors if a
22398 builtin is added in the middle of a function scope.  */
22400 static inline tree
22401 def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
22402 enum ix86_builtins code)
22403 {
22404 tree decl = NULL_TREE;
22406 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
22407 {
22408 ix86_builtins_isa[(int) code].isa = mask;
22410 mask &= ~OPTION_MASK_ISA_64BIT;
22411 if (mask == 0
22412 || (mask & ix86_isa_flags) != 0
22413 || (lang_hooks.builtin_function
22414 == lang_hooks.builtin_function_ext_scope))
22416 {
22417 tree type = ix86_get_builtin_func_type (tcode);
22418 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
22419 NULL, NULL_TREE);
22420 ix86_builtins[(int) code] = decl;
22421 ix86_builtins_isa[(int) code].set_and_not_built_p = false;
22422 }
22423 else
22424 {
22425 ix86_builtins[(int) code] = NULL_TREE;
22426 ix86_builtins_isa[(int) code].tcode = tcode;
22427 ix86_builtins_isa[(int) code].name = name;
22428 ix86_builtins_isa[(int) code].const_p = false;
22429 ix86_builtins_isa[(int) code].set_and_not_built_p = true;
22430 }
22431 }
22433 return decl;
22434 }
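/* Illustrative sketch, not part of the original file: a typical
   registration made by the initialization code later in this file,
   which walks the bdesc_* tables and issues calls equivalent to

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_paddd128",
                        V4SI_FTYPE_V4SI_V4SI, IX86_BUILTIN_PADDD128);

   With -msse2 in effect the decl is built immediately; otherwise it is
   only recorded in ix86_builtins_isa for ix86_add_new_builtins below.
   A MASK containing OPTION_MASK_ISA_64BIT is skipped entirely on
   32-bit targets by the TARGET_64BIT test above.  */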
22436 /* Like def_builtin, but also marks the function decl "const". */
22438 static inline tree
22439 def_builtin_const (int mask, const char *name,
22440 enum ix86_builtin_func_type tcode, enum ix86_builtins code)
22441 {
22442 tree decl = def_builtin (mask, name, tcode, code);
22443 if (decl)
22444 TREE_READONLY (decl) = 1;
22445 else
22446 ix86_builtins_isa[(int) code].const_p = true;
22448 return decl;
22449 }
22451 /* Add any new builtin functions for a given ISA that may not have been
22452 declared. This saves a bit of space compared to adding all of the
22453 declarations to the tree, even if we didn't use them. */
22455 static void
22456 ix86_add_new_builtins (int isa)
22457 {
22458 int i;
22460 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
22461 {
22462 if ((ix86_builtins_isa[i].isa & isa) != 0
22463 && ix86_builtins_isa[i].set_and_not_built_p)
22464 {
22465 tree decl, type;
22467 /* Don't define the builtin again.  */
22468 ix86_builtins_isa[i].set_and_not_built_p = false;
22470 type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
22471 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
22472 type, i, BUILT_IN_MD, NULL,
22473 NULL_TREE);
22475 ix86_builtins[i] = decl;
22476 if (ix86_builtins_isa[i].const_p)
22477 TREE_READONLY (decl) = 1;
22478 }
22479 }
22480 }
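/* Illustrative sketch, not part of the original file: the deferral
   round trip.  In a unit compiled without -mavx,

     __attribute__((target ("avx")))
     __m256d f (__m256d x, __m256d y) { return _mm256_add_pd (x, y); }

   first leaves the AVX builtins recorded with set_and_not_built_p set;
   when the target attribute switches ix86_isa_flags, the option
   handling code calls ix86_add_new_builtins with the new flag set and
   the missing decls are created at extended (file) scope, so they
   remain visible after the function ends.  */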
22482 /* Bits for builtin_description.flag. */
22484 /* Set when we don't support the comparison natively, and should
22485 swap_comparison in order to support it. */
22486 #define BUILTIN_DESC_SWAP_OPERANDS 1
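/* Worked example, not part of the original file: SSE provides CMPLTPS
   but no native "greater than" packed compare, so the bdesc_args entry
   below for __builtin_ia32_cmpgtps pairs comparison code LT with the
   V4SF_FTYPE_V4SF_V4SF_SWAP function type; the expander then computes

     a > b   as   b < a   (cmpltps with the operands exchanged).

   An entry whose flag field sets BUILTIN_DESC_SWAP_OPERANDS requests
   the same operand exchange.  */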
22488 struct builtin_description
22489 {
22490 const unsigned int mask;
22491 const enum insn_code icode;
22492 const char *const name;
22493 const enum ix86_builtins code;
22494 const enum rtx_code comparison;
22495 const int flag;
22496 };
22498 static const struct builtin_description bdesc_comi[] =
22499 {
22500 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
22501 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
22502 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
22503 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
22504 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
22505 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
22506 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
22507 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
22508 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
22509 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
22510 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
22511 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
22512 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
22513 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
22514 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
22515 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
22516 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
22517 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
22518 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
22519 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
22520 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
22521 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
22522 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
22523 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
22524 };
22526 static const struct builtin_description bdesc_pcmpestr[] =
22527 {
22528 /* SSE4.2 */
22529 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
22530 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
22531 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
22532 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
22533 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
22534 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
22535 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
22536 };
22538 static const struct builtin_description bdesc_pcmpistr[] =
22539 {
22540 /* SSE4.2 */
22541 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
22542 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
22543 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
22544 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
22545 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
22546 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
22547 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
22548 };
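/* Note, not part of the original file: in the two tables above the
   flag field carries a machine mode instead of
   BUILTIN_DESC_SWAP_OPERANDS.  pcmpestri/pcmpistri set EFLAGS, and
   each flag-extracting variant names the condition it tests; e.g.
   __builtin_ia32_pcmpestria128 uses CCAmode for the "above" condition
   (CF == 0 && ZF == 0), while the plain ...stri128 and ...strm128
   entries (flag 0) return the index or mask result itself.  */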
22550 /* Special builtins with variable number of arguments. */
22551 static const struct builtin_description bdesc_special_args[] =
22552 {
22553 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
22554 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },
22556 /* MMX */
22557 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22559 /* 3DNow! */
22560 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
22562 /* SSE */
22563 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22564 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22565 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22567 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22568 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
22569 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22570 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
22572 /* SSE or 3DNow!A */
22573 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22574 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },
22576 /* SSE2 */
22577 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22578 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
22579 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22580 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
22581 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22582 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
22583 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
22584 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
22585 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22587 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22588 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
22590 /* SSE3 */
22591 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
22593 /* SSE4.1 */
22594 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
22596 /* SSE4A */
22597 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
22598 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
22600 /* AVX */
22601 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
22602 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
22604 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
22605 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22606 { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22607 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
22608 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
22610 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
22611 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
22612 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22613 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22614 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22615 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
22616 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
22618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
22619 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
22620 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
22622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
22623 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
22624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
22625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
22626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
22627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
22628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
22629 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
22631 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
22632 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
22633 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
22634 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
22635 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
22636 { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },
22638 /* FSGSBASE */
22639 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22640 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22641 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22642 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22643 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22644 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22645 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
22646 { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
22648 /* RDRND */
22649 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
22650 { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
22651 { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
22652 };
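/* Note, not part of the original file: the split between
   bdesc_special_args above and bdesc_args below follows how the
   builtins behave.  The "special" entries take or return pointers,
   touch memory, or exist for their side effects (fences, vzeroupper,
   lwpval), so they are registered with plain def_builtin; the
   value-only entries that follow are registered with
   def_builtin_const, letting the compiler treat them as pure
   functions of their operands.  */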
22654 /* Builtins with variable number of arguments. */
22655 static const struct builtin_description bdesc_args[] =
22656 {
22657 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
22658 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
22659 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
22660 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22661 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22662 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22663 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22665 /* MMX */
22666 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22667 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22668 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22669 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22670 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22671 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22673 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22674 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22675 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22676 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22677 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22678 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22679 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22680 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22682 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22683 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22685 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22686 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22687 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22688 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22690 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22691 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22692 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22693 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22694 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22695 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22697 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22698 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22699 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22700 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22701 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
22702 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
22704 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22705 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
22706 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22708 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
22710 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22711 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22712 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22713 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22714 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22715 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22717 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22718 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22719 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22720 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22721 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22722 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22724 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22725 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22726 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22727 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22729 /* 3DNow! */
22730 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22731 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22732 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22733 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22735 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22736 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22737 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22738 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22739 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22740 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22741 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22742 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22743 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22744 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22745 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22746 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22747 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22748 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22749 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22751 /* 3DNow!A */
22752 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22753 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22754 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22755 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22756 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22757 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22759 /* SSE */
22760 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
22761 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22762 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22764 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22768 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22771 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22775 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22776 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22777 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22779 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22781 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22782 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22784 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22785 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22786 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22787 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22788 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22789 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22790 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22791 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22792 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22793 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22794 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
22795 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22797 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22802 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22803 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22807 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22808 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22812 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22814 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22815 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22817 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22819 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22820 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22821 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22822 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22823 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22825 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
22826 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
22827 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
22829 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
22831 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22832 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22833 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22835 /* SSE MMX or 3DNow!A */
22836 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22837 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22838 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22840 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22841 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22842 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22843 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22845 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
22846 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
22848 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22850 /* SSE2 */
22851 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22853 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22854 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22855 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22856 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22857 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22858 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22859 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
22860 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
22861 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
22862 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
22863 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
22864 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
22866 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
22867 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
22868 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
22869 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
22870 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22871 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22873 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22874 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22875 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
22876 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
22881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22883 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
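
  /* SSE2 MMX */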
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
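
  /* SSE3 */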
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
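
  /* SSSE3 */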
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
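
  /* SSSE3 palignr.  */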
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
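
  /* SSE4.1 */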
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
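
  /* SSE4.1 rounding and ptest.  */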
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
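
  /* SSE4.2 */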
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
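
  /* SSE4A */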
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
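
  /* AES */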
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
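
  /* PCLMUL */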
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
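
  /* AVX */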
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
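
  /* F16C */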
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23461 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23469 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23477 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23485 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23493 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23501 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23509 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23517 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23525 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23534 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
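/* Illustrative note (not part of the original source): entries in this
   table are reached from user code through the XOP intrinsic wrappers.
   Assuming a compiler invoked with -mxop and the usual <x86intrin.h>
   wrappers, the vpmacsww entry above is exercised roughly like this:

     #include <x86intrin.h>

     __m128i
     mac16 (__m128i a, __m128i b, __m128i c)
     {
       // _mm_macc_epi16 is believed to expand to __builtin_ia32_vpmacsww,
       // which the table above maps to CODE_FOR_xop_pmacsww.
       return _mm_macc_epi16 (a, b, c);
     }
*/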
23540 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
23541    in the current target ISA, to allow the user to compile particular modules
23542    with different target specific options that differ from the command line
23543    options.  */
23544 static void
23545 ix86_init_mmx_sse_builtins (void)
23547 const struct builtin_description * d;
23548 enum ix86_builtin_func_type ftype;
23551 /* Add all special builtins with variable number of operands. */
23552 for (i = 0, d = bdesc_special_args;
23553 i < ARRAY_SIZE (bdesc_special_args);
23559 ftype = (enum ix86_builtin_func_type) d->flag;
23560 def_builtin (d->mask, d->name, ftype, d->code);
23563 /* Add all builtins with variable number of operands. */
23564 for (i = 0, d = bdesc_args;
23565 i < ARRAY_SIZE (bdesc_args);
23571 ftype = (enum ix86_builtin_func_type) d->flag;
23572 def_builtin_const (d->mask, d->name, ftype, d->code);
23575 /* pcmpestr[im] insns. */
23576 for (i = 0, d = bdesc_pcmpestr;
23577 i < ARRAY_SIZE (bdesc_pcmpestr);
23580 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23581 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
23582 else
23583 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
23584 def_builtin_const (d->mask, d->name, ftype, d->code);
23587 /* pcmpistr[im] insns. */
23588 for (i = 0, d = bdesc_pcmpistr;
23589 i < ARRAY_SIZE (bdesc_pcmpistr);
23592 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23593 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
23594 else
23595 ftype = INT_FTYPE_V16QI_V16QI_INT;
23596 def_builtin_const (d->mask, d->name, ftype, d->code);
23599 /* comi/ucomi insns. */
23600 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23602 if (d->mask == OPTION_MASK_ISA_SSE2)
23603 ftype = INT_FTYPE_V2DF_V2DF;
23604 else
23605 ftype = INT_FTYPE_V4SF_V4SF;
23606 def_builtin_const (d->mask, d->name, ftype, d->code);
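/* Illustrative sketch (not part of the original source): a comi builtin
   registered here, e.g. __builtin_ia32_comieq with INT_FTYPE_V4SF_V4SF,
   is believed to be what <xmmintrin.h> uses for _mm_comieq_ss:

     #include <xmmintrin.h>

     int
     scalar_eq (__m128 a, __m128 b)
     {
       return _mm_comieq_ss (a, b);   // __builtin_ia32_comieq
     }
*/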
23610 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
23611 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
23612 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
23613 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
23615 /* SSE or 3DNow!A */
23616 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23617 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
23618 IX86_BUILTIN_MASKMOVQ);
23621 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
23622 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
23624 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
23625 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
23626 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
23627 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
23630 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
23631 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
23632 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
23633 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
23636 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
23637 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
23638 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
23639 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
23640 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
23641 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
23642 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
23643 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
23644 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
23645 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
23646 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
23647 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
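/* Illustrative use (not from the original source): with -maes, the
   builtins above are believed to back the <wmmintrin.h> AES intrinsics:

     #include <wmmintrin.h>

     __m128i
     aes_round (__m128i state, __m128i key)
     {
       return _mm_aesenc_si128 (state, key);   // __builtin_ia32_aesenc128
     }
*/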
23650 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
23651 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
23653 /* MMX access to the vec_init patterns. */
23654 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
23655 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
23657 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
23658 V4HI_FTYPE_HI_HI_HI_HI,
23659 IX86_BUILTIN_VEC_INIT_V4HI);
23661 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
23662 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
23663 IX86_BUILTIN_VEC_INIT_V8QI);
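/* Illustrative note (not from the original source): these vec_init
   builtins are what the <mmintrin.h> constructors are believed to expand
   into, so that no MMX code is emitted behind the user's back:

     #include <mmintrin.h>

     __m64
     pair (int a, int b)
     {
       return _mm_setr_pi32 (a, b);   // __builtin_ia32_vec_init_v2si
     }
*/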
23665 /* Access to the vec_extract patterns. */
23666 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
23667 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
23668 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
23669 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
23670 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
23671 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
23672 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
23673 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
23674 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
23675 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
23677 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23678 "__builtin_ia32_vec_ext_v4hi",
23679 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
23681 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
23682 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
23684 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
23685 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
23687 /* Access to the vec_set patterns. */
23688 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
23689 "__builtin_ia32_vec_set_v2di",
23690 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
23692 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
23693 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
23695 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
23696 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
23698 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
23699 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
23701 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23702 "__builtin_ia32_vec_set_v4hi",
23703 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
23705 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
23706 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
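/* Illustrative sketch (not from the original source): the vec_extract
   and vec_set builtins above are believed to surface through intrinsics
   such as _mm_cvtss_f32 and (with -msse4.1) _mm_insert_epi32:

     #include <smmintrin.h>

     float   first (__m128 v)         { return _mm_cvtss_f32 (v); }
     __m128i put2 (__m128i v, int x)  { return _mm_insert_epi32 (v, x, 2); }
*/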
23708 /* Add FMA4/XOP multi-arg builtin instructions.  */
23709 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23714 ftype = (enum ix86_builtin_func_type) d->flag;
23715 def_builtin_const (d->mask, d->name, ftype, d->code);
23719 /* Internal method for ix86_init_builtins. */
23721 static void
23722 ix86_init_builtins_va_builtins_abi (void)
23723 {
23724 tree ms_va_ref, sysv_va_ref;
23725 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23726 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23727 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23728 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23732 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23733 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23734 ms_va_ref = build_reference_type (ms_va_list_type_node);
23736 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23739 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23740 fnvoid_va_start_ms =
23741 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23742 fnvoid_va_end_sysv =
23743 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23744 fnvoid_va_start_sysv =
23745 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23747 fnvoid_va_copy_ms =
23748 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23750 fnvoid_va_copy_sysv =
23751 build_function_type_list (void_type_node, sysv_va_ref,
23752 sysv_va_ref, NULL_TREE);
23754 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23755 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23756 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23757 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23758 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23759 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23760 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23761 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23762 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23763 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23764 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23765 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23766 }
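/* Usage sketch (illustrative, not part of the original source): the
   __builtin_ms_va_* functions registered above back variadic functions
   declared with the ms_abi attribute on x86-64:

     int __attribute__ ((ms_abi))
     sum_ms (int n, ...)
     {
       __builtin_ms_va_list ap;
       int i, s = 0;
       __builtin_ms_va_start (ap, n);
       for (i = 0; i < n; i++)
         s += __builtin_va_arg (ap, int);
       __builtin_ms_va_end (ap);
       return s;
     }
*/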
23768 static void
23769 ix86_init_builtin_types (void)
23770 {
23771 tree float128_type_node, float80_type_node;
23773 /* The __float80 type. */
23774 float80_type_node = long_double_type_node;
23775 if (TYPE_MODE (float80_type_node) != XFmode)
23777 /* The __float80 type. */
23778 float80_type_node = make_node (REAL_TYPE);
23780 TYPE_PRECISION (float80_type_node) = 80;
23781 layout_type (float80_type_node);
23783 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
23785 /* The __float128 type. */
23786 float128_type_node = make_node (REAL_TYPE);
23787 TYPE_PRECISION (float128_type_node) = 128;
23788 layout_type (float128_type_node);
23789 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
23791 /* This macro is built by i386-builtin-types.awk. */
23792 DEFINE_BUILTIN_PRIMITIVE_TYPES;
23793 }
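/* Example (illustrative, not from the original source): once registered,
   the types above are directly usable from C on x86 targets; the w/W and
   q/Q constant suffixes are believed to select them:

     __float80  e80  = 2.71828w;   // XFmode, 80-bit extended precision
     __float128 e128 = 2.71828q;   // TFmode, 128-bit
*/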
23795 static void
23796 ix86_init_builtins (void)
23797 {
23798 tree t;
23800 ix86_init_builtin_types ();
23802 /* TFmode support builtins. */
23803 def_builtin_const (0, "__builtin_infq",
23804 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
23805 def_builtin_const (0, "__builtin_huge_valq",
23806 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
23808 /* We will expand them to normal calls if SSE2 isn't available, since
23809    they are used by libgcc.  */
23810 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
23811 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
23812 BUILT_IN_MD, "__fabstf2", NULL_TREE);
23813 TREE_READONLY (t) = 1;
23814 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
23816 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
23817 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
23818 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
23819 TREE_READONLY (t) = 1;
23820 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
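/* Illustrative use of the TFmode builtins just registered (not part of
   the original source):

     __float128
     clamped_abs (__float128 x)
     {
       __float128 a = __builtin_fabsq (x);
       return a == __builtin_infq () ? __builtin_huge_valq () : a;
     }
*/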
23822 ix86_init_mmx_sse_builtins ();
23825 ix86_init_builtins_va_builtins_abi ();
23828 /* Return the ix86 builtin for CODE. */
23830 static tree
23831 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23832 {
23833 if (code >= IX86_BUILTIN_MAX)
23834 return error_mark_node;
23836 return ix86_builtins[code];
23837 }
23839 /* Errors in the source file can cause expand_expr to return const0_rtx
23840 where we expect a vector. To avoid crashing, use one of the vector
23841 clear instructions. */
23842 static rtx
23843 safe_vector_operand (rtx x, enum machine_mode mode)
23844 {
23845 if (x == const0_rtx)
23846 x = CONST0_RTX (mode);
23847 return x;
23848 }
23850 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23852 static rtx
23853 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23854 {
23855 rtx pat;
23856 tree arg0 = CALL_EXPR_ARG (exp, 0);
23857 tree arg1 = CALL_EXPR_ARG (exp, 1);
23858 rtx op0 = expand_normal (arg0);
23859 rtx op1 = expand_normal (arg1);
23860 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23861 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23862 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23864 if (VECTOR_MODE_P (mode0))
23865 op0 = safe_vector_operand (op0, mode0);
23866 if (VECTOR_MODE_P (mode1))
23867 op1 = safe_vector_operand (op1, mode1);
23869 if (optimize || !target
23870 || GET_MODE (target) != tmode
23871 || !insn_data[icode].operand[0].predicate (target, tmode))
23872 target = gen_reg_rtx (tmode);
23874 if (GET_MODE (op1) == SImode && mode1 == TImode)
23876 rtx x = gen_reg_rtx (V4SImode);
23877 emit_insn (gen_sse2_loadd (x, op1));
23878 op1 = gen_lowpart (TImode, x);
23881 if (!insn_data[icode].operand[1].predicate (op0, mode0))
23882 op0 = copy_to_mode_reg (mode0, op0);
23883 if (!insn_data[icode].operand[2].predicate (op1, mode1))
23884 op1 = copy_to_mode_reg (mode1, op1);
23886 pat = GEN_FCN (icode) (target, op0, op1);
23887 if (! pat)
23888 return 0;
23889 emit_insn (pat);
23890 return target;
23891 }
23895 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23898 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23899 enum ix86_builtin_func_type m_type,
23900 enum rtx_code sub_code)
23905 bool comparison_p = false;
23907 bool last_arg_constant = false;
23908 int num_memory = 0;
23911 enum machine_mode mode;
23914 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23918 case MULTI_ARG_4_DF2_DI_I:
23919 case MULTI_ARG_4_DF2_DI_I1:
23920 case MULTI_ARG_4_SF2_SI_I:
23921 case MULTI_ARG_4_SF2_SI_I1:
23923 last_arg_constant = true;
23926 case MULTI_ARG_3_SF:
23927 case MULTI_ARG_3_DF:
23928 case MULTI_ARG_3_SF2:
23929 case MULTI_ARG_3_DF2:
23930 case MULTI_ARG_3_DI:
23931 case MULTI_ARG_3_SI:
23932 case MULTI_ARG_3_SI_DI:
23933 case MULTI_ARG_3_HI:
23934 case MULTI_ARG_3_HI_SI:
23935 case MULTI_ARG_3_QI:
23936 case MULTI_ARG_3_DI2:
23937 case MULTI_ARG_3_SI2:
23938 case MULTI_ARG_3_HI2:
23939 case MULTI_ARG_3_QI2:
23943 case MULTI_ARG_2_SF:
23944 case MULTI_ARG_2_DF:
23945 case MULTI_ARG_2_DI:
23946 case MULTI_ARG_2_SI:
23947 case MULTI_ARG_2_HI:
23948 case MULTI_ARG_2_QI:
23952 case MULTI_ARG_2_DI_IMM:
23953 case MULTI_ARG_2_SI_IMM:
23954 case MULTI_ARG_2_HI_IMM:
23955 case MULTI_ARG_2_QI_IMM:
23957 last_arg_constant = true;
23960 case MULTI_ARG_1_SF:
23961 case MULTI_ARG_1_DF:
23962 case MULTI_ARG_1_SF2:
23963 case MULTI_ARG_1_DF2:
23964 case MULTI_ARG_1_DI:
23965 case MULTI_ARG_1_SI:
23966 case MULTI_ARG_1_HI:
23967 case MULTI_ARG_1_QI:
23968 case MULTI_ARG_1_SI_DI:
23969 case MULTI_ARG_1_HI_DI:
23970 case MULTI_ARG_1_HI_SI:
23971 case MULTI_ARG_1_QI_DI:
23972 case MULTI_ARG_1_QI_SI:
23973 case MULTI_ARG_1_QI_HI:
23977 case MULTI_ARG_2_DI_CMP:
23978 case MULTI_ARG_2_SI_CMP:
23979 case MULTI_ARG_2_HI_CMP:
23980 case MULTI_ARG_2_QI_CMP:
23982 comparison_p = true;
23985 case MULTI_ARG_2_SF_TF:
23986 case MULTI_ARG_2_DF_TF:
23987 case MULTI_ARG_2_DI_TF:
23988 case MULTI_ARG_2_SI_TF:
23989 case MULTI_ARG_2_HI_TF:
23990 case MULTI_ARG_2_QI_TF:
23996 gcc_unreachable ();
23999 if (optimize || !target
24000 || GET_MODE (target) != tmode
24001 || !insn_data[icode].operand[0].predicate (target, tmode))
24002 target = gen_reg_rtx (tmode);
24004 gcc_assert (nargs <= 4);
24006 for (i = 0; i < nargs; i++)
24008 tree arg = CALL_EXPR_ARG (exp, i);
24009 rtx op = expand_normal (arg);
24010 int adjust = (comparison_p) ? 1 : 0;
24011 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24013 if (last_arg_constant && i == nargs-1)
24015 if (!CONST_INT_P (op))
24017 error ("last argument must be an immediate");
24018 return gen_reg_rtx (tmode);
24023 if (VECTOR_MODE_P (mode))
24024 op = safe_vector_operand (op, mode);
24026 /* If we aren't optimizing, only allow one memory operand to be
24027    generated.  */
24028 if (memory_operand (op, mode))
24029 num_memory++;
24031 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24033 if (optimize
24034 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24035 || num_memory > 1)
24036 op = force_reg (mode, op);
24040 args[i].mode = mode;
24046 pat = GEN_FCN (icode) (target, args[0].op);
24051 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24052 GEN_INT ((int)sub_code));
24053 else if (! comparison_p)
24054 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24057 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24061 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24066 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24070 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24074 gcc_unreachable ();
24084 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24085 insns with vec_merge. */
24088 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24092 tree arg0 = CALL_EXPR_ARG (exp, 0);
24093 rtx op1, op0 = expand_normal (arg0);
24094 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24095 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24097 if (optimize || !target
24098 || GET_MODE (target) != tmode
24099 || !insn_data[icode].operand[0].predicate (target, tmode))
24100 target = gen_reg_rtx (tmode);
24102 if (VECTOR_MODE_P (mode0))
24103 op0 = safe_vector_operand (op0, mode0);
24105 if ((optimize && !register_operand (op0, mode0))
24106 || !insn_data[icode].operand[1].predicate (op0, mode0))
24107 op0 = copy_to_mode_reg (mode0, op0);
24110 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24111 op1 = copy_to_mode_reg (mode0, op1);
24113 pat = GEN_FCN (icode) (target, op0, op1);
24114 if (! pat)
24115 return 0;
24116 emit_insn (pat);
24117 return target;
24118 }
24120 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24123 ix86_expand_sse_compare (const struct builtin_description *d,
24124 tree exp, rtx target, bool swap)
24127 tree arg0 = CALL_EXPR_ARG (exp, 0);
24128 tree arg1 = CALL_EXPR_ARG (exp, 1);
24129 rtx op0 = expand_normal (arg0);
24130 rtx op1 = expand_normal (arg1);
24132 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24133 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24134 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24135 enum rtx_code comparison = d->comparison;
24137 if (VECTOR_MODE_P (mode0))
24138 op0 = safe_vector_operand (op0, mode0);
24139 if (VECTOR_MODE_P (mode1))
24140 op1 = safe_vector_operand (op1, mode1);
24142 /* Swap operands if we have a comparison that isn't available in
24143    SSE.  */
24144 if (swap)
24145 {
24146 rtx tmp = gen_reg_rtx (mode1);
24147 emit_move_insn (tmp, op1);
24148 op1 = op0;
24149 op0 = tmp;
24150 }
24152 if (optimize || !target
24153 || GET_MODE (target) != tmode
24154 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24155 target = gen_reg_rtx (tmode);
24157 if ((optimize && !register_operand (op0, mode0))
24158 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24159 op0 = copy_to_mode_reg (mode0, op0);
24160 if ((optimize && !register_operand (op1, mode1))
24161 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24162 op1 = copy_to_mode_reg (mode1, op1);
24164 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24165 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24166 if (! pat)
24167 return 0;
24168 emit_insn (pat);
24169 return target;
24170 }
24172 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24175 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24179 tree arg0 = CALL_EXPR_ARG (exp, 0);
24180 tree arg1 = CALL_EXPR_ARG (exp, 1);
24181 rtx op0 = expand_normal (arg0);
24182 rtx op1 = expand_normal (arg1);
24183 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24184 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24185 enum rtx_code comparison = d->comparison;
24187 if (VECTOR_MODE_P (mode0))
24188 op0 = safe_vector_operand (op0, mode0);
24189 if (VECTOR_MODE_P (mode1))
24190 op1 = safe_vector_operand (op1, mode1);
24192 /* Swap operands if we have a comparison that isn't available in
24193    SSE.  */
24194 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24195 {
24196 rtx tmp = op1;
24197 op1 = op0;
24198 op0 = tmp;
24199 }
24201 target = gen_reg_rtx (SImode);
24202 emit_move_insn (target, const0_rtx);
24203 target = gen_rtx_SUBREG (QImode, target, 0);
24205 if ((optimize && !register_operand (op0, mode0))
24206 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24207 op0 = copy_to_mode_reg (mode0, op0);
24208 if ((optimize && !register_operand (op1, mode1))
24209 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24210 op1 = copy_to_mode_reg (mode1, op1);
24212 pat = GEN_FCN (d->icode) (op0, op1);
24216 emit_insn (gen_rtx_SET (VOIDmode,
24217 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24218 gen_rtx_fmt_ee (comparison, QImode,
24222 return SUBREG_REG (target);
24223 }
24225 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24228 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24232 tree arg0 = CALL_EXPR_ARG (exp, 0);
24233 tree arg1 = CALL_EXPR_ARG (exp, 1);
24234 rtx op0 = expand_normal (arg0);
24235 rtx op1 = expand_normal (arg1);
24236 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24237 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24238 enum rtx_code comparison = d->comparison;
24240 if (VECTOR_MODE_P (mode0))
24241 op0 = safe_vector_operand (op0, mode0);
24242 if (VECTOR_MODE_P (mode1))
24243 op1 = safe_vector_operand (op1, mode1);
24245 target = gen_reg_rtx (SImode);
24246 emit_move_insn (target, const0_rtx);
24247 target = gen_rtx_SUBREG (QImode, target, 0);
24249 if ((optimize && !register_operand (op0, mode0))
24250 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24251 op0 = copy_to_mode_reg (mode0, op0);
24252 if ((optimize && !register_operand (op1, mode1))
24253 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24254 op1 = copy_to_mode_reg (mode1, op1);
24256 pat = GEN_FCN (d->icode) (op0, op1);
24260 emit_insn (gen_rtx_SET (VOIDmode,
24261 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24262 gen_rtx_fmt_ee (comparison, QImode,
24266 return SUBREG_REG (target);
24267 }
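/* Usage sketch (illustrative, not from the original source): this ptest
   expansion is believed to back the SSE4.1 test intrinsics, e.g.:

     #include <smmintrin.h>

     int
     all_zero (__m128i mask, __m128i val)
     {
       return _mm_testz_si128 (mask, val);   // __builtin_ia32_ptestz128
     }
*/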
24269 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24272 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24273 tree exp, rtx target)
24276 tree arg0 = CALL_EXPR_ARG (exp, 0);
24277 tree arg1 = CALL_EXPR_ARG (exp, 1);
24278 tree arg2 = CALL_EXPR_ARG (exp, 2);
24279 tree arg3 = CALL_EXPR_ARG (exp, 3);
24280 tree arg4 = CALL_EXPR_ARG (exp, 4);
24281 rtx scratch0, scratch1;
24282 rtx op0 = expand_normal (arg0);
24283 rtx op1 = expand_normal (arg1);
24284 rtx op2 = expand_normal (arg2);
24285 rtx op3 = expand_normal (arg3);
24286 rtx op4 = expand_normal (arg4);
24287 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24289 tmode0 = insn_data[d->icode].operand[0].mode;
24290 tmode1 = insn_data[d->icode].operand[1].mode;
24291 modev2 = insn_data[d->icode].operand[2].mode;
24292 modei3 = insn_data[d->icode].operand[3].mode;
24293 modev4 = insn_data[d->icode].operand[4].mode;
24294 modei5 = insn_data[d->icode].operand[5].mode;
24295 modeimm = insn_data[d->icode].operand[6].mode;
24297 if (VECTOR_MODE_P (modev2))
24298 op0 = safe_vector_operand (op0, modev2);
24299 if (VECTOR_MODE_P (modev4))
24300 op2 = safe_vector_operand (op2, modev4);
24302 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24303 op0 = copy_to_mode_reg (modev2, op0);
24304 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24305 op1 = copy_to_mode_reg (modei3, op1);
24306 if ((optimize && !register_operand (op2, modev4))
24307 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24308 op2 = copy_to_mode_reg (modev4, op2);
24309 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24310 op3 = copy_to_mode_reg (modei5, op3);
24312 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24314 error ("the fifth argument must be a 8-bit immediate");
24318 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24320 if (optimize || !target
24321 || GET_MODE (target) != tmode0
24322 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24323 target = gen_reg_rtx (tmode0);
24325 scratch1 = gen_reg_rtx (tmode1);
24327 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24329 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24331 if (optimize || !target
24332 || GET_MODE (target) != tmode1
24333 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24334 target = gen_reg_rtx (tmode1);
24336 scratch0 = gen_reg_rtx (tmode0);
24338 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24342 gcc_assert (d->flag);
24344 scratch0 = gen_reg_rtx (tmode0);
24345 scratch1 = gen_reg_rtx (tmode1);
24347 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24357 target = gen_reg_rtx (SImode);
24358 emit_move_insn (target, const0_rtx);
24359 target = gen_rtx_SUBREG (QImode, target, 0);
24362 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24363 gen_rtx_fmt_ee (EQ, QImode,
24364 gen_rtx_REG ((enum machine_mode) d->flag,
24367 return SUBREG_REG (target);
24368 }
24369 else
24370 return target;
24371 }
24374 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24377 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24378 tree exp, rtx target)
24381 tree arg0 = CALL_EXPR_ARG (exp, 0);
24382 tree arg1 = CALL_EXPR_ARG (exp, 1);
24383 tree arg2 = CALL_EXPR_ARG (exp, 2);
24384 rtx scratch0, scratch1;
24385 rtx op0 = expand_normal (arg0);
24386 rtx op1 = expand_normal (arg1);
24387 rtx op2 = expand_normal (arg2);
24388 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24390 tmode0 = insn_data[d->icode].operand[0].mode;
24391 tmode1 = insn_data[d->icode].operand[1].mode;
24392 modev2 = insn_data[d->icode].operand[2].mode;
24393 modev3 = insn_data[d->icode].operand[3].mode;
24394 modeimm = insn_data[d->icode].operand[4].mode;
24396 if (VECTOR_MODE_P (modev2))
24397 op0 = safe_vector_operand (op0, modev2);
24398 if (VECTOR_MODE_P (modev3))
24399 op1 = safe_vector_operand (op1, modev3);
24401 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24402 op0 = copy_to_mode_reg (modev2, op0);
24403 if ((optimize && !register_operand (op1, modev3))
24404 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24405 op1 = copy_to_mode_reg (modev3, op1);
24407 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24409 error ("the third argument must be a 8-bit immediate");
24413 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24415 if (optimize || !target
24416 || GET_MODE (target) != tmode0
24417 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24418 target = gen_reg_rtx (tmode0);
24420 scratch1 = gen_reg_rtx (tmode1);
24422 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24424 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24426 if (optimize || !target
24427 || GET_MODE (target) != tmode1
24428 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24429 target = gen_reg_rtx (tmode1);
24431 scratch0 = gen_reg_rtx (tmode0);
24433 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24437 gcc_assert (d->flag);
24439 scratch0 = gen_reg_rtx (tmode0);
24440 scratch1 = gen_reg_rtx (tmode1);
24442 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24452 target = gen_reg_rtx (SImode);
24453 emit_move_insn (target, const0_rtx);
24454 target = gen_rtx_SUBREG (QImode, target, 0);
24457 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24458 gen_rtx_fmt_ee (EQ, QImode,
24459 gen_rtx_REG ((enum machine_mode) d->flag,
24462 return SUBREG_REG (target);
24463 }
24464 else
24465 return target;
24466 }
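/* Usage sketch (illustrative, not from the original source): the
   pcmpistr expansion above is believed to back the SSE4.2 string
   intrinsics, e.g.:

     #include <nmmintrin.h>

     int
     first_equal (__m128i a, __m128i b)
     {
       // The immediate must be a compile-time constant, as checked above.
       return _mm_cmpistri (a, b, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
     }
*/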
24468 /* Subroutine of ix86_expand_builtin to take care of insns with
24469 variable number of operands. */
24472 ix86_expand_args_builtin (const struct builtin_description *d,
24473 tree exp, rtx target)
24475 rtx pat, real_target;
24476 unsigned int i, nargs;
24477 unsigned int nargs_constant = 0;
24478 int num_memory = 0;
24482 enum machine_mode mode;
24484 bool last_arg_count = false;
24485 enum insn_code icode = d->icode;
24486 const struct insn_data_d *insn_p = &insn_data[icode];
24487 enum machine_mode tmode = insn_p->operand[0].mode;
24488 enum machine_mode rmode = VOIDmode;
24490 enum rtx_code comparison = d->comparison;
24492 switch ((enum ix86_builtin_func_type) d->flag)
24494 case INT_FTYPE_V8SF_V8SF_PTEST:
24495 case INT_FTYPE_V4DI_V4DI_PTEST:
24496 case INT_FTYPE_V4DF_V4DF_PTEST:
24497 case INT_FTYPE_V4SF_V4SF_PTEST:
24498 case INT_FTYPE_V2DI_V2DI_PTEST:
24499 case INT_FTYPE_V2DF_V2DF_PTEST:
24500 return ix86_expand_sse_ptest (d, exp, target);
24501 case FLOAT128_FTYPE_FLOAT128:
24502 case FLOAT_FTYPE_FLOAT:
24503 case INT_FTYPE_INT:
24504 case UINT64_FTYPE_INT:
24505 case UINT16_FTYPE_UINT16:
24506 case INT64_FTYPE_INT64:
24507 case INT64_FTYPE_V4SF:
24508 case INT64_FTYPE_V2DF:
24509 case INT_FTYPE_V16QI:
24510 case INT_FTYPE_V8QI:
24511 case INT_FTYPE_V8SF:
24512 case INT_FTYPE_V4DF:
24513 case INT_FTYPE_V4SF:
24514 case INT_FTYPE_V2DF:
24515 case V16QI_FTYPE_V16QI:
24516 case V8SI_FTYPE_V8SF:
24517 case V8SI_FTYPE_V4SI:
24518 case V8HI_FTYPE_V8HI:
24519 case V8HI_FTYPE_V16QI:
24520 case V8QI_FTYPE_V8QI:
24521 case V8SF_FTYPE_V8SF:
24522 case V8SF_FTYPE_V8SI:
24523 case V8SF_FTYPE_V4SF:
24524 case V8SF_FTYPE_V8HI:
24525 case V4SI_FTYPE_V4SI:
24526 case V4SI_FTYPE_V16QI:
24527 case V4SI_FTYPE_V4SF:
24528 case V4SI_FTYPE_V8SI:
24529 case V4SI_FTYPE_V8HI:
24530 case V4SI_FTYPE_V4DF:
24531 case V4SI_FTYPE_V2DF:
24532 case V4HI_FTYPE_V4HI:
24533 case V4DF_FTYPE_V4DF:
24534 case V4DF_FTYPE_V4SI:
24535 case V4DF_FTYPE_V4SF:
24536 case V4DF_FTYPE_V2DF:
24537 case V4SF_FTYPE_V4SF:
24538 case V4SF_FTYPE_V4SI:
24539 case V4SF_FTYPE_V8SF:
24540 case V4SF_FTYPE_V4DF:
24541 case V4SF_FTYPE_V8HI:
24542 case V4SF_FTYPE_V2DF:
24543 case V2DI_FTYPE_V2DI:
24544 case V2DI_FTYPE_V16QI:
24545 case V2DI_FTYPE_V8HI:
24546 case V2DI_FTYPE_V4SI:
24547 case V2DF_FTYPE_V2DF:
24548 case V2DF_FTYPE_V4SI:
24549 case V2DF_FTYPE_V4DF:
24550 case V2DF_FTYPE_V4SF:
24551 case V2DF_FTYPE_V2SI:
24552 case V2SI_FTYPE_V2SI:
24553 case V2SI_FTYPE_V4SF:
24554 case V2SI_FTYPE_V2SF:
24555 case V2SI_FTYPE_V2DF:
24556 case V2SF_FTYPE_V2SF:
24557 case V2SF_FTYPE_V2SI:
24560 case V4SF_FTYPE_V4SF_VEC_MERGE:
24561 case V2DF_FTYPE_V2DF_VEC_MERGE:
24562 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24563 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24564 case V16QI_FTYPE_V16QI_V16QI:
24565 case V16QI_FTYPE_V8HI_V8HI:
24566 case V8QI_FTYPE_V8QI_V8QI:
24567 case V8QI_FTYPE_V4HI_V4HI:
24568 case V8HI_FTYPE_V8HI_V8HI:
24569 case V8HI_FTYPE_V16QI_V16QI:
24570 case V8HI_FTYPE_V4SI_V4SI:
24571 case V8SF_FTYPE_V8SF_V8SF:
24572 case V8SF_FTYPE_V8SF_V8SI:
24573 case V4SI_FTYPE_V4SI_V4SI:
24574 case V4SI_FTYPE_V8HI_V8HI:
24575 case V4SI_FTYPE_V4SF_V4SF:
24576 case V4SI_FTYPE_V2DF_V2DF:
24577 case V4HI_FTYPE_V4HI_V4HI:
24578 case V4HI_FTYPE_V8QI_V8QI:
24579 case V4HI_FTYPE_V2SI_V2SI:
24580 case V4DF_FTYPE_V4DF_V4DF:
24581 case V4DF_FTYPE_V4DF_V4DI:
24582 case V4SF_FTYPE_V4SF_V4SF:
24583 case V4SF_FTYPE_V4SF_V4SI:
24584 case V4SF_FTYPE_V4SF_V2SI:
24585 case V4SF_FTYPE_V4SF_V2DF:
24586 case V4SF_FTYPE_V4SF_DI:
24587 case V4SF_FTYPE_V4SF_SI:
24588 case V2DI_FTYPE_V2DI_V2DI:
24589 case V2DI_FTYPE_V16QI_V16QI:
24590 case V2DI_FTYPE_V4SI_V4SI:
24591 case V2DI_FTYPE_V2DI_V16QI:
24592 case V2DI_FTYPE_V2DF_V2DF:
24593 case V2SI_FTYPE_V2SI_V2SI:
24594 case V2SI_FTYPE_V4HI_V4HI:
24595 case V2SI_FTYPE_V2SF_V2SF:
24596 case V2DF_FTYPE_V2DF_V2DF:
24597 case V2DF_FTYPE_V2DF_V4SF:
24598 case V2DF_FTYPE_V2DF_V2DI:
24599 case V2DF_FTYPE_V2DF_DI:
24600 case V2DF_FTYPE_V2DF_SI:
24601 case V2SF_FTYPE_V2SF_V2SF:
24602 case V1DI_FTYPE_V1DI_V1DI:
24603 case V1DI_FTYPE_V8QI_V8QI:
24604 case V1DI_FTYPE_V2SI_V2SI:
24605 if (comparison == UNKNOWN)
24606 return ix86_expand_binop_builtin (icode, exp, target);
24609 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24610 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24611 gcc_assert (comparison != UNKNOWN);
24615 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24616 case V8HI_FTYPE_V8HI_SI_COUNT:
24617 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24618 case V4SI_FTYPE_V4SI_SI_COUNT:
24619 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24620 case V4HI_FTYPE_V4HI_SI_COUNT:
24621 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24622 case V2DI_FTYPE_V2DI_SI_COUNT:
24623 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24624 case V2SI_FTYPE_V2SI_SI_COUNT:
24625 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24626 case V1DI_FTYPE_V1DI_SI_COUNT:
24628 last_arg_count = true;
24630 case UINT64_FTYPE_UINT64_UINT64:
24631 case UINT_FTYPE_UINT_UINT:
24632 case UINT_FTYPE_UINT_USHORT:
24633 case UINT_FTYPE_UINT_UCHAR:
24634 case UINT16_FTYPE_UINT16_INT:
24635 case UINT8_FTYPE_UINT8_INT:
24638 case V2DI_FTYPE_V2DI_INT_CONVERT:
24641 nargs_constant = 1;
24643 case V8HI_FTYPE_V8HI_INT:
24644 case V8HI_FTYPE_V8SF_INT:
24645 case V8HI_FTYPE_V4SF_INT:
24646 case V8SF_FTYPE_V8SF_INT:
24647 case V4SI_FTYPE_V4SI_INT:
24648 case V4SI_FTYPE_V8SI_INT:
24649 case V4HI_FTYPE_V4HI_INT:
24650 case V4DF_FTYPE_V4DF_INT:
24651 case V4SF_FTYPE_V4SF_INT:
24652 case V4SF_FTYPE_V8SF_INT:
24653 case V2DI_FTYPE_V2DI_INT:
24654 case V2DF_FTYPE_V2DF_INT:
24655 case V2DF_FTYPE_V4DF_INT:
24657 nargs_constant = 1;
24659 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24660 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24661 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24662 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24663 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24666 case V16QI_FTYPE_V16QI_V16QI_INT:
24667 case V8HI_FTYPE_V8HI_V8HI_INT:
24668 case V8SI_FTYPE_V8SI_V8SI_INT:
24669 case V8SI_FTYPE_V8SI_V4SI_INT:
24670 case V8SF_FTYPE_V8SF_V8SF_INT:
24671 case V8SF_FTYPE_V8SF_V4SF_INT:
24672 case V4SI_FTYPE_V4SI_V4SI_INT:
24673 case V4DF_FTYPE_V4DF_V4DF_INT:
24674 case V4DF_FTYPE_V4DF_V2DF_INT:
24675 case V4SF_FTYPE_V4SF_V4SF_INT:
24676 case V2DI_FTYPE_V2DI_V2DI_INT:
24677 case V2DF_FTYPE_V2DF_V2DF_INT:
24679 nargs_constant = 1;
24681 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
24684 nargs_constant = 1;
24686 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
24689 nargs_constant = 1;
24691 case V2DI_FTYPE_V2DI_UINT_UINT:
24693 nargs_constant = 2;
24695 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
24696 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
24697 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
24698 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
24700 nargs_constant = 1;
24702 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24704 nargs_constant = 2;
24707 gcc_unreachable ();
24710 gcc_assert (nargs <= ARRAY_SIZE (args));
24712 if (comparison != UNKNOWN)
24714 gcc_assert (nargs == 2);
24715 return ix86_expand_sse_compare (d, exp, target, swap);
24718 if (rmode == VOIDmode || rmode == tmode)
24719 {
24720 if (optimize
24721 || !target
24722 || GET_MODE (target) != tmode
24723 || !insn_p->operand[0].predicate (target, tmode))
24724 target = gen_reg_rtx (tmode);
24725 real_target = target;
24726 }
24727 else
24728 {
24729 target = gen_reg_rtx (rmode);
24730 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24731 }
24733 for (i = 0; i < nargs; i++)
24735 tree arg = CALL_EXPR_ARG (exp, i);
24736 rtx op = expand_normal (arg);
24737 enum machine_mode mode = insn_p->operand[i + 1].mode;
24738 bool match = insn_p->operand[i + 1].predicate (op, mode);
24740 if (last_arg_count && (i + 1) == nargs)
24742 /* SIMD shift insns take either an 8-bit immediate or a
24743    register as the count.  But builtin functions take int as
24744    the count.  If the count doesn't match, we put it in a register.  */
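	  /* For instance (illustrative note, not in the original source):
	     __builtin_ia32_psllwi128 takes an int count; a non-immediate
	     count value is narrowed to SImode and copied into a register
	     here instead of being rejected.  */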
24747 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24748 if (!insn_p->operand[i + 1].predicate (op, mode))
24749 op = copy_to_reg (op);
24752 else if ((nargs - i) <= nargs_constant)
24757 case CODE_FOR_sse4_1_roundpd:
24758 case CODE_FOR_sse4_1_roundps:
24759 case CODE_FOR_sse4_1_roundsd:
24760 case CODE_FOR_sse4_1_roundss:
24761 case CODE_FOR_sse4_1_blendps:
24762 case CODE_FOR_avx_blendpd256:
24763 case CODE_FOR_avx_vpermilv4df:
24764 case CODE_FOR_avx_roundpd256:
24765 case CODE_FOR_avx_roundps256:
24766 error ("the last argument must be a 4-bit immediate");
24769 case CODE_FOR_sse4_1_blendpd:
24770 case CODE_FOR_avx_vpermilv2df:
24771 case CODE_FOR_xop_vpermil2v2df3:
24772 case CODE_FOR_xop_vpermil2v4sf3:
24773 case CODE_FOR_xop_vpermil2v4df3:
24774 case CODE_FOR_xop_vpermil2v8sf3:
24775 error ("the last argument must be a 2-bit immediate");
24778 case CODE_FOR_avx_vextractf128v4df:
24779 case CODE_FOR_avx_vextractf128v8sf:
24780 case CODE_FOR_avx_vextractf128v8si:
24781 case CODE_FOR_avx_vinsertf128v4df:
24782 case CODE_FOR_avx_vinsertf128v8sf:
24783 case CODE_FOR_avx_vinsertf128v8si:
24784 error ("the last argument must be a 1-bit immediate");
24787 case CODE_FOR_avx_cmpsdv2df3:
24788 case CODE_FOR_avx_cmpssv4sf3:
24789 case CODE_FOR_avx_cmppdv2df3:
24790 case CODE_FOR_avx_cmppsv4sf3:
24791 case CODE_FOR_avx_cmppdv4df3:
24792 case CODE_FOR_avx_cmppsv8sf3:
24793 error ("the last argument must be a 5-bit immediate");
24797 switch (nargs_constant)
24800 if ((nargs - i) == nargs_constant)
24802 error ("the next to last argument must be an 8-bit immediate");
24806 error ("the last argument must be an 8-bit immediate");
24809 gcc_unreachable ();
24816 if (VECTOR_MODE_P (mode))
24817 op = safe_vector_operand (op, mode);
24819 /* If we aren't optimizing, only allow one memory operand to
24820    be generated.  */
24821 if (memory_operand (op, mode))
24822 num_memory++;
24824 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24826 if (optimize || !match || num_memory > 1)
24827 op = copy_to_mode_reg (mode, op);
24831 op = copy_to_reg (op);
24832 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24837 args[i].mode = mode;
24843 pat = GEN_FCN (icode) (real_target, args[0].op);
24846 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24849 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24853 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24854 args[2].op, args[3].op);
24857 gcc_unreachable ();
24867 /* Subroutine of ix86_expand_builtin to take care of special insns
24868 with variable number of operands. */
24871 ix86_expand_special_args_builtin (const struct builtin_description *d,
24872 tree exp, rtx target)
24876 unsigned int i, nargs, arg_adjust, memory;
24880 enum machine_mode mode;
24882 enum insn_code icode = d->icode;
24883 bool last_arg_constant = false;
24884 const struct insn_data_d *insn_p = &insn_data[icode];
24885 enum machine_mode tmode = insn_p->operand[0].mode;
24886 enum { load, store } klass;
24888 switch ((enum ix86_builtin_func_type) d->flag)
24890 case VOID_FTYPE_VOID:
24891 emit_insn (GEN_FCN (icode) (target));
24893 case VOID_FTYPE_UINT64:
24894 case VOID_FTYPE_UNSIGNED:
24900 case UINT64_FTYPE_VOID:
24901 case UNSIGNED_FTYPE_VOID:
24902 case UINT16_FTYPE_VOID:
24907 case UINT64_FTYPE_PUNSIGNED:
24908 case V2DI_FTYPE_PV2DI:
24909 case V32QI_FTYPE_PCCHAR:
24910 case V16QI_FTYPE_PCCHAR:
24911 case V8SF_FTYPE_PCV4SF:
24912 case V8SF_FTYPE_PCFLOAT:
24913 case V4SF_FTYPE_PCFLOAT:
24914 case V4DF_FTYPE_PCV2DF:
24915 case V4DF_FTYPE_PCDOUBLE:
24916 case V2DF_FTYPE_PCDOUBLE:
24917 case VOID_FTYPE_PVOID:
24922 case VOID_FTYPE_PV2SF_V4SF:
24923 case VOID_FTYPE_PV4DI_V4DI:
24924 case VOID_FTYPE_PV2DI_V2DI:
24925 case VOID_FTYPE_PCHAR_V32QI:
24926 case VOID_FTYPE_PCHAR_V16QI:
24927 case VOID_FTYPE_PFLOAT_V8SF:
24928 case VOID_FTYPE_PFLOAT_V4SF:
24929 case VOID_FTYPE_PDOUBLE_V4DF:
24930 case VOID_FTYPE_PDOUBLE_V2DF:
24931 case VOID_FTYPE_PULONGLONG_ULONGLONG:
24932 case VOID_FTYPE_PINT_INT:
24935 /* Reserve memory operand for target. */
24936 memory = ARRAY_SIZE (args);
24938 case V4SF_FTYPE_V4SF_PCV2SF:
24939 case V2DF_FTYPE_V2DF_PCDOUBLE:
24944 case V8SF_FTYPE_PCV8SF_V8SF:
24945 case V4DF_FTYPE_PCV4DF_V4DF:
24946 case V4SF_FTYPE_PCV4SF_V4SF:
24947 case V2DF_FTYPE_PCV2DF_V2DF:
24952 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24953 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24954 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24955 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24958 /* Reserve memory operand for target. */
24959 memory = ARRAY_SIZE (args);
24961 case VOID_FTYPE_UINT_UINT_UINT:
24962 case VOID_FTYPE_UINT64_UINT_UINT:
24963 case UCHAR_FTYPE_UINT_UINT_UINT:
24964 case UCHAR_FTYPE_UINT64_UINT_UINT:
24967 memory = ARRAY_SIZE (args);
24968 last_arg_constant = true;
24971 gcc_unreachable ();
24974 gcc_assert (nargs <= ARRAY_SIZE (args));
24976 if (klass == store)
24977 {
24978 arg = CALL_EXPR_ARG (exp, 0);
24979 op = expand_normal (arg);
24980 gcc_assert (target == 0);
24981 if (memory)
24982 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24983 else
24984 target = force_reg (tmode, op);
24985 arg_adjust = 1;
24986 }
24987 else
24988 {
24989 arg_adjust = 0;
24990 if (optimize
24991 || !target
24992 || GET_MODE (target) != tmode
24993 || !insn_p->operand[0].predicate (target, tmode))
24994 target = gen_reg_rtx (tmode);
24995 }
24997 for (i = 0; i < nargs; i++)
24999 enum machine_mode mode = insn_p->operand[i + 1].mode;
25002 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25003 op = expand_normal (arg);
25004 match = insn_p->operand[i + 1].predicate (op, mode);
25006 if (last_arg_constant && (i + 1) == nargs)
25010 if (icode == CODE_FOR_lwp_lwpvalsi3
25011 || icode == CODE_FOR_lwp_lwpinssi3
25012 || icode == CODE_FOR_lwp_lwpvaldi3
25013 || icode == CODE_FOR_lwp_lwpinsdi3)
25014 error ("the last argument must be a 32-bit immediate");
25016 error ("the last argument must be an 8-bit immediate");
25024 /* This must be the memory operand. */
25025 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25026 gcc_assert (GET_MODE (op) == mode
25027 || GET_MODE (op) == VOIDmode);
25031 /* This must be a register.  */
25032 if (VECTOR_MODE_P (mode))
25033 op = safe_vector_operand (op, mode);
25035 gcc_assert (GET_MODE (op) == mode
25036 || GET_MODE (op) == VOIDmode);
25037 op = copy_to_mode_reg (mode, op);
25042 args[i].mode = mode;
25048 pat = GEN_FCN (icode) (target);
25051 pat = GEN_FCN (icode) (target, args[0].op);
25054 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25057 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25060 gcc_unreachable ();
25066 return klass == store ? 0 : target;
25069 /* Return the integer constant in ARG. Constrain it to be in the range
25070 of the subparts of VEC_TYPE; issue an error if not. */
25073 get_element_number (tree vec_type, tree arg)
25075 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25077 if (!host_integerp (arg, 1)
25078 || (elt = tree_low_cst (arg, 1), elt > max))
25080 error ("selector must be an integer constant in the range 0..%wi", max);
25087 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25088 ix86_expand_vector_init. We DO have language-level syntax for this, in
25089 the form of (type){ init-list }. Except that since we can't place emms
25090 instructions from inside the compiler, we can't allow the use of MMX
25091 registers unless the user explicitly asks for it. So we do *not* define
25092 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25093 we have builtins invoked by mmintrin.h that give us license to emit
25094 these sorts of instructions. */
25097 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25099 enum machine_mode tmode = TYPE_MODE (type);
25100 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25101 int i, n_elt = GET_MODE_NUNITS (tmode);
25102 rtvec v = rtvec_alloc (n_elt);
25104 gcc_assert (VECTOR_MODE_P (tmode));
25105 gcc_assert (call_expr_nargs (exp) == n_elt);
25107 for (i = 0; i < n_elt; ++i)
25109 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25110 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25113 if (!target || !register_operand (target, tmode))
25114 target = gen_reg_rtx (tmode);
25116 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
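/* For illustration: a source-level call such as

     __m64 v = _mm_setr_pi16 (1, 2, 3, 4);

   (assumed to expand to __builtin_ia32_vec_init_v4hi via mmintrin.h)
   arrives here with four arguments; each is expanded above and handed
   to ix86_expand_vector_init through the PARALLEL.  */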
25120 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25121 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25122 had a language-level syntax for referencing vector elements. */
25125 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25127 enum machine_mode tmode, mode0;
25132 arg0 = CALL_EXPR_ARG (exp, 0);
25133 arg1 = CALL_EXPR_ARG (exp, 1);
25135 op0 = expand_normal (arg0);
25136 elt = get_element_number (TREE_TYPE (arg0), arg1);
25138 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25139 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25140 gcc_assert (VECTOR_MODE_P (mode0));
25142 op0 = force_reg (mode0, op0);
25144 if (optimize || !target || !register_operand (target, tmode))
25145 target = gen_reg_rtx (tmode);
25147 ix86_expand_vector_extract (true, target, op0, elt);
25152 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25153 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25154 a language-level syntax for referencing vector elements. */
25157 ix86_expand_vec_set_builtin (tree exp)
25159 enum machine_mode tmode, mode1;
25160 tree arg0, arg1, arg2;
25162 rtx op0, op1, target;
25164 arg0 = CALL_EXPR_ARG (exp, 0);
25165 arg1 = CALL_EXPR_ARG (exp, 1);
25166 arg2 = CALL_EXPR_ARG (exp, 2);
25168 tmode = TYPE_MODE (TREE_TYPE (arg0));
25169 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25170 gcc_assert (VECTOR_MODE_P (tmode));
25172 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25173 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25174 elt = get_element_number (TREE_TYPE (arg0), arg2);
25176 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25177 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25179 op0 = force_reg (tmode, op0);
25180 op1 = force_reg (mode1, op1);
25182 /* OP0 is the source of these builtin functions and shouldn't be
25183 modified. Create a copy, use it and return it as target. */
25184 target = gen_reg_rtx (tmode);
25185 emit_move_insn (target, op0);
25186 ix86_expand_vector_set (true, target, op1, elt);
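/* For illustration: the copy-in shape above means the builtin returns
   a fresh vector instead of mutating its first argument, letting an
   intrinsic such as _mm_insert_epi16 (assumed to map to
   __builtin_ia32_vec_set_v8hi) behave as a pure function at the
   source level.  */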
25191 /* Expand an expression EXP that calls a built-in function,
25192 with result going to TARGET if that's convenient
25193 (and in mode MODE if that's convenient).
25194 SUBTARGET may be used as the target for computing one of EXP's operands.
25195 IGNORE is nonzero if the value is to be ignored. */
25198 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25199 enum machine_mode mode ATTRIBUTE_UNUSED,
25200 int ignore ATTRIBUTE_UNUSED)
25202 const struct builtin_description *d;
25204 enum insn_code icode;
25205 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25206 tree arg0, arg1, arg2;
25207 rtx op0, op1, op2, pat;
25208 enum machine_mode mode0, mode1, mode2;
25209 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25211 /* Determine whether the builtin function is available under the current ISA.
25212 Originally the builtin was not created if it wasn't applicable to the
25213 current ISA based on the command line switches. With function specific
25214 options, we need to check in the context of the function making the call
25215 whether it is supported. */
25216 if (ix86_builtins_isa[fcode].isa
25217 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25219 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25220 NULL, NULL, false);
25223 error ("%qE needs unknown isa option", fndecl);
25226 gcc_assert (opts != NULL);
25227 error ("%qE needs isa option %s", fndecl, opts);
25235 case IX86_BUILTIN_MASKMOVQ:
25236 case IX86_BUILTIN_MASKMOVDQU:
25237 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25238 ? CODE_FOR_mmx_maskmovq
25239 : CODE_FOR_sse2_maskmovdqu);
25240 /* Note the arg order is different from the operand order. */
25241 arg1 = CALL_EXPR_ARG (exp, 0);
25242 arg2 = CALL_EXPR_ARG (exp, 1);
25243 arg0 = CALL_EXPR_ARG (exp, 2);
25244 op0 = expand_normal (arg0);
25245 op1 = expand_normal (arg1);
25246 op2 = expand_normal (arg2);
25247 mode0 = insn_data[icode].operand[0].mode;
25248 mode1 = insn_data[icode].operand[1].mode;
25249 mode2 = insn_data[icode].operand[2].mode;
25251 op0 = force_reg (Pmode, op0);
25252 op0 = gen_rtx_MEM (mode1, op0);
25254 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25255 op0 = copy_to_mode_reg (mode0, op0);
25256 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25257 op1 = copy_to_mode_reg (mode1, op1);
25258 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25259 op2 = copy_to_mode_reg (mode2, op2);
25260 pat = GEN_FCN (icode) (op0, op1, op2);
25266 case IX86_BUILTIN_LDMXCSR:
25267 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25268 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25269 emit_move_insn (target, op0);
25270 emit_insn (gen_sse_ldmxcsr (target));
25273 case IX86_BUILTIN_STMXCSR:
25274 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25275 emit_insn (gen_sse_stmxcsr (target));
25276 return copy_to_mode_reg (SImode, target);
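/* For illustration: LDMXCSR and STMXCSR only accept memory operands,
   which is why both builtins above bounce the control/status word
   through a virtual stack slot; _mm_setcsr and _mm_getcsr from
   xmmintrin.h are assumed to bottom out here.  */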
25278 case IX86_BUILTIN_CLFLUSH:
25279 arg0 = CALL_EXPR_ARG (exp, 0);
25280 op0 = expand_normal (arg0);
25281 icode = CODE_FOR_sse2_clflush;
25282 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25283 op0 = copy_to_mode_reg (Pmode, op0);
25285 emit_insn (gen_sse2_clflush (op0));
25288 case IX86_BUILTIN_MONITOR:
25289 arg0 = CALL_EXPR_ARG (exp, 0);
25290 arg1 = CALL_EXPR_ARG (exp, 1);
25291 arg2 = CALL_EXPR_ARG (exp, 2);
25292 op0 = expand_normal (arg0);
25293 op1 = expand_normal (arg1);
25294 op2 = expand_normal (arg2);
25296 op0 = copy_to_mode_reg (Pmode, op0);
25298 op1 = copy_to_mode_reg (SImode, op1);
25300 op2 = copy_to_mode_reg (SImode, op2);
25301 emit_insn (ix86_gen_monitor (op0, op1, op2));
25304 case IX86_BUILTIN_MWAIT:
25305 arg0 = CALL_EXPR_ARG (exp, 0);
25306 arg1 = CALL_EXPR_ARG (exp, 1);
25307 op0 = expand_normal (arg0);
25308 op1 = expand_normal (arg1);
25310 op0 = copy_to_mode_reg (SImode, op0);
25312 op1 = copy_to_mode_reg (SImode, op1);
25313 emit_insn (gen_sse3_mwait (op0, op1));
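/* For illustration: a typical use pairs the two builtins above, e.g.
   via the pmmintrin.h intrinsics (assumed):

     _mm_monitor (addr, 0, 0);
     _mm_mwait (0, 0);

   MONITOR takes the address in a pointer-mode register plus two SImode
   extension/hint words; MWAIT takes just the two SImode words, which
   matches the copy_to_mode_reg calls above.  */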
25316 case IX86_BUILTIN_VEC_INIT_V2SI:
25317 case IX86_BUILTIN_VEC_INIT_V4HI:
25318 case IX86_BUILTIN_VEC_INIT_V8QI:
25319 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25321 case IX86_BUILTIN_VEC_EXT_V2DF:
25322 case IX86_BUILTIN_VEC_EXT_V2DI:
25323 case IX86_BUILTIN_VEC_EXT_V4SF:
25324 case IX86_BUILTIN_VEC_EXT_V4SI:
25325 case IX86_BUILTIN_VEC_EXT_V8HI:
25326 case IX86_BUILTIN_VEC_EXT_V2SI:
25327 case IX86_BUILTIN_VEC_EXT_V4HI:
25328 case IX86_BUILTIN_VEC_EXT_V16QI:
25329 return ix86_expand_vec_ext_builtin (exp, target);
25331 case IX86_BUILTIN_VEC_SET_V2DI:
25332 case IX86_BUILTIN_VEC_SET_V4SF:
25333 case IX86_BUILTIN_VEC_SET_V4SI:
25334 case IX86_BUILTIN_VEC_SET_V8HI:
25335 case IX86_BUILTIN_VEC_SET_V4HI:
25336 case IX86_BUILTIN_VEC_SET_V16QI:
25337 return ix86_expand_vec_set_builtin (exp);
25339 case IX86_BUILTIN_VEC_PERM_V2DF:
25340 case IX86_BUILTIN_VEC_PERM_V4SF:
25341 case IX86_BUILTIN_VEC_PERM_V2DI:
25342 case IX86_BUILTIN_VEC_PERM_V4SI:
25343 case IX86_BUILTIN_VEC_PERM_V8HI:
25344 case IX86_BUILTIN_VEC_PERM_V16QI:
25345 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25346 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25347 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25348 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25349 case IX86_BUILTIN_VEC_PERM_V4DF:
25350 case IX86_BUILTIN_VEC_PERM_V8SF:
25351 return ix86_expand_vec_perm_builtin (exp);
25353 case IX86_BUILTIN_INFQ:
25354 case IX86_BUILTIN_HUGE_VALQ:
25356 REAL_VALUE_TYPE inf;
25360 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25362 tmp = validize_mem (force_const_mem (mode, tmp));
25365 target = gen_reg_rtx (mode);
25367 emit_move_insn (target, tmp);
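/* For illustration: both __builtin_infq and __builtin_huge_valq
   materialize +Inf in the call's result mode (TFmode for these
   __float128 builtins) by forcing the CONST_DOUBLE into the constant
   pool and loading it, as there is no immediate form for 128-bit
   floats.  */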
25371 case IX86_BUILTIN_LLWPCB:
25372 arg0 = CALL_EXPR_ARG (exp, 0);
25373 op0 = expand_normal (arg0);
25374 icode = CODE_FOR_lwp_llwpcb;
25375 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25376 op0 = copy_to_mode_reg (Pmode, op0);
25377 emit_insn (gen_lwp_llwpcb (op0));
25380 case IX86_BUILTIN_SLWPCB:
25381 icode = CODE_FOR_lwp_slwpcb;
25383 || !insn_data[icode].operand[0].predicate (target, Pmode))
25384 target = gen_reg_rtx (Pmode);
25385 emit_insn (gen_lwp_slwpcb (target));
25392 for (i = 0, d = bdesc_special_args;
25393 i < ARRAY_SIZE (bdesc_special_args);
25395 if (d->code == fcode)
25396 return ix86_expand_special_args_builtin (d, exp, target);
25398 for (i = 0, d = bdesc_args;
25399 i < ARRAY_SIZE (bdesc_args);
25401 if (d->code == fcode)
25404 case IX86_BUILTIN_FABSQ:
25405 case IX86_BUILTIN_COPYSIGNQ:
25407 /* Emit a normal call if SSE2 isn't available. */
25408 return expand_call (exp, target, ignore);
25410 return ix86_expand_args_builtin (d, exp, target);
25413 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25414 if (d->code == fcode)
25415 return ix86_expand_sse_comi (d, exp, target);
25417 for (i = 0, d = bdesc_pcmpestr;
25418 i < ARRAY_SIZE (bdesc_pcmpestr);
25420 if (d->code == fcode)
25421 return ix86_expand_sse_pcmpestr (d, exp, target);
25423 for (i = 0, d = bdesc_pcmpistr;
25424 i < ARRAY_SIZE (bdesc_pcmpistr);
25426 if (d->code == fcode)
25427 return ix86_expand_sse_pcmpistr (d, exp, target);
25429 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25430 if (d->code == fcode)
25431 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25432 (enum ix86_builtin_func_type)
25433 d->flag, d->comparison);
25435 gcc_unreachable ();
25438 /* Returns a function decl for a vectorized version of the builtin function
25439 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25440 if it is not available. */
25443 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25446 enum machine_mode in_mode, out_mode;
25448 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25450 if (TREE_CODE (type_out) != VECTOR_TYPE
25451 || TREE_CODE (type_in) != VECTOR_TYPE
25452 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25455 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25456 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25457 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25458 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25462 case BUILT_IN_SQRT:
25463 if (out_mode == DFmode && out_n == 2
25464 && in_mode == DFmode && in_n == 2)
25465 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25468 case BUILT_IN_SQRTF:
25469 if (out_mode == SFmode && out_n == 4
25470 && in_mode == SFmode && in_n == 4)
25471 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25474 case BUILT_IN_LRINT:
25475 if (out_mode == SImode && out_n == 4
25476 && in_mode == DFmode && in_n == 2)
25477 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25480 case BUILT_IN_LRINTF:
25481 if (out_mode == SImode && out_n == 4
25482 && in_mode == SFmode && in_n == 4)
25483 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25486 case BUILT_IN_COPYSIGN:
25487 if (out_mode == DFmode && out_n == 2
25488 && in_mode == DFmode && in_n == 2)
25489 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25492 case BUILT_IN_COPYSIGNF:
25493 if (out_mode == SFmode && out_n == 4
25494 && in_mode == SFmode && in_n == 4)
25495 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
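/* Worked example: when the vectorizer asks about BUILT_IN_SQRT with a
   V2DF result and a V2DF argument, out_mode and in_mode are both
   DFmode with two subparts each, so the switch above hands back
   IX86_BUILTIN_SQRTPD, i.e. a single sqrtpd instruction.  */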
25502 /* Dispatch to a handler for a vectorization library. */
25503 if (ix86_veclib_handler)
25504 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25510 /* Handler for an SVML-style interface to
25511 a library with vectorized intrinsics. */
25514 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25517 tree fntype, new_fndecl, args;
25520 enum machine_mode el_mode, in_mode;
25523 /* The SVML library is suitable for unsafe math only. */
25524 if (!flag_unsafe_math_optimizations)
25527 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25528 n = TYPE_VECTOR_SUBPARTS (type_out);
25529 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25530 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25531 if (el_mode != in_mode
25539 case BUILT_IN_LOG10:
25541 case BUILT_IN_TANH:
25543 case BUILT_IN_ATAN:
25544 case BUILT_IN_ATAN2:
25545 case BUILT_IN_ATANH:
25546 case BUILT_IN_CBRT:
25547 case BUILT_IN_SINH:
25549 case BUILT_IN_ASINH:
25550 case BUILT_IN_ASIN:
25551 case BUILT_IN_COSH:
25553 case BUILT_IN_ACOSH:
25554 case BUILT_IN_ACOS:
25555 if (el_mode != DFmode || n != 2)
25559 case BUILT_IN_EXPF:
25560 case BUILT_IN_LOGF:
25561 case BUILT_IN_LOG10F:
25562 case BUILT_IN_POWF:
25563 case BUILT_IN_TANHF:
25564 case BUILT_IN_TANF:
25565 case BUILT_IN_ATANF:
25566 case BUILT_IN_ATAN2F:
25567 case BUILT_IN_ATANHF:
25568 case BUILT_IN_CBRTF:
25569 case BUILT_IN_SINHF:
25570 case BUILT_IN_SINF:
25571 case BUILT_IN_ASINHF:
25572 case BUILT_IN_ASINF:
25573 case BUILT_IN_COSHF:
25574 case BUILT_IN_COSF:
25575 case BUILT_IN_ACOSHF:
25576 case BUILT_IN_ACOSF:
25577 if (el_mode != SFmode || n != 4)
25585 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25587 if (fn == BUILT_IN_LOGF)
25588 strcpy (name, "vmlsLn4");
25589 else if (fn == BUILT_IN_LOG)
25590 strcpy (name, "vmldLn2");
25593 sprintf (name, "vmls%s", bname+10);
25594 name[strlen (name)-1] = '4';
25597 sprintf (name, "vmld%s2", bname+10);
25599 /* Convert to uppercase. */
25603 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25604 args = TREE_CHAIN (args))
25608 fntype = build_function_type_list (type_out, type_in, NULL);
25610 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25612 /* Build a function declaration for the vectorized function. */
25613 new_fndecl = build_decl (BUILTINS_LOCATION,
25614 FUNCTION_DECL, get_identifier (name), fntype);
25615 TREE_PUBLIC (new_fndecl) = 1;
25616 DECL_EXTERNAL (new_fndecl) = 1;
25617 DECL_IS_NOVOPS (new_fndecl) = 1;
25618 TREE_READONLY (new_fndecl) = 1;
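/* For illustration: the mangling above yields, e.g., "vmlsSin4" for
   BUILT_IN_SINF (four SFmode lanes) and "vmldSin2" for BUILT_IN_SIN
   (two DFmode lanes); "bname + 10" skips the "__builtin_" prefix and
   the "Convert to uppercase" step capitalizes the function name.  A
   standalone sketch of that mangling, guarded out since it merely
   mirrors the logic above:  */
#if 0
#include <ctype.h>
#include <stdio.h>
#include <string.h>

static void
svml_mangle (const char *base, int lanes, char name[20])
{
  if (lanes == 4)
    {
      /* Single precision: "sinf" -> "vmlssinf" -> "vmlssin4".  */
      snprintf (name, 20, "vmls%s", base);
      name[strlen (name) - 1] = '4';
    }
  else
    /* Double precision: "sin" -> "vmldsin2".  */
    snprintf (name, 20, "vmld%s2", base);
  /* Capitalize the function name: "vmlssin4" -> "vmlsSin4".  */
  name[4] = toupper ((unsigned char) name[4]);
}
#endif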
25623 /* Handler for an ACML-style interface to
25624 a library with vectorized intrinsics. */
25627 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25629 char name[20] = "__vr.._";
25630 tree fntype, new_fndecl, args;
25633 enum machine_mode el_mode, in_mode;
25636 /* The ACML library is 64-bit only and suitable for unsafe math only, as
25637 it does not correctly support parts of IEEE with the required
25638 precision, such as denormals. */
25640 || !flag_unsafe_math_optimizations)
25643 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25644 n = TYPE_VECTOR_SUBPARTS (type_out);
25645 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25646 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25647 if (el_mode != in_mode
25657 case BUILT_IN_LOG2:
25658 case BUILT_IN_LOG10:
25661 if (el_mode != DFmode
25666 case BUILT_IN_SINF:
25667 case BUILT_IN_COSF:
25668 case BUILT_IN_EXPF:
25669 case BUILT_IN_POWF:
25670 case BUILT_IN_LOGF:
25671 case BUILT_IN_LOG2F:
25672 case BUILT_IN_LOG10F:
25675 if (el_mode != SFmode
25684 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25685 sprintf (name + 7, "%s", bname+10);
25688 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25689 args = TREE_CHAIN (args))
25693 fntype = build_function_type_list (type_out, type_in, NULL);
25695 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25697 /* Build a function declaration for the vectorized function. */
25698 new_fndecl = build_decl (BUILTINS_LOCATION,
25699 FUNCTION_DECL, get_identifier (name), fntype);
25700 TREE_PUBLIC (new_fndecl) = 1;
25701 DECL_EXTERNAL (new_fndecl) = 1;
25702 DECL_IS_NOVOPS (new_fndecl) = 1;
25703 TREE_READONLY (new_fndecl) = 1;
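/* For illustration: name[4] and name[5] of the "__vr.._" template are
   patched per case ('d'/'2' for the DFmode cases, 's'/'4' for the
   SFmode ones), and the sprintf appends the math function name past
   the "__builtin_" prefix -- yielding, e.g., "__vrd2_sin" for
   BUILT_IN_SIN and "__vrs4_sinf" for BUILT_IN_SINF.  */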
25709 /* Returns a decl of a function that implements conversion of an integer vector
25710 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25711 are the types involved when converting according to CODE.
25712 Return NULL_TREE if it is not available. */
25715 ix86_vectorize_builtin_conversion (unsigned int code,
25716 tree dest_type, tree src_type)
25724 switch (TYPE_MODE (src_type))
25727 switch (TYPE_MODE (dest_type))
25730 return (TYPE_UNSIGNED (src_type)
25731 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25732 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25734 return (TYPE_UNSIGNED (src_type)
25736 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25742 switch (TYPE_MODE (dest_type))
25745 return (TYPE_UNSIGNED (src_type)
25747 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25756 case FIX_TRUNC_EXPR:
25757 switch (TYPE_MODE (dest_type))
25760 switch (TYPE_MODE (src_type))
25763 return (TYPE_UNSIGNED (dest_type)
25765 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25767 return (TYPE_UNSIGNED (dest_type)
25769 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25776 switch (TYPE_MODE (src_type))
25779 return (TYPE_UNSIGNED (dest_type)
25781 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25798 /* Returns a code for a target-specific builtin that implements
25799 the reciprocal of the function, or NULL_TREE if not available. */
25802 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25803 bool sqrt ATTRIBUTE_UNUSED)
25805 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25806 && flag_finite_math_only && !flag_trapping_math
25807 && flag_unsafe_math_optimizations))
25811 /* Machine dependent builtins. */
25814 /* Vectorized version of sqrt to rsqrt conversion. */
25815 case IX86_BUILTIN_SQRTPS_NR:
25816 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25822 /* Normal builtins. */
25825 /* Sqrt to rsqrt conversion. */
25826 case BUILT_IN_SQRTF:
25827 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25834 /* Helper for avx_vpermilps256_operand et al. This is also used by
25835 the expansion functions to turn the parallel back into a mask.
25836 The return value is 0 for no match and the imm8+1 for a match. */
25839 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25841 unsigned i, nelt = GET_MODE_NUNITS (mode);
25843 unsigned char ipar[8];
25845 if (XVECLEN (par, 0) != (int) nelt)
25848 /* Validate that all of the elements are constants, and not totally
25849 out of range. Copy the data into an integral array to make the
25850 subsequent checks easier. */
25851 for (i = 0; i < nelt; ++i)
25853 rtx er = XVECEXP (par, 0, i);
25854 unsigned HOST_WIDE_INT ei;
25856 if (!CONST_INT_P (er))
25867 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
25869 for (i = 0; i < 2; ++i)
25873 mask |= ipar[i] << i;
25875 for (i = 2; i < 4; ++i)
25879 mask |= (ipar[i] - 2) << i;
25884 /* In the 256-bit SFmode case, we have full freedom of movement
25885 within the low 128-bit lane, but the high 128-bit lane must
25886 mirror the exact same pattern. */
25887 for (i = 0; i < 4; ++i)
25888 if (ipar[i] + 4 != ipar[i + 4])
25895 /* In the 128-bit case, we have full freedom in the placement of
25896 the elements from the source operand. */
25897 for (i = 0; i < nelt; ++i)
25898 mask |= ipar[i] << (i * (nelt / 2));
25902 gcc_unreachable ();
25905 /* Make sure success has a non-zero value by adding one. */
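/* Worked example: for V4SFmode, a PARALLEL selecting elements
   {3, 2, 1, 0} gives ipar = {3, 2, 1, 0}; with nelt == 4 each index
   is shifted left by i * 2, so mask == 0x1b and the function returns
   0x1b + 1 == 0x1c.  A standalone sketch of the mask computation,
   guarded out since it merely mirrors the loop above:  */
#if 0
static unsigned
vpermilps_mask (const unsigned char ipar[4])
{
  unsigned i, mask = 0;
  for (i = 0; i < 4; ++i)
    mask |= ipar[i] << (i * 2);	/* two selector bits per element */
  return mask + 1;		/* non-zero on success */
}
#endif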
25909 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25910 the expansion functions to turn the parallel back into a mask.
25911 The return value is 0 for no match and the imm8+1 for a match. */
25914 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25916 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25918 unsigned char ipar[8];
25920 if (XVECLEN (par, 0) != (int) nelt)
25923 /* Validate that all of the elements are constants, and not totally
25924 out of range. Copy the data into an integral array to make the
25925 subsequent checks easier. */
25926 for (i = 0; i < nelt; ++i)
25928 rtx er = XVECEXP (par, 0, i);
25929 unsigned HOST_WIDE_INT ei;
25931 if (!CONST_INT_P (er))
25934 if (ei >= 2 * nelt)
25939 /* Validate that each half of the permute selects consecutive elements, i.e. really is a contiguous half. */
25940 for (i = 0; i < nelt2 - 1; ++i)
25941 if (ipar[i] + 1 != ipar[i + 1])
25943 for (i = nelt2; i < nelt - 1; ++i)
25944 if (ipar[i] + 1 != ipar[i + 1])
25947 /* Reconstruct the mask. */
25948 for (i = 0; i < 2; ++i)
25950 unsigned e = ipar[i * nelt2];
25954 mask |= e << (i * 4);
25957 /* Make sure success has a non-zero value by adding one. */
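/* Worked example: for V4DFmode, a PARALLEL of {4, 5, 2, 3} selects
   the low half of the second source followed by the high half of the
   first.  The reconstruction divides the leading index of each half
   by nelt2: 4 / 2 == 2 lands in bits 0-3 and 2 / 2 == 1 in bits 4-7,
   giving imm8 0x12, returned as 0x13.  */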
25962 /* Store OPERAND to memory after reload is completed. This means
25963 that we can't easily use assign_stack_local. */
25965 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25969 gcc_assert (reload_completed);
25970 if (ix86_using_red_zone ())
25972 result = gen_rtx_MEM (mode,
25973 gen_rtx_PLUS (Pmode,
25975 GEN_INT (-RED_ZONE_SIZE)));
25976 emit_move_insn (result, operand);
25978 else if (TARGET_64BIT)
25984 operand = gen_lowpart (DImode, operand);
25988 gen_rtx_SET (VOIDmode,
25989 gen_rtx_MEM (DImode,
25990 gen_rtx_PRE_DEC (DImode,
25991 stack_pointer_rtx)),
25995 gcc_unreachable ();
25997 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26006 split_di (&operand, 1, operands, operands + 1);
26008 gen_rtx_SET (VOIDmode,
26009 gen_rtx_MEM (SImode,
26010 gen_rtx_PRE_DEC (Pmode,
26011 stack_pointer_rtx)),
26014 gen_rtx_SET (VOIDmode,
26015 gen_rtx_MEM (SImode,
26016 gen_rtx_PRE_DEC (Pmode,
26017 stack_pointer_rtx)),
26022 /* Store HImode values as SImode. */
26023 operand = gen_lowpart (SImode, operand);
26027 gen_rtx_SET (VOIDmode,
26028 gen_rtx_MEM (GET_MODE (operand),
26029 gen_rtx_PRE_DEC (SImode,
26030 stack_pointer_rtx)),
26034 gcc_unreachable ();
26036 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26041 /* Free the operand from memory. */
26043 ix86_free_from_memory (enum machine_mode mode)
26045 if (!ix86_using_red_zone ())
26049 if (mode == DImode || TARGET_64BIT)
26053 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26054 to pop or add instruction if registers are available. */
26055 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26056 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26061 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26062 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the same. */
26064 static const reg_class_t *
26065 i386_ira_cover_classes (void)
26067 static const reg_class_t sse_fpmath_classes[] = {
26068 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26070 static const reg_class_t no_sse_fpmath_classes[] = {
26071 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26074 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26077 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26078 QImode must go into class Q_REGS.
26079 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26080 movdf to do mem-to-mem moves through integer regs. */
26082 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26084 enum machine_mode mode = GET_MODE (x);
26086 /* We're only allowed to return a subclass of CLASS. Many of the
26087 following checks fail for NO_REGS, so eliminate that early. */
26088 if (regclass == NO_REGS)
26091 /* All classes can load zeros. */
26092 if (x == CONST0_RTX (mode))
26095 /* Force constants into memory if we are loading a (nonzero) constant into
26096 an MMX or SSE register. This is because there are no MMX/SSE instructions
26097 to load from a constant. */
26099 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26102 /* Prefer SSE regs only if we can use them for math. */
26103 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26104 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26106 /* Floating-point constants need more complex checks. */
26107 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26109 /* General regs can load everything. */
26110 if (reg_class_subset_p (regclass, GENERAL_REGS))
26113 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26114 zero above. We only want to wind up preferring 80387 registers if
26115 we plan on doing computation with them. */
26117 && standard_80387_constant_p (x))
26119 /* Limit class to non-sse. */
26120 if (regclass == FLOAT_SSE_REGS)
26122 if (regclass == FP_TOP_SSE_REGS)
26124 if (regclass == FP_SECOND_SSE_REGS)
26125 return FP_SECOND_REG;
26126 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26133 /* Generally when we see PLUS here, it's the function invariant
26134 (plus soft-fp const_int), which can only be computed into general regs. */
26136 if (GET_CODE (x) == PLUS)
26137 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26139 /* QImode constants are easy to load, but non-constant QImode data
26140 must go into Q_REGS. */
26141 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26143 if (reg_class_subset_p (regclass, Q_REGS))
26145 if (reg_class_subset_p (Q_REGS, regclass))
26153 /* Discourage putting floating-point values in SSE registers unless
26154 SSE math is being used, and likewise for the 387 registers. */
26156 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26158 enum machine_mode mode = GET_MODE (x);
26160 /* Restrict the output reload class to the register bank that we are doing
26161 math on. If we would like not to return a subset of CLASS, reject this
26162 alternative: if reload cannot do this, it will still use its choice. */
26163 mode = GET_MODE (x);
26164 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26165 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26167 if (X87_FLOAT_MODE_P (mode))
26169 if (regclass == FP_TOP_SSE_REGS)
26171 else if (regclass == FP_SECOND_SSE_REGS)
26172 return FP_SECOND_REG;
26174 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26181 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26182 enum machine_mode mode,
26183 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26185 /* QImode spills from non-QI registers require an
26186 intermediate register on 32-bit targets. */
26187 if (!in_p && mode == QImode && !TARGET_64BIT
26188 && (rclass == GENERAL_REGS
26189 || rclass == LEGACY_REGS
26190 || rclass == INDEX_REGS))
26199 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26200 regno = true_regnum (x);
26202 /* Return Q_REGS if the operand is in memory. */
26210 /* If we are copying between general and FP registers, we need a memory
26211 location. The same is true for SSE and MMX registers.
26213 To optimize register_move_cost performance, allow an inline variant.
26215 The macro can't work reliably when one of the CLASSES is a class containing
26216 registers from multiple units (SSE, MMX, integer). We avoid this by never
26217 combining those units in a single alternative in the machine description.
26218 Ensure that this constraint holds to avoid unexpected surprises.
26220 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26221 enforce these sanity checks. */
26224 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26225 enum machine_mode mode, int strict)
26227 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26228 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26229 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26230 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26231 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26232 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26234 gcc_assert (!strict);
26238 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26241 /* ??? This is a lie. We do have moves between mmx/general and between
26242 mmx/sse2. But by saying we need secondary memory we discourage the
26243 register allocator from using the mmx registers unless needed. */
26244 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26247 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26249 /* SSE1 doesn't have any direct moves from other classes. */
26253 /* If the target says that inter-unit moves are more expensive
26254 than moving through memory, then don't generate them. */
26255 if (!TARGET_INTER_UNIT_MOVES)
26258 /* Between SSE and general, we have moves no larger than word size. */
26259 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26267 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26268 enum machine_mode mode, int strict)
26270 return inline_secondary_memory_needed (class1, class2, mode, strict);
26273 /* Return true if the registers in CLASS cannot represent the change from
26274 modes FROM to TO. */
26277 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26278 enum reg_class regclass)
26283 /* x87 registers can't do subreg at all, as all values are reformatted
26284 to extended precision. */
26285 if (MAYBE_FLOAT_CLASS_P (regclass))
26288 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26290 /* Vector registers do not support QI or HImode loads. If we don't
26291 disallow a change to these modes, reload will assume it's ok to
26292 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26293 the vec_dupv4hi pattern. */
26294 if (GET_MODE_SIZE (from) < 4)
26297 /* Vector registers do not support subreg with nonzero offsets, which
26298 are otherwise valid for integer registers. Since we can't see
26299 whether we have a nonzero offset from here, prohibit all
26300 nonparadoxical subregs changing size. */
26301 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26308 /* Return the cost of moving data of mode M between a
26309 register and memory. A value of 2 is the default; this cost is
26310 relative to those in `REGISTER_MOVE_COST'.
26312 This function is used extensively by register_move_cost that is used to
26313 build tables at startup. Make it inline in this case.
26314 When IN is 2, return maximum of in and out move cost.
26316 If moving between registers and memory is more expensive than
26317 between two registers, you should define this macro to express the relative cost.

26320 Model also the increased moving costs of QImode registers in non Q_REGS classes. */
26324 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26328 if (FLOAT_CLASS_P (regclass))
26346 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26347 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26349 if (SSE_CLASS_P (regclass))
26352 switch (GET_MODE_SIZE (mode))
26367 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26368 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26370 if (MMX_CLASS_P (regclass))
26373 switch (GET_MODE_SIZE (mode))
26385 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26386 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26388 switch (GET_MODE_SIZE (mode))
26391 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26394 return ix86_cost->int_store[0];
26395 if (TARGET_PARTIAL_REG_DEPENDENCY
26396 && optimize_function_for_speed_p (cfun))
26397 cost = ix86_cost->movzbl_load;
26399 cost = ix86_cost->int_load[0];
26401 return MAX (cost, ix86_cost->int_store[0]);
26407 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26409 return ix86_cost->movzbl_load;
26411 return ix86_cost->int_store[0] + 4;
26416 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26417 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26419 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
26420 if (mode == TFmode)
26423 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26425 cost = ix86_cost->int_load[2];
26427 cost = ix86_cost->int_store[2];
26428 return (cost * (((int) GET_MODE_SIZE (mode)
26429 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
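/* Worked example: a DImode spill on a 32-bit target takes the default
   branch above, scaling the int_load[2]/int_store[2] cost by
   (8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD == 2 word-sized moves
   when UNITS_PER_WORD == 4.  */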
26434 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26437 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26441 /* Return the cost of moving data from a register in class CLASS1 to
26442 one in class CLASS2.
26444 It is not required that the cost always equal 2 when FROM is the same as TO;
26445 on some machines it is expensive to move between registers if they are not
26446 general registers. */
26449 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26450 reg_class_t class2_i)
26452 enum reg_class class1 = (enum reg_class) class1_i;
26453 enum reg_class class2 = (enum reg_class) class2_i;
26455 /* In case we require secondary memory, compute cost of the store followed
26456 by load. In order to avoid bad register allocation choices, we need
26457 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26459 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26463 cost += inline_memory_move_cost (mode, class1, 2);
26464 cost += inline_memory_move_cost (mode, class2, 2);
26466 /* In the case of copying from a general purpose register we may emit
26467 multiple stores followed by a single load, causing a memory size
26468 mismatch stall. Count this as an arbitrarily high cost of 20. */
26469 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26472 /* In the case of FP/MMX moves, the registers actually overlap, and we
26473 have to switch modes in order to treat them differently. */
26474 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26475 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26481 /* Moves between SSE/MMX and integer unit are expensive. */
26482 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26483 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26485 /* ??? By keeping the returned value relatively high, we limit the number
26486 of moves between integer and MMX/SSE registers for all targets.
26487 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
26488 where integer modes in MMX/SSE registers are not tieable
26489 because of missing QImode and HImode moves to, from, or between
26490 MMX/SSE registers. */
26491 return MAX (8, ix86_cost->mmxsse_to_integer);
26493 if (MAYBE_FLOAT_CLASS_P (class1))
26494 return ix86_cost->fp_move;
26495 if (MAYBE_SSE_CLASS_P (class1))
26496 return ix86_cost->sse_move;
26497 if (MAYBE_MMX_CLASS_P (class1))
26498 return ix86_cost->mmx_move;
26502 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26505 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26507 /* Flags, and only flags, can hold CCmode values. */
26508 if (CC_REGNO_P (regno))
26509 return GET_MODE_CLASS (mode) == MODE_CC;
26510 if (GET_MODE_CLASS (mode) == MODE_CC
26511 || GET_MODE_CLASS (mode) == MODE_RANDOM
26512 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26514 if (FP_REGNO_P (regno))
26515 return VALID_FP_MODE_P (mode);
26516 if (SSE_REGNO_P (regno))
26518 /* We implement the move patterns for all vector modes into and
26519 out of SSE registers, even when no operation instructions
26520 are available. OImode moves are available only when AVX is enabled. */
26522 return ((TARGET_AVX && mode == OImode)
26523 || VALID_AVX256_REG_MODE (mode)
26524 || VALID_SSE_REG_MODE (mode)
26525 || VALID_SSE2_REG_MODE (mode)
26526 || VALID_MMX_REG_MODE (mode)
26527 || VALID_MMX_REG_MODE_3DNOW (mode));
26529 if (MMX_REGNO_P (regno))
26531 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26532 so if the register is available at all, then we can move data of
26533 the given mode into or out of it. */
26534 return (VALID_MMX_REG_MODE (mode)
26535 || VALID_MMX_REG_MODE_3DNOW (mode));
26538 if (mode == QImode)
26540 /* Take care with QImode values - they can live in non-QI regs,
26541 but then they cause partial register stalls. */
26542 if (regno <= BX_REG || TARGET_64BIT)
26544 if (!TARGET_PARTIAL_REG_STALL)
26546 return reload_in_progress || reload_completed;
26548 /* We handle both integer and floats in the general purpose registers. */
26549 else if (VALID_INT_MODE_P (mode))
26551 else if (VALID_FP_MODE_P (mode))
26553 else if (VALID_DFP_MODE_P (mode))
26555 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26556 on to use that value in smaller contexts, this can easily force a
26557 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26558 supporting DImode, allow it. */
26559 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26565 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26566 tieable integer mode. */
26569 ix86_tieable_integer_mode_p (enum machine_mode mode)
26578 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26581 return TARGET_64BIT;
26588 /* Return true if MODE1 is accessible in a register that can hold MODE2
26589 without copying. That is, all register classes that can hold MODE2
26590 can also hold MODE1. */
26593 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26595 if (mode1 == mode2)
26598 if (ix86_tieable_integer_mode_p (mode1)
26599 && ix86_tieable_integer_mode_p (mode2))
26602 /* MODE2 being XFmode implies fp stack or general regs, which means we
26603 can tie any smaller floating point modes to it. Note that we do not
26604 tie this with TFmode. */
26605 if (mode2 == XFmode)
26606 return mode1 == SFmode || mode1 == DFmode;
26608 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26609 that we can tie it with SFmode. */
26610 if (mode2 == DFmode)
26611 return mode1 == SFmode;
26613 /* If MODE2 is only appropriate for an SSE register, then tie with
26614 any other mode acceptable to SSE registers. */
26615 if (GET_MODE_SIZE (mode2) == 16
26616 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26617 return (GET_MODE_SIZE (mode1) == 16
26618 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26620 /* If MODE2 is appropriate for an MMX register, then tie
26621 with any other mode acceptable to MMX registers. */
26622 if (GET_MODE_SIZE (mode2) == 8
26623 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26624 return (GET_MODE_SIZE (mode1) == 8
26625 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
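/* For illustration: V4SImode and V2DFmode tie under the 16-byte SSE
   rule above, while DImode cannot tie with V2DImode on 32-bit targets
   since the sizes differ and DImode also lives in integer register
   pairs there.  */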
26630 /* Compute a (partial) cost for rtx X. Return true if the complete
26631 cost has been computed, and false if subexpressions should be
26632 scanned. In either case, *TOTAL contains the cost result. */
26635 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26637 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26638 enum machine_mode mode = GET_MODE (x);
26639 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26647 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26649 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26651 else if (flag_pic && SYMBOLIC_CONST (x)
26653 || (GET_CODE (x) != LABEL_REF
26654 && (GET_CODE (x) != SYMBOL_REF
26655 || !SYMBOL_REF_LOCAL_P (x)))))
26662 if (mode == VOIDmode)
26665 switch (standard_80387_constant_p (x))
26670 default: /* Other constants */
26675 /* Start with (MEM (SYMBOL_REF)), since that's where
26676 it'll probably end up. Add a penalty for size. */
26677 *total = (COSTS_N_INSNS (1)
26678 + (flag_pic != 0 && !TARGET_64BIT)
26679 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26685 /* Zero extension is often completely free on x86_64, so make
26686 it as cheap as possible. */
26687 if (TARGET_64BIT && mode == DImode
26688 && GET_MODE (XEXP (x, 0)) == SImode)
26690 else if (TARGET_ZERO_EXTEND_WITH_AND)
26691 *total = cost->add;
26693 *total = cost->movzx;
26697 *total = cost->movsx;
26701 if (CONST_INT_P (XEXP (x, 1))
26702 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26704 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26707 *total = cost->add;
26710 if ((value == 2 || value == 3)
26711 && cost->lea <= cost->shift_const)
26713 *total = cost->lea;
26723 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26725 if (CONST_INT_P (XEXP (x, 1)))
26727 if (INTVAL (XEXP (x, 1)) > 32)
26728 *total = cost->shift_const + COSTS_N_INSNS (2);
26730 *total = cost->shift_const * 2;
26734 if (GET_CODE (XEXP (x, 1)) == AND)
26735 *total = cost->shift_var * 2;
26737 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26742 if (CONST_INT_P (XEXP (x, 1)))
26743 *total = cost->shift_const;
26745 *total = cost->shift_var;
26750 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26752 /* ??? SSE scalar cost should be used here. */
26753 *total = cost->fmul;
26756 else if (X87_FLOAT_MODE_P (mode))
26758 *total = cost->fmul;
26761 else if (FLOAT_MODE_P (mode))
26763 /* ??? SSE vector cost should be used here. */
26764 *total = cost->fmul;
26769 rtx op0 = XEXP (x, 0);
26770 rtx op1 = XEXP (x, 1);
26772 if (CONST_INT_P (XEXP (x, 1)))
26774 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26775 for (nbits = 0; value != 0; value &= value - 1)
26779 /* This is arbitrary. */
26782 /* Compute costs correctly for widening multiplication. */
26783 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26784 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26785 == GET_MODE_SIZE (mode))
26787 int is_mulwiden = 0;
26788 enum machine_mode inner_mode = GET_MODE (op0);
26790 if (GET_CODE (op0) == GET_CODE (op1))
26791 is_mulwiden = 1, op1 = XEXP (op1, 0);
26792 else if (CONST_INT_P (op1))
26794 if (GET_CODE (op0) == SIGN_EXTEND)
26795 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26798 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26802 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26805 *total = (cost->mult_init[MODE_INDEX (mode)]
26806 + nbits * cost->mult_bit
26807 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26816 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26817 /* ??? SSE cost should be used here. */
26818 *total = cost->fdiv;
26819 else if (X87_FLOAT_MODE_P (mode))
26820 *total = cost->fdiv;
26821 else if (FLOAT_MODE_P (mode))
26822 /* ??? SSE vector cost should be used here. */
26823 *total = cost->fdiv;
26825 *total = cost->divide[MODE_INDEX (mode)];
26829 if (GET_MODE_CLASS (mode) == MODE_INT
26830 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26832 if (GET_CODE (XEXP (x, 0)) == PLUS
26833 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26834 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26835 && CONSTANT_P (XEXP (x, 1)))
26837 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26838 if (val == 2 || val == 4 || val == 8)
26840 *total = cost->lea;
26841 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26842 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26843 outer_code, speed);
26844 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26848 else if (GET_CODE (XEXP (x, 0)) == MULT
26849 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26851 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26852 if (val == 2 || val == 4 || val == 8)
26854 *total = cost->lea;
26855 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26856 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26860 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26862 *total = cost->lea;
26863 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26864 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26865 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26872 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26874 /* ??? SSE cost should be used here. */
26875 *total = cost->fadd;
26878 else if (X87_FLOAT_MODE_P (mode))
26880 *total = cost->fadd;
26883 else if (FLOAT_MODE_P (mode))
26885 /* ??? SSE vector cost should be used here. */
26886 *total = cost->fadd;
26894 if (!TARGET_64BIT && mode == DImode)
26896 *total = (cost->add * 2
26897 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26898 << (GET_MODE (XEXP (x, 0)) != DImode))
26899 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26900 << (GET_MODE (XEXP (x, 1)) != DImode)));
26906 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26908 /* ??? SSE cost should be used here. */
26909 *total = cost->fchs;
26912 else if (X87_FLOAT_MODE_P (mode))
26914 *total = cost->fchs;
26917 else if (FLOAT_MODE_P (mode))
26919 /* ??? SSE vector cost should be used here. */
26920 *total = cost->fchs;
26926 if (!TARGET_64BIT && mode == DImode)
26927 *total = cost->add * 2;
26929 *total = cost->add;
26933 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26934 && XEXP (XEXP (x, 0), 1) == const1_rtx
26935 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26936 && XEXP (x, 1) == const0_rtx)
26938 /* This kind of construct is implemented using test[bwl].
26939 Treat it as if we had an AND. */
26940 *total = (cost->add
26941 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26942 + rtx_cost (const1_rtx, outer_code, speed));
26948 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26953 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26954 /* ??? SSE cost should be used here. */
26955 *total = cost->fabs;
26956 else if (X87_FLOAT_MODE_P (mode))
26957 *total = cost->fabs;
26958 else if (FLOAT_MODE_P (mode))
26959 /* ??? SSE vector cost should be used here. */
26960 *total = cost->fabs;
26964 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26965 /* ??? SSE cost should be used here. */
26966 *total = cost->fsqrt;
26967 else if (X87_FLOAT_MODE_P (mode))
26968 *total = cost->fsqrt;
26969 else if (FLOAT_MODE_P (mode))
26970 /* ??? SSE vector cost should be used here. */
26971 *total = cost->fsqrt;
26975 if (XINT (x, 1) == UNSPEC_TP)
26982 case VEC_DUPLICATE:
26983 /* ??? Assume all of these vector manipulation patterns are
26984 recognizable, in which case they all pretty much have the same cost. */
26986 *total = COSTS_N_INSNS (1);
26996 static int current_machopic_label_num;
26998 /* Given a symbol name and its associated stub, write out the
26999 definition of the stub. */
27002 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27004 unsigned int length;
27005 char *binder_name, *symbol_name, lazy_ptr_name[32];
27006 int label = ++current_machopic_label_num;
27008 /* For 64-bit we shouldn't get here. */
27009 gcc_assert (!TARGET_64BIT);
27011 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27012 symb = targetm.strip_name_encoding (symb);
27014 length = strlen (stub);
27015 binder_name = XALLOCAVEC (char, length + 32);
27016 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27018 length = strlen (symb);
27019 symbol_name = XALLOCAVEC (char, length + 32);
27020 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27022 sprintf (lazy_ptr_name, "L%d$lz", label);
27025 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27027 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27029 fprintf (file, "%s:\n", stub);
27030 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27034 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27035 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27036 fprintf (file, "\tjmp\t*%%edx\n");
27039 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27041 fprintf (file, "%s:\n", binder_name);
27045 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27046 fputs ("\tpushl\t%eax\n", file);
27049 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27051 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27053 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27054 fprintf (file, "%s:\n", lazy_ptr_name);
27055 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27056 fprintf (file, ASM_LONG "%s\n", binder_name);
27058 #endif /* TARGET_MACHO */
27060 /* Order the registers for register allocator. */
27063 x86_order_regs_for_local_alloc (void)
27068 /* First allocate the local general purpose registers. */
27069 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27070 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27071 reg_alloc_order [pos++] = i;
27073 /* Global general purpose registers. */
27074 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27075 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27076 reg_alloc_order [pos++] = i;
27078 /* x87 registers come first in case we are doing FP math using them. */
27080 if (!TARGET_SSE_MATH)
27081 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27082 reg_alloc_order [pos++] = i;
27084 /* SSE registers. */
27085 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27086 reg_alloc_order [pos++] = i;
27087 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27088 reg_alloc_order [pos++] = i;
27090 /* x87 registers. */
27091 if (TARGET_SSE_MATH)
27092 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27093 reg_alloc_order [pos++] = i;
27095 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27096 reg_alloc_order [pos++] = i;
27098 /* Initialize the rest of the array, as we do not allocate some registers at all. */
27100 while (pos < FIRST_PSEUDO_REGISTER)
27101 reg_alloc_order [pos++] = 0;
27104 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
27105 struct attribute_spec.handler. */
27107 ix86_handle_abi_attribute (tree *node, tree name,
27108 tree args ATTRIBUTE_UNUSED,
27109 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27111 if (TREE_CODE (*node) != FUNCTION_TYPE
27112 && TREE_CODE (*node) != METHOD_TYPE
27113 && TREE_CODE (*node) != FIELD_DECL
27114 && TREE_CODE (*node) != TYPE_DECL)
27116 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27118 *no_add_attrs = true;
27123 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27125 *no_add_attrs = true;
27129 /* Can combine regparm with all attributes but fastcall. */
27130 if (is_attribute_p ("ms_abi", name))
27132 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27134 error ("ms_abi and sysv_abi attributes are not compatible");
27139 else if (is_attribute_p ("sysv_abi", name))
27141 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27143 error ("ms_abi and sysv_abi attributes are not compatible");
27152 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27153 struct attribute_spec.handler. */
27155 ix86_handle_struct_attribute (tree *node, tree name,
27156 tree args ATTRIBUTE_UNUSED,
27157 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27160 if (DECL_P (*node))
27162 if (TREE_CODE (*node) == TYPE_DECL)
27163 type = &TREE_TYPE (*node);
27168 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27169 || TREE_CODE (*type) == UNION_TYPE)))
27171 warning (OPT_Wattributes, "%qE attribute ignored",
27173 *no_add_attrs = true;
27176 else if ((is_attribute_p ("ms_struct", name)
27177 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27178 || ((is_attribute_p ("gcc_struct", name)
27179 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27181 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27183 *no_add_attrs = true;
27190 ix86_handle_fndecl_attribute (tree *node, tree name,
27191 tree args ATTRIBUTE_UNUSED,
27192 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27194 if (TREE_CODE (*node) != FUNCTION_DECL)
27196 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27198 *no_add_attrs = true;
27204 ix86_ms_bitfield_layout_p (const_tree record_type)
27206 return ((TARGET_MS_BITFIELD_LAYOUT
27207 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27208 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27211 /* Returns an expression indicating where the this parameter is
27212 located on entry to the FUNCTION. */
27215 x86_this_parameter (tree function)
27217 tree type = TREE_TYPE (function);
27218 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27223 const int *parm_regs;
27225 if (ix86_function_type_abi (type) == MS_ABI)
27226 parm_regs = x86_64_ms_abi_int_parameter_registers;
27228 parm_regs = x86_64_int_parameter_registers;
27229 return gen_rtx_REG (DImode, parm_regs[aggr]);
27232 nregs = ix86_function_regparm (type, function);
27234 if (nregs > 0 && !stdarg_p (type))
27238 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27239 regno = aggr ? DX_REG : CX_REG;
27240 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27244 return gen_rtx_MEM (SImode,
27245 plus_constant (stack_pointer_rtx, 4));
27254 return gen_rtx_MEM (SImode,
27255 plus_constant (stack_pointer_rtx, 4));
27258 return gen_rtx_REG (SImode, regno);
27261 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
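/* For illustration: for a 32-bit fastcall method `this' arrives in
   %ecx, or in %edx when a hidden aggregate-return pointer occupies
   %ecx (the AGGR case above); with no register parameters it is found
   at 4(%esp), or at 8(%esp) past the hidden pointer.  */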
27264 /* Determine whether x86_output_mi_thunk can succeed. */
27267 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27268 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27269 HOST_WIDE_INT vcall_offset, const_tree function)
27271 /* 64-bit can handle anything. */
27275 /* For 32-bit, everything's fine if we have one free register. */
27276 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27279 /* Need a free register for vcall_offset. */
27283 /* Need a free register for GOT references. */
27284 if (flag_pic && !targetm.binds_local_p (function))
27287 /* Otherwise ok. */
27291 /* Output the assembler code for a thunk function. THUNK_DECL is the
27292 declaration for the thunk function itself, FUNCTION is the decl for
27293 the target function. DELTA is an immediate constant offset to be
27294 added to THIS. If VCALL_OFFSET is nonzero, the word at
27295 *(*this + vcall_offset) should be added to THIS. */
27298 x86_output_mi_thunk (FILE *file,
27299 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27300 HOST_WIDE_INT vcall_offset, tree function)
27303 rtx this_param = x86_this_parameter (function);
27306 /* Make sure unwind info is emitted for the thunk if needed. */
27307 final_start_function (emit_barrier (), file, 1);
27309 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27310 pull it in now and let DELTA benefit. */
27311 if (REG_P (this_param))
27312 this_reg = this_param;
27313 else if (vcall_offset)
27315 /* Put the this parameter into %eax. */
27316 xops[0] = this_param;
27317 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27318 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27321 this_reg = NULL_RTX;
27323 /* Adjust the this parameter by a fixed constant. */
27326 xops[0] = GEN_INT (delta);
27327 xops[1] = this_reg ? this_reg : this_param;
27330 if (!x86_64_general_operand (xops[0], DImode))
27332 tmp = gen_rtx_REG (DImode, R10_REG);
27334 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27336 xops[1] = this_param;
27338 if (x86_maybe_negate_const_int (&xops[0], DImode))
27339 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27341 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27343 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27344 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27346 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27349 /* Adjust the this parameter by a value stored in the vtable. */
27353 tmp = gen_rtx_REG (DImode, R10_REG);
27356 int tmp_regno = CX_REG;
27357 if (lookup_attribute ("fastcall",
27358 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27359 || lookup_attribute ("thiscall",
27360 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27361 tmp_regno = AX_REG;
27362 tmp = gen_rtx_REG (SImode, tmp_regno);
27365 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27367 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27369 /* Adjust the this parameter. */
27370 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27371 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27373 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27374 xops[0] = GEN_INT (vcall_offset);
27376 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27377 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27379 xops[1] = this_reg;
27380 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27383 /* If necessary, drop THIS back to its stack slot. */
27384 if (this_reg && this_reg != this_param)
27386 xops[0] = this_reg;
27387 xops[1] = this_param;
27388 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27391 xops[0] = XEXP (DECL_RTL (function), 0);
27394 if (!flag_pic || targetm.binds_local_p (function))
27395 output_asm_insn ("jmp\t%P0", xops);
27396 /* All thunks should be in the same object as their target,
27397 and thus binds_local_p should be true. */
27398 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27399 gcc_unreachable ();
27402 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27403 tmp = gen_rtx_CONST (Pmode, tmp);
27404 tmp = gen_rtx_MEM (QImode, tmp);
27406 output_asm_insn ("jmp\t%A0", xops);
27411 if (!flag_pic || targetm.binds_local_p (function))
27412 output_asm_insn ("jmp\t%P0", xops);
27417 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27418 if (TARGET_MACHO_BRANCH_ISLANDS)
27419 sym_ref = (gen_rtx_SYMBOL_REF
27421 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27422 tmp = gen_rtx_MEM (QImode, sym_ref);
27424 output_asm_insn ("jmp\t%0", xops);
27427 #endif /* TARGET_MACHO */
27429 tmp = gen_rtx_REG (SImode, CX_REG);
27430 output_set_got (tmp, NULL_RTX);
27433 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27434 output_asm_insn ("jmp\t{*}%1", xops);
27437 final_end_function ();
27441 x86_file_start (void)
27443 default_file_start ();
27445 darwin_file_start ();
27447 if (X86_FILE_START_VERSION_DIRECTIVE)
27448 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27449 if (X86_FILE_START_FLTUSED)
27450 fputs ("\t.global\t__fltused\n", asm_out_file);
27451 if (ix86_asm_dialect == ASM_INTEL)
27452 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27456 x86_field_alignment (tree field, int computed)
27458 enum machine_mode mode;
27459 tree type = TREE_TYPE (field);
27461 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27463 mode = TYPE_MODE (strip_array_types (type));
27464 if (mode == DFmode || mode == DCmode
27465 || GET_MODE_CLASS (mode) == MODE_INT
27466 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27467 return MIN (32, computed);
27471 /* Output assembler code to FILE to increment profiler label # LABELNO
27472 for profiling a function entry. */
27474 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27476 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
27481 #ifndef NO_PROFILE_COUNTERS
27482 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27485 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27486 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27488 fprintf (file, "\tcall\t%s\n", mcount_name);
27492 #ifndef NO_PROFILE_COUNTERS
27493 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27496 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27500 #ifndef NO_PROFILE_COUNTERS
27501 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27504 fprintf (file, "\tcall\t%s\n", mcount_name);
27508 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27509 /* We don't have exact information about the insn sizes, but we may assume
27510    quite safely that we are informed about all 1 byte insns and memory
27511    address sizes.  This is enough to eliminate unnecessary padding in
27512    the common cases.  */
27515 min_insn_size (rtx insn)
27519 if (!INSN_P (insn) || !active_insn_p (insn))
27522 /* Discard the alignment insns we have emitted ourselves, and jump table data.  */
27523 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27524 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27526 if (JUMP_TABLE_DATA_P (insn))
27529 /* Important case - calls are always 5 bytes.
27530    It is common to have many calls in a row.  */
27532 && symbolic_reference_mentioned_p (PATTERN (insn))
27533 && !SIBLING_CALL_P (insn))
27535 len = get_attr_length (insn);
27539 /* For normal instructions we rely on get_attr_length being exact,
27540 with a few exceptions. */
27541 if (!JUMP_P (insn))
27543 enum attr_type type = get_attr_type (insn);
27548 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27549 || asm_noperands (PATTERN (insn)) >= 0)
27556 /* Otherwise trust get_attr_length. */
27560 l = get_attr_length_address (insn);
27561 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27570 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
27571    16-byte window.  */
27574 ix86_avoid_jump_mispredicts (void)
27576 rtx insn, start = get_insns ();
27577 int nbytes = 0, njumps = 0;
27580   /* Look for all minimal intervals of instructions containing 4 jumps.
27581      The intervals are bounded by START and INSN.  NBYTES is the total
27582      size of the instructions in the interval, including INSN and not
27583      including START.  When NBYTES is smaller than 16 bytes, it is possible
27584      that the end of START and INSN end up in the same 16-byte page.
27586      The smallest page offset at which INSN can start corresponds to START
27587      ending at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
27588      We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).  */
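  /* For example, if the interval holds four jumps in NBYTES == 12 bytes and
     INSN itself is 2 bytes, the loop below computes
     padsize = 15 - 12 + 2 = 5; padding INSN by 5 bytes grows the interval
     past 16 bytes, so the four jumps can no longer share a single
     16-byte page.  */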
27590 for (insn = start; insn; insn = NEXT_INSN (insn))
27594 if (LABEL_P (insn))
27596 int align = label_to_alignment (insn);
27597 int max_skip = label_to_max_skip (insn);
27601 /* If align > 3, only up to 16 - max_skip - 1 bytes can already be
27602    in the current 16-byte page, because otherwise
27603    ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27604    bytes to reach a 16-byte boundary.  */
27606 || (align <= 3 && max_skip != (1 << align) - 1))
27609 fprintf (dump_file, "Label %i with max_skip %i\n",
27610 INSN_UID (insn), max_skip);
27613 while (nbytes + max_skip >= 16)
27615 start = NEXT_INSN (start);
27616 if ((JUMP_P (start)
27617 && GET_CODE (PATTERN (start)) != ADDR_VEC
27618 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27620 njumps--, isjump = 1;
27623 nbytes -= min_insn_size (start);
27629 min_size = min_insn_size (insn);
27630 nbytes += min_size;
27632 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27633 INSN_UID (insn), min_size);
27635 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27636 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27644 start = NEXT_INSN (start);
27645 if ((JUMP_P (start)
27646 && GET_CODE (PATTERN (start)) != ADDR_VEC
27647 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27649 njumps--, isjump = 1;
27652 nbytes -= min_insn_size (start);
27654 gcc_assert (njumps >= 0);
27656 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27657 INSN_UID (start), INSN_UID (insn), nbytes);
27659 if (njumps == 3 && isjump && nbytes < 16)
27661 int padsize = 15 - nbytes + min_insn_size (insn);
27664 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27665 INSN_UID (insn), padsize);
27666 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27672 /* AMD Athlon works faster
27673    when RET is not the destination of a conditional jump and is not directly
27674    preceded by another jump instruction.  We avoid the penalty by inserting
27675    a NOP just before the RET instructions in such cases.  */
27677 ix86_pad_returns (void)
27682 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27684 basic_block bb = e->src;
27685 rtx ret = BB_END (bb);
27687 bool replace = false;
27689 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27690 || optimize_bb_for_size_p (bb))
27692 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27693 if (active_insn_p (prev) || LABEL_P (prev))
27695 if (prev && LABEL_P (prev))
27700 FOR_EACH_EDGE (e, ei, bb->preds)
27701 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27702 && !(e->flags & EDGE_FALLTHRU))
27707 prev = prev_active_insn (ret);
27709 && ((JUMP_P (prev) && any_condjump_p (prev))
27712 /* Empty functions get a branch mispredict even when the jump destination
27713    is not visible to us.  */
27714 if (!prev && !optimize_function_for_size_p (cfun))
27719 emit_jump_insn_before (gen_return_internal_long (), ret);
27725 /* Implement machine specific optimizations.  We implement padding of returns
27726    for K8 CPUs and a pass to avoid four jumps in a single 16-byte window.  */
27730 if (optimize && optimize_function_for_speed_p (cfun))
27732 if (TARGET_PAD_RETURNS)
27733 ix86_pad_returns ();
27734 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27735 if (TARGET_FOUR_JUMP_LIMIT)
27736 ix86_avoid_jump_mispredicts ();
27741 /* Return nonzero when a QImode register that must be represented via a REX
27742    prefix is used.  */
27744 x86_extended_QIreg_mentioned_p (rtx insn)
27747 extract_insn_cached (insn);
27748 for (i = 0; i < recog_data.n_operands; i++)
27749 if (REG_P (recog_data.operand[i])
27750 && REGNO (recog_data.operand[i]) > BX_REG)
27755 /* Return nonzero when P points to a register encoded via a REX prefix.
27756    Called via for_each_rtx.  */
27758 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27760 unsigned int regno;
27763 regno = REGNO (*p);
27764 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27767 /* Return true when INSN mentions a register that must be encoded using a REX
27768    prefix.  */
27770 x86_extended_reg_mentioned_p (rtx insn)
27772 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27773 extended_reg_mentioned_1, NULL);
27776 /* If profitable, negate (without causing overflow) integer constant
27777 of mode MODE at location LOC. Return true in this case. */
27779 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27783 if (!CONST_INT_P (*loc))
27789 /* DImode x86_64 constants must fit in 32 bits. */
27790 gcc_assert (x86_64_immediate_operand (*loc, mode));
27801 gcc_unreachable ();
27804 /* Avoid overflows. */
27805 if (mode_signbit_p (mode, *loc))
27808 val = INTVAL (*loc);
27810 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
27811    Exception: -128 encodes smaller than 128, so swap sign and operation.  */
27812 if ((val < 0 && val != -128)
27815 *loc = GEN_INT (-val);
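  /* Concrete encodings behind the rule above: "addl $-4, %eax" and
     "subl $4, %eax" both take a sign-extended 8-bit immediate, so the sub
     form is chosen purely for readability.  For 128 the add form would need
     a 32-bit immediate while "subl $-128, %eax" still fits in 8 bits, so
     128 is negated here; -128 itself is already the short form and is
     left alone.  */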
27822 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27823 optabs would emit if we didn't have TFmode patterns. */
27826 x86_emit_floatuns (rtx operands[2])
27828 rtx neglab, donelab, i0, i1, f0, in, out;
27829 enum machine_mode mode, inmode;
27831 inmode = GET_MODE (operands[1]);
27832 gcc_assert (inmode == SImode || inmode == DImode);
27835 in = force_reg (inmode, operands[1]);
27836 mode = GET_MODE (out);
27837 neglab = gen_label_rtx ();
27838 donelab = gen_label_rtx ();
27839 f0 = gen_reg_rtx (mode);
27841 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27843 expand_float (out, in, 0);
27845 emit_jump_insn (gen_jump (donelab));
27848 emit_label (neglab);
27850 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27852 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27854 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27856 expand_float (f0, i0, 0);
27858 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27860 emit_label (donelab);
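  /* In C terms, the sequence emitted above is roughly:

	 if ((signed) in >= 0)
	   out = (fp) in;
	 else
	   {
	     in2 = (in >> 1) | (in & 1);    halve, keeping the low bit
	     out = (fp) in2;
	     out = out + out;               undo the halving
	   }

     ORing the shifted-out low bit back in makes the final doubling
     round correctly.  */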
27863 /* AVX does not support 32-byte integer vector operations,
27864 thus the longest vector we are faced with is V16QImode. */
27865 #define MAX_VECT_LEN 16
27867 struct expand_vec_perm_d
27869 rtx target, op0, op1;
27870 unsigned char perm[MAX_VECT_LEN];
27871 enum machine_mode vmode;
27872 unsigned char nelt;
27876 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27877 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27879 /* Get a vector mode of the same size as the original but with elements
27880 twice as wide. This is only guaranteed to apply to integral vectors. */
27882 static inline enum machine_mode
27883 get_mode_wider_vector (enum machine_mode o)
27885 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27886 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27887 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27888 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
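/* For example, V8HImode (eight 16-bit elements) yields V4SImode (four
   32-bit elements): the vector is still 16 bytes wide, as the asserts
   above check.  */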
27892 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27893 with all elements equal to VAR. Return true if successful. */
27896 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27897 rtx target, rtx val)
27920 /* First attempt to recognize VAL as-is. */
27921 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27922 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27923 if (recog_memoized (insn) < 0)
27926 /* If that fails, force VAL into a register. */
27929 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27930 seq = get_insns ();
27933 emit_insn_before (seq, insn);
27935 ok = recog_memoized (insn) >= 0;
27944 if (TARGET_SSE || TARGET_3DNOW_A)
27948 val = gen_lowpart (SImode, val);
27949 x = gen_rtx_TRUNCATE (HImode, val);
27950 x = gen_rtx_VEC_DUPLICATE (mode, x);
27951 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27964 struct expand_vec_perm_d dperm;
27968 memset (&dperm, 0, sizeof (dperm));
27969 dperm.target = target;
27970 dperm.vmode = mode;
27971 dperm.nelt = GET_MODE_NUNITS (mode);
27972 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27974 /* Extend to SImode using a paradoxical SUBREG. */
27975 tmp1 = gen_reg_rtx (SImode);
27976 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27978 /* Insert the SImode value as low element of a V4SImode vector. */
27979 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27980 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27982 ok = (expand_vec_perm_1 (&dperm)
27983 || expand_vec_perm_broadcast_1 (&dperm));
27995 /* Replicate the value once into the next wider mode and recurse. */
27997 enum machine_mode smode, wsmode, wvmode;
28000 smode = GET_MODE_INNER (mode);
28001 wvmode = get_mode_wider_vector (mode);
28002 wsmode = GET_MODE_INNER (wvmode);
28004 val = convert_modes (wsmode, smode, val, true);
28005 x = expand_simple_binop (wsmode, ASHIFT, val,
28006 GEN_INT (GET_MODE_BITSIZE (smode)),
28007 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28008 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28010 x = gen_lowpart (wvmode, target);
28011 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
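	/* For example, broadcasting the QImode value 0xAB first forms the
	   HImode value 0xABAB (val | val << 8), then recurses to broadcast
	   that across the twice-as-wide vector mode; the resulting bit
	   pattern is identical to the desired QImode broadcast.  */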
28019 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28020 rtx x = gen_reg_rtx (hvmode);
28022 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28025 x = gen_rtx_VEC_CONCAT (mode, x, x);
28026 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28035 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28036 whose ONE_VAR element is VAR, and other elements are zero. Return true
28040 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28041 rtx target, rtx var, int one_var)
28043 enum machine_mode vsimode;
28046 bool use_vector_set = false;
28051 /* For SSE4.1, we normally use vector set. But if the second
28052    element is zero and inter-unit moves are OK, we use movq
28053    instead.  */
28054 use_vector_set = (TARGET_64BIT
28056 && !(TARGET_INTER_UNIT_MOVES
28062 use_vector_set = TARGET_SSE4_1;
28065 use_vector_set = TARGET_SSE2;
28068 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28075 use_vector_set = TARGET_AVX;
28078 /* Use ix86_expand_vector_set in 64bit mode only. */
28079 use_vector_set = TARGET_AVX && TARGET_64BIT;
28085 if (use_vector_set)
28087 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28088 var = force_reg (GET_MODE_INNER (mode), var);
28089 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28105 var = force_reg (GET_MODE_INNER (mode), var);
28106 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28107 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28112 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28113 new_target = gen_reg_rtx (mode);
28115 new_target = target;
28116 var = force_reg (GET_MODE_INNER (mode), var);
28117 x = gen_rtx_VEC_DUPLICATE (mode, var);
28118 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28119 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28122 /* We need to shuffle the value to the correct position, so
28123 create a new pseudo to store the intermediate result. */
28125 /* With SSE2, we can use the integer shuffle insns. */
28126 if (mode != V4SFmode && TARGET_SSE2)
28128 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28130 GEN_INT (one_var == 1 ? 0 : 1),
28131 GEN_INT (one_var == 2 ? 0 : 1),
28132 GEN_INT (one_var == 3 ? 0 : 1)));
28133 if (target != new_target)
28134 emit_move_insn (target, new_target);
28138 /* Otherwise convert the intermediate result to V4SFmode and
28139 use the SSE1 shuffle instructions. */
28140 if (mode != V4SFmode)
28142 tmp = gen_reg_rtx (V4SFmode);
28143 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28148 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28150 GEN_INT (one_var == 1 ? 0 : 1),
28151 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28152 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28154 if (mode != V4SFmode)
28155 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28156 else if (tmp != target)
28157 emit_move_insn (target, tmp);
28159 else if (target != new_target)
28160 emit_move_insn (target, new_target);
28165 vsimode = V4SImode;
28171 vsimode = V2SImode;
28177 /* Zero extend the variable element to SImode and recurse. */
28178 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28180 x = gen_reg_rtx (vsimode);
28181 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28183 gcc_unreachable ();
28185 emit_move_insn (target, gen_lowpart (mode, x));
28193 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28194 consisting of the values in VALS. It is known that all elements
28195 except ONE_VAR are constants. Return true if successful. */
28198 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28199 rtx target, rtx vals, int one_var)
28201 rtx var = XVECEXP (vals, 0, one_var);
28202 enum machine_mode wmode;
28205 const_vec = copy_rtx (vals);
28206 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28207 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28215 /* For the two element vectors, it's just as easy to use
28216 the general case. */
28220 /* Use ix86_expand_vector_set in 64bit mode only. */
28243 /* There's no way to set one QImode entry easily. Combine
28244 the variable value with its adjacent constant value, and
28245 promote to an HImode set. */
28246 x = XVECEXP (vals, 0, one_var ^ 1);
28249 var = convert_modes (HImode, QImode, var, true);
28250 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28251 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28252 x = GEN_INT (INTVAL (x) & 0xff);
28256 var = convert_modes (HImode, QImode, var, true);
28257 x = gen_int_mode (INTVAL (x) << 8, HImode);
28259 if (x != const0_rtx)
28260 var = expand_simple_binop (HImode, IOR, var, x, var,
28261 1, OPTAB_LIB_WIDEN);
28263 x = gen_reg_rtx (wmode);
28264 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28265 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28267 emit_move_insn (target, gen_lowpart (mode, x));
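	/* For example, setting element 5 of a V16QImode vector whose
	   element 4 holds the constant 7 becomes an HImode set of
	   element 2 (== 5 >> 1) with the value (var << 8) | 7.  */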
28274 emit_move_insn (target, const_vec);
28275 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28279 /* A subroutine of ix86_expand_vector_init_general. Use vector
28280 concatenate to handle the most general case: all values variable,
28281 and none identical. */
28284 ix86_expand_vector_init_concat (enum machine_mode mode,
28285 rtx target, rtx *ops, int n)
28287 enum machine_mode cmode, hmode = VOIDmode;
28288 rtx first[8], second[4];
28328 gcc_unreachable ();
28331 if (!register_operand (ops[1], cmode))
28332 ops[1] = force_reg (cmode, ops[1]);
28333 if (!register_operand (ops[0], cmode))
28334 ops[0] = force_reg (cmode, ops[0]);
28335 emit_insn (gen_rtx_SET (VOIDmode, target,
28336 gen_rtx_VEC_CONCAT (mode, ops[0],
28356 gcc_unreachable ();
28372 gcc_unreachable ();
28377 /* FIXME: We process inputs backward to help RA. PR 36222. */
28380 for (; i > 0; i -= 2, j--)
28382 first[j] = gen_reg_rtx (cmode);
28383 v = gen_rtvec (2, ops[i - 1], ops[i]);
28384 ix86_expand_vector_init (false, first[j],
28385 gen_rtx_PARALLEL (cmode, v));
28391 gcc_assert (hmode != VOIDmode);
28392 for (i = j = 0; i < n; i += 2, j++)
28394 second[j] = gen_reg_rtx (hmode);
28395 ix86_expand_vector_init_concat (hmode, second [j],
28399 ix86_expand_vector_init_concat (mode, target, second, n);
28402 ix86_expand_vector_init_concat (mode, target, first, n);
28406 gcc_unreachable ();
28410 /* A subroutine of ix86_expand_vector_init_general. Use vector
28411 interleave to handle the most general case: all values variable,
28412 and none identical. */
28415 ix86_expand_vector_init_interleave (enum machine_mode mode,
28416 rtx target, rtx *ops, int n)
28418 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28421 rtx (*gen_load_even) (rtx, rtx, rtx);
28422 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28423 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28428 gen_load_even = gen_vec_setv8hi;
28429 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28430 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28431 inner_mode = HImode;
28432 first_imode = V4SImode;
28433 second_imode = V2DImode;
28434 third_imode = VOIDmode;
28437 gen_load_even = gen_vec_setv16qi;
28438 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28439 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28440 inner_mode = QImode;
28441 first_imode = V8HImode;
28442 second_imode = V4SImode;
28443 third_imode = V2DImode;
28446 gcc_unreachable ();
28449 for (i = 0; i < n; i++)
28451       /* Extend the odd element to SImode using a paradoxical SUBREG.  */
28452 op0 = gen_reg_rtx (SImode);
28453 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28455 /* Insert the SImode value as low element of V4SImode vector. */
28456 op1 = gen_reg_rtx (V4SImode);
28457 op0 = gen_rtx_VEC_MERGE (V4SImode,
28458 gen_rtx_VEC_DUPLICATE (V4SImode,
28460 CONST0_RTX (V4SImode),
28462 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28464       /* Cast the V4SImode vector back to a vector in the original mode.  */
28465 op0 = gen_reg_rtx (mode);
28466 emit_move_insn (op0, gen_lowpart (mode, op1));
28468       /* Load even elements into the second position.  */
28469 emit_insn (gen_load_even (op0,
28470 force_reg (inner_mode,
28474 /* Cast vector to FIRST_IMODE vector. */
28475 ops[i] = gen_reg_rtx (first_imode);
28476 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28479 /* Interleave low FIRST_IMODE vectors. */
28480 for (i = j = 0; i < n; i += 2, j++)
28482 op0 = gen_reg_rtx (first_imode);
28483 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28485 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28486 ops[j] = gen_reg_rtx (second_imode);
28487 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28490 /* Interleave low SECOND_IMODE vectors. */
28491 switch (second_imode)
28494 for (i = j = 0; i < n / 2; i += 2, j++)
28496 op0 = gen_reg_rtx (second_imode);
28497 emit_insn (gen_interleave_second_low (op0, ops[i],
28500	  /* Cast the SECOND_IMODE vector to the THIRD_IMODE
28501	     vector.  */
28502 ops[j] = gen_reg_rtx (third_imode);
28503 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28505 second_imode = V2DImode;
28506 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28510 op0 = gen_reg_rtx (second_imode);
28511 emit_insn (gen_interleave_second_low (op0, ops[0],
28514      /* Cast the SECOND_IMODE vector back to a vector in the original
28515	  mode.  */
28516 emit_insn (gen_rtx_SET (VOIDmode, target,
28517 gen_lowpart (mode, op0)));
28521 gcc_unreachable ();
28525 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28526 all values variable, and none identical. */
28529 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28530 rtx target, rtx vals)
28532 rtx ops[32], op0, op1;
28533 enum machine_mode half_mode = VOIDmode;
28540 if (!mmx_ok && !TARGET_SSE)
28552 n = GET_MODE_NUNITS (mode);
28553 for (i = 0; i < n; i++)
28554 ops[i] = XVECEXP (vals, 0, i);
28555 ix86_expand_vector_init_concat (mode, target, ops, n);
28559 half_mode = V16QImode;
28563 half_mode = V8HImode;
28567 n = GET_MODE_NUNITS (mode);
28568 for (i = 0; i < n; i++)
28569 ops[i] = XVECEXP (vals, 0, i);
28570 op0 = gen_reg_rtx (half_mode);
28571 op1 = gen_reg_rtx (half_mode);
28572 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28574 ix86_expand_vector_init_interleave (half_mode, op1,
28575 &ops [n >> 1], n >> 2);
28576 emit_insn (gen_rtx_SET (VOIDmode, target,
28577 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28581 if (!TARGET_SSE4_1)
28589 /* Don't use ix86_expand_vector_init_interleave if we can't
28590 move from GPR to SSE register directly. */
28591 if (!TARGET_INTER_UNIT_MOVES)
28594 n = GET_MODE_NUNITS (mode);
28595 for (i = 0; i < n; i++)
28596 ops[i] = XVECEXP (vals, 0, i);
28597 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28605 gcc_unreachable ();
28609 int i, j, n_elts, n_words, n_elt_per_word;
28610 enum machine_mode inner_mode;
28611 rtx words[4], shift;
28613 inner_mode = GET_MODE_INNER (mode);
28614 n_elts = GET_MODE_NUNITS (mode);
28615 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28616 n_elt_per_word = n_elts / n_words;
28617 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28619 for (i = 0; i < n_words; ++i)
28621 rtx word = NULL_RTX;
28623 for (j = 0; j < n_elt_per_word; ++j)
28625 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28626 elt = convert_modes (word_mode, inner_mode, elt, true);
28632 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28633 word, 1, OPTAB_LIB_WIDEN);
28634 word = expand_simple_binop (word_mode, IOR, word, elt,
28635 word, 1, OPTAB_LIB_WIDEN);
28643 emit_move_insn (target, gen_lowpart (mode, words[0]));
28644 else if (n_words == 2)
28646 rtx tmp = gen_reg_rtx (mode);
28647 emit_clobber (tmp);
28648 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28649 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28650 emit_move_insn (target, tmp);
28652 else if (n_words == 4)
28654 rtx tmp = gen_reg_rtx (V4SImode);
28655 gcc_assert (word_mode == SImode);
28656 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28657 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28658 emit_move_insn (target, gen_lowpart (mode, tmp));
28661 gcc_unreachable ();
28665 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28666 instructions unless MMX_OK is true. */
28669 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28671 enum machine_mode mode = GET_MODE (target);
28672 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28673 int n_elts = GET_MODE_NUNITS (mode);
28674 int n_var = 0, one_var = -1;
28675 bool all_same = true, all_const_zero = true;
28679 for (i = 0; i < n_elts; ++i)
28681 x = XVECEXP (vals, 0, i);
28682 if (!(CONST_INT_P (x)
28683 || GET_CODE (x) == CONST_DOUBLE
28684 || GET_CODE (x) == CONST_FIXED))
28685 n_var++, one_var = i;
28686 else if (x != CONST0_RTX (inner_mode))
28687 all_const_zero = false;
28688 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28692 /* Constants are best loaded from the constant pool. */
28695 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28699 /* If all values are identical, broadcast the value. */
28701 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28702 XVECEXP (vals, 0, 0)))
28705 /* Values where only one field is non-constant are best loaded from
28706 the pool and overwritten via move later. */
28710 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28711 XVECEXP (vals, 0, one_var),
28715 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28719 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28723 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28725 enum machine_mode mode = GET_MODE (target);
28726 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28727 enum machine_mode half_mode;
28728 bool use_vec_merge = false;
28730 static rtx (*gen_extract[6][2]) (rtx, rtx)
28732 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28733 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28734 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28735 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28736 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28737 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28739 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28741 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28742 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28743 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28744 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28745 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28746 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28756 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28757 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28759 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28761 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28762 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28768 use_vec_merge = TARGET_SSE4_1;
28776 /* For the two element vectors, we implement a VEC_CONCAT with
28777 the extraction of the other element. */
28779 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28780 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28783 op0 = val, op1 = tmp;
28785 op0 = tmp, op1 = val;
28787 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28788 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28793 use_vec_merge = TARGET_SSE4_1;
28800 use_vec_merge = true;
28804 /* tmp = target = A B C D */
28805 tmp = copy_to_reg (target);
28806 /* target = A A B B */
28807 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28808 /* target = X A B B */
28809 ix86_expand_vector_set (false, target, val, 0);
28810 /* target = A X C D */
28811 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28812 const1_rtx, const0_rtx,
28813 GEN_INT (2+4), GEN_INT (3+4)));
28817 /* tmp = target = A B C D */
28818 tmp = copy_to_reg (target);
28819 /* tmp = X B C D */
28820 ix86_expand_vector_set (false, tmp, val, 0);
28821 /* target = A B X D */
28822 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28823 const0_rtx, const1_rtx,
28824 GEN_INT (0+4), GEN_INT (3+4)));
28828 /* tmp = target = A B C D */
28829 tmp = copy_to_reg (target);
28830 /* tmp = X B C D */
28831 ix86_expand_vector_set (false, tmp, val, 0);
28832	  /* target = A B C X */
28833 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28834 const0_rtx, const1_rtx,
28835 GEN_INT (2+4), GEN_INT (0+4)));
28839 gcc_unreachable ();
28844 use_vec_merge = TARGET_SSE4_1;
28848 /* Element 0 handled by vec_merge below. */
28851 use_vec_merge = true;
28857 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28858 store into element 0, then shuffle them back. */
28862 order[0] = GEN_INT (elt);
28863 order[1] = const1_rtx;
28864 order[2] = const2_rtx;
28865 order[3] = GEN_INT (3);
28866 order[elt] = const0_rtx;
28868 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28869 order[1], order[2], order[3]));
28871 ix86_expand_vector_set (false, target, val, 0);
28873 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28874 order[1], order[2], order[3]));
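	  /* For example, with ELT == 2 the permutation is { 2, 1, 0, 3 },
	     which swaps elements 0 and 2.  Being its own inverse, applying
	     it again after the element-0 store moves VAL into element 2
	     and restores the other elements.  */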
28878 /* For SSE1, we have to reuse the V4SF code. */
28879 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28880 gen_lowpart (SFmode, val), elt);
28885 use_vec_merge = TARGET_SSE2;
28888 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28892 use_vec_merge = TARGET_SSE4_1;
28899 half_mode = V16QImode;
28905 half_mode = V8HImode;
28911 half_mode = V4SImode;
28917 half_mode = V2DImode;
28923 half_mode = V4SFmode;
28929 half_mode = V2DFmode;
28935 /* Compute offset. */
28939 gcc_assert (i <= 1);
28941 /* Extract the half. */
28942 tmp = gen_reg_rtx (half_mode);
28943 emit_insn (gen_extract[j][i] (tmp, target));
28945 /* Put val in tmp at elt. */
28946 ix86_expand_vector_set (false, tmp, val, elt);
28949 emit_insn (gen_insert[j][i] (target, target, tmp));
28958 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28959 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28960 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28964 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28966 emit_move_insn (mem, target);
28968 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28969 emit_move_insn (tmp, val);
28971 emit_move_insn (target, mem);
28976 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28978 enum machine_mode mode = GET_MODE (vec);
28979 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28980 bool use_vec_extr = false;
28993 use_vec_extr = true;
28997 use_vec_extr = TARGET_SSE4_1;
29009 tmp = gen_reg_rtx (mode);
29010 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29011 GEN_INT (elt), GEN_INT (elt),
29012 GEN_INT (elt+4), GEN_INT (elt+4)));
29016 tmp = gen_reg_rtx (mode);
29017 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29021 gcc_unreachable ();
29024 use_vec_extr = true;
29029 use_vec_extr = TARGET_SSE4_1;
29043 tmp = gen_reg_rtx (mode);
29044 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29045 GEN_INT (elt), GEN_INT (elt),
29046 GEN_INT (elt), GEN_INT (elt)));
29050 tmp = gen_reg_rtx (mode);
29051 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29055 gcc_unreachable ();
29058 use_vec_extr = true;
29063 /* For SSE1, we have to reuse the V4SF code. */
29064 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29065 gen_lowpart (V4SFmode, vec), elt);
29071 use_vec_extr = TARGET_SSE2;
29074 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29078 use_vec_extr = TARGET_SSE4_1;
29082 /* ??? Could extract the appropriate HImode element and shift. */
29089 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29090 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29092 /* Let the rtl optimizers know about the zero extension performed. */
29093 if (inner_mode == QImode || inner_mode == HImode)
29095 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29096 target = gen_lowpart (SImode, target);
29099 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29103 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29105 emit_move_insn (mem, vec);
29107 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29108 emit_move_insn (target, tmp);
29112 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29113 pattern to reduce; DEST is the destination; IN is the input vector. */
29116 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29118 rtx tmp1, tmp2, tmp3;
29120 tmp1 = gen_reg_rtx (V4SFmode);
29121 tmp2 = gen_reg_rtx (V4SFmode);
29122 tmp3 = gen_reg_rtx (V4SFmode);
29124 emit_insn (gen_sse_movhlps (tmp1, in, in));
29125 emit_insn (fn (tmp2, tmp1, in));
29127 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29128 const1_rtx, const1_rtx,
29129 GEN_INT (1+4), GEN_INT (1+4)));
29130 emit_insn (fn (dest, tmp2, tmp3));
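  /* Data flow, for IN = { a, b, c, d } and FN = addition:
	 tmp1 = { c, d, c, d }               movhlps
	 tmp2 = { a+c, b+d, ?, ? }           fn
	 tmp3 = { b+d, b+d, b+d, b+d }       shufps (broadcast element 1)
	 dest = { a+b+c+d, ?, ?, ? }         fn
     Element 0 of DEST holds the full reduction.  */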
29133 /* Target hook for scalar_mode_supported_p. */
29135 ix86_scalar_mode_supported_p (enum machine_mode mode)
29137 if (DECIMAL_FLOAT_MODE_P (mode))
29138 return default_decimal_float_supported_p ();
29139 else if (mode == TFmode)
29142 return default_scalar_mode_supported_p (mode);
29145 /* Implements target hook vector_mode_supported_p. */
29147 ix86_vector_mode_supported_p (enum machine_mode mode)
29149 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29151 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29153 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29155 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29157 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29162 /* Target hook for c_mode_for_suffix. */
29163 static enum machine_mode
29164 ix86_c_mode_for_suffix (char suffix)
29174 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29176 We do this in the new i386 backend to maintain source compatibility
29177 with the old cc0-based compiler. */
29180 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29181 tree inputs ATTRIBUTE_UNUSED,
29184 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29186 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29191 /* Implements the target vector targetm.asm.encode_section_info.  This
29192    is not used by NetWare.  */
29194 static void ATTRIBUTE_UNUSED
29195 ix86_encode_section_info (tree decl, rtx rtl, int first)
29197 default_encode_section_info (decl, rtl, first);
29199 if (TREE_CODE (decl) == VAR_DECL
29200 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29201 && ix86_in_large_data_p (decl))
29202 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29205 /* Worker function for REVERSE_CONDITION. */
29208 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29210 return (mode != CCFPmode && mode != CCFPUmode
29211 ? reverse_condition (code)
29212 : reverse_condition_maybe_unordered (code));
29215 /* Output code to perform an x87 FP register move, from OPERANDS[1]
29216    to OPERANDS[0].  */
29219 output_387_reg_move (rtx insn, rtx *operands)
29221 if (REG_P (operands[0]))
29223 if (REG_P (operands[1])
29224 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29226 if (REGNO (operands[0]) == FIRST_STACK_REG)
29227 return output_387_ffreep (operands, 0);
29228 return "fstp\t%y0";
29230 if (STACK_TOP_P (operands[0]))
29231 return "fld%Z1\t%y1";
29234 else if (MEM_P (operands[0]))
29236 gcc_assert (REG_P (operands[1]));
29237 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29238 return "fstp%Z0\t%y0";
29241 /* There is no non-popping store to memory for XFmode.
29242 So if we need one, follow the store with a load. */
29243 if (GET_MODE (operands[0]) == XFmode)
29244 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29246 return "fst%Z0\t%y0";
29253 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
29254    the FP status register is set.  */
29257 ix86_emit_fp_unordered_jump (rtx label)
29259 rtx reg = gen_reg_rtx (HImode);
29262 emit_insn (gen_x86_fnstsw_1 (reg));
29264 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29266 emit_insn (gen_x86_sahf_1 (reg));
29268 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29269 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29273 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29275 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29276 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29279 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29280 gen_rtx_LABEL_REF (VOIDmode, label),
29282 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29284 emit_jump_insn (temp);
29285 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29288 /* Output code to perform a log1p XFmode calculation. */
29290 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29292 rtx label1 = gen_label_rtx ();
29293 rtx label2 = gen_label_rtx ();
29295 rtx tmp = gen_reg_rtx (XFmode);
29296 rtx tmp2 = gen_reg_rtx (XFmode);
29299 emit_insn (gen_absxf2 (tmp, op1));
29300 test = gen_rtx_GE (VOIDmode, tmp,
29301 CONST_DOUBLE_FROM_REAL_VALUE (
29302 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29304 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29306 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29307 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29308 emit_jump (label2);
29310 emit_label (label1);
29311 emit_move_insn (tmp, CONST1_RTX (XFmode));
29312 emit_insn (gen_addxf3 (tmp, op1, tmp));
29313 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29314 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29316 emit_label (label2);
29319 /* Output code to perform a Newton-Raphson approximation of a single-precision
29320    floating-point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
29322 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29324 rtx x0, x1, e0, e1, two;
29326 x0 = gen_reg_rtx (mode);
29327 e0 = gen_reg_rtx (mode);
29328 e1 = gen_reg_rtx (mode);
29329 x1 = gen_reg_rtx (mode);
29331 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29333 if (VECTOR_MODE_P (mode))
29334 two = ix86_build_const_vector (SFmode, true, two);
29336 two = force_reg (mode, two);
29338 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29340 /* x0 = rcp(b) estimate */
29341 emit_insn (gen_rtx_SET (VOIDmode, x0,
29342 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29345 emit_insn (gen_rtx_SET (VOIDmode, e0,
29346 gen_rtx_MULT (mode, x0, a)));
29348 emit_insn (gen_rtx_SET (VOIDmode, e1,
29349 gen_rtx_MULT (mode, x0, b)));
29351 emit_insn (gen_rtx_SET (VOIDmode, x1,
29352 gen_rtx_MINUS (mode, two, e1)));
29353 /* res = e0 * x1 */
29354 emit_insn (gen_rtx_SET (VOIDmode, res,
29355 gen_rtx_MULT (mode, e0, x1)));
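  /* Why this works: one Newton-Raphson step for f(x) = 1/x - b refines an
     estimate x0 of 1/b into x0 * (2 - b * x0).  The code folds the leading
     x0 into e0 = a * x0, so res = e0 * (2 - b * x0)
     = a * x0 * (2 - b * x0), i.e. a times the refined reciprocal.  */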
29358 /* Output code to perform a Newton-Raphson approximation of a
29359    single-precision floating-point [reciprocal] square root.  */
29361 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29364 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29367 x0 = gen_reg_rtx (mode);
29368 e0 = gen_reg_rtx (mode);
29369 e1 = gen_reg_rtx (mode);
29370 e2 = gen_reg_rtx (mode);
29371 e3 = gen_reg_rtx (mode);
29373 real_from_integer (&r, VOIDmode, -3, -1, 0);
29374 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29376 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29377 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29379 if (VECTOR_MODE_P (mode))
29381 mthree = ix86_build_const_vector (SFmode, true, mthree);
29382 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29385 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29386 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29388 /* x0 = rsqrt(a) estimate */
29389 emit_insn (gen_rtx_SET (VOIDmode, x0,
29390 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29393   /* If a == 0.0, filter out the infinite rsqrt estimate to prevent a NaN for sqrt (0.0).  */
29398 zero = gen_reg_rtx (mode);
29399 mask = gen_reg_rtx (mode);
29401 zero = force_reg (mode, CONST0_RTX(mode));
29402 emit_insn (gen_rtx_SET (VOIDmode, mask,
29403 gen_rtx_NE (mode, zero, a)));
29405 emit_insn (gen_rtx_SET (VOIDmode, x0,
29406 gen_rtx_AND (mode, x0, mask)));
29410 emit_insn (gen_rtx_SET (VOIDmode, e0,
29411 gen_rtx_MULT (mode, x0, a)));
29413 emit_insn (gen_rtx_SET (VOIDmode, e1,
29414 gen_rtx_MULT (mode, e0, x0)));
29417 mthree = force_reg (mode, mthree);
29418 emit_insn (gen_rtx_SET (VOIDmode, e2,
29419 gen_rtx_PLUS (mode, e1, mthree)));
29421 mhalf = force_reg (mode, mhalf);
29423 /* e3 = -.5 * x0 */
29424 emit_insn (gen_rtx_SET (VOIDmode, e3,
29425 gen_rtx_MULT (mode, x0, mhalf)));
29427 /* e3 = -.5 * e0 */
29428 emit_insn (gen_rtx_SET (VOIDmode, e3,
29429 gen_rtx_MULT (mode, e0, mhalf)));
29430 /* ret = e2 * e3 */
29431 emit_insn (gen_rtx_SET (VOIDmode, res,
29432 gen_rtx_MULT (mode, e2, e3)));
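  /* Why this works: one Newton-Raphson step for f(x) = 1/(x*x) - a refines
     an estimate x0 of 1/sqrt(a) into x0 * (3 - a*x0*x0) / 2, which is the
     -0.5 * x0 * (a*x0*x0 - 3) form used above.  For sqrt, e3 is based on
     e0 = a * x0 instead of x0, giving a * (refined 1/sqrt(a)) = sqrt(a).  */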
29435 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29437 static void ATTRIBUTE_UNUSED
29438 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29441 /* With Binutils 2.15, the "@unwind" marker must be specified on
29442    every occurrence of the ".eh_frame" section, not just the first
29443    one.  */
29445 && strcmp (name, ".eh_frame") == 0)
29447 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29448 flags & SECTION_WRITE ? "aw" : "a");
29451 default_elf_asm_named_section (name, flags, decl);
29454 /* Return the mangling of TYPE if it is an extended fundamental type. */
29456 static const char *
29457 ix86_mangle_type (const_tree type)
29459 type = TYPE_MAIN_VARIANT (type);
29461 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29462 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29465 switch (TYPE_MODE (type))
29468 /* __float128 is "g". */
29471 /* "long double" or __float80 is "e". */
29478 /* For 32-bit code we can save the PIC register setup by using the
29479    __stack_chk_fail_local hidden function instead of calling
29480    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
29481    register, so it is better to call __stack_chk_fail directly.  */
29484 ix86_stack_protect_fail (void)
29486 return TARGET_64BIT
29487 ? default_external_stack_protect_fail ()
29488 : default_hidden_stack_protect_fail ();
29491 /* Select a format to encode pointers in exception handling data. CODE
29492 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29493 true if the symbol may be affected by dynamic relocations.
29495 ??? All x86 object file formats are capable of representing this.
29496 After all, the relocation needed is the same as for the call insn.
29497 Whether or not a particular assembler allows us to enter such, I
29498 guess we'll have to see. */
29500 asm_preferred_eh_data_format (int code, int global)
29504 int type = DW_EH_PE_sdata8;
29506 || ix86_cmodel == CM_SMALL_PIC
29507 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29508 type = DW_EH_PE_sdata4;
29509 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29511 if (ix86_cmodel == CM_SMALL
29512 || (ix86_cmodel == CM_MEDIUM && code))
29513 return DW_EH_PE_udata4;
29514 return DW_EH_PE_absptr;
29517 /* Expand copysign from SIGN to the positive value ABS_VALUE
29518    storing in RESULT.  If MASK is non-null, it shall be a mask to mask out
29519    the sign bit.  */
29521 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29523 enum machine_mode mode = GET_MODE (sign);
29524 rtx sgn = gen_reg_rtx (mode);
29525 if (mask == NULL_RTX)
29527 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29528 if (!VECTOR_MODE_P (mode))
29530 /* We need to generate a scalar mode mask in this case. */
29531 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29532 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29533 mask = gen_reg_rtx (mode);
29534 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29538 mask = gen_rtx_NOT (mode, mask);
29539 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29540 gen_rtx_AND (mode, mask, sign)));
29541 emit_insn (gen_rtx_SET (VOIDmode, result,
29542 gen_rtx_IOR (mode, abs_value, sgn)));
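  /* Bitwise view for scalar SFmode (assuming ix86_build_signbit_mask with
     invert == false yields the bare sign bit, 0x80000000; a passed-in fabs
     mask is inverted above to the same effect): SGN = MASK & SIGN isolates
     the sign bit of SIGN, and the IOR stamps it onto the nonnegative
     ABS_VALUE.  */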
29545 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29546    mask for masking out the sign-bit is stored in *SMASK, if that is
29547    non-null.  */
29549 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29551 enum machine_mode mode = GET_MODE (op0);
29554 xa = gen_reg_rtx (mode);
29555 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29556 if (!VECTOR_MODE_P (mode))
29558 /* We need to generate a scalar mode mask in this case. */
29559 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29560 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29561 mask = gen_reg_rtx (mode);
29562 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29564 emit_insn (gen_rtx_SET (VOIDmode, xa,
29565 gen_rtx_AND (mode, op0, mask)));
29573 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29574 swapping the operands if SWAP_OPERANDS is true. The expanded
29575 code is a forward jump to a newly created label in case the
29576 comparison is true. The generated label rtx is returned. */
29578 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29579 bool swap_operands)
29590 label = gen_label_rtx ();
29591 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29592 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29593 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29594 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29595 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29596 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29597 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29598 JUMP_LABEL (tmp) = label;
29603 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29604 using comparison code CODE. Operands are swapped for the comparison if
29605 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29607 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29608 bool swap_operands)
29610 enum machine_mode mode = GET_MODE (op0);
29611 rtx mask = gen_reg_rtx (mode);
29620 if (mode == DFmode)
29621 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29622 gen_rtx_fmt_ee (code, mode, op0, op1)));
29624 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29625 gen_rtx_fmt_ee (code, mode, op0, op1)));
29630 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29631 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29633 ix86_gen_TWO52 (enum machine_mode mode)
29635 REAL_VALUE_TYPE TWO52r;
29638 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29639 TWO52 = const_double_from_real_value (TWO52r, mode);
29640 TWO52 = force_reg (mode, TWO52);
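  /* The trick behind TWO52: once a value reaches the binade [2**52, 2**53),
     the 52-bit double mantissa has no room left for fraction bits, so
     adding TWO52 rounds the fractional part away and subtracting it back
     leaves the rounded integer.  For SFmode the same holds with 2**23 and
     the 23-bit mantissa.  */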
29645 /* Expand SSE sequence for computing lround from OP1 storing
29646    into OP0.  */
29648 ix86_expand_lround (rtx op0, rtx op1)
29650   /* C code for the stuff we're doing below:
29651	tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29652	return (long) tmp;
29653   */
29654 enum machine_mode mode = GET_MODE (op1);
29655 const struct real_format *fmt;
29656 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29659 /* load nextafter (0.5, 0.0) */
29660 fmt = REAL_MODE_FORMAT (mode);
29661 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29662 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
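  /* Worked numbers for DFmode: fmt->p is 53, so half_minus_pred_half is
     2**-54 and pred_half = 0.5 - 2**-54, the largest double below 0.5.
     Using it instead of exactly 0.5 keeps inputs just below one half,
     such as 0.49999999999999994, from rounding up: adding 0.5 to that
     value rounds to 1.0, while adding pred_half gives 1 - 2**-53, which
     truncates to 0.  */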
29664 /* adj = copysign (0.5, op1) */
29665 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29666 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29668 /* adj = op1 + adj */
29669 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29671 /* op0 = (imode)adj */
29672 expand_fix (op0, adj, 0);
29675 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1,
29676    storing into OPERAND0.  */
29678 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29680   /* C code for the stuff we're doing below (for do_floor):
29681	xi = (long) op1;
29682	xi -= (double) xi > op1 ? 1 : 0;
29683	return xi;
29684   */
29685 enum machine_mode fmode = GET_MODE (op1);
29686 enum machine_mode imode = GET_MODE (op0);
29687 rtx ireg, freg, label, tmp;
29689 /* reg = (long)op1 */
29690 ireg = gen_reg_rtx (imode);
29691 expand_fix (ireg, op1, 0);
29693 /* freg = (double)reg */
29694 freg = gen_reg_rtx (fmode);
29695 expand_float (freg, ireg, 0);
29697 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29698 label = ix86_expand_sse_compare_and_jump (UNLE,
29699 freg, op1, !do_floor);
29700 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29701 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29702 emit_move_insn (ireg, tmp);
29704 emit_label (label);
29705 LABEL_NUSES (label) = 1;
29707 emit_move_insn (op0, ireg);
29710 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29711 result in OPERAND0. */
29713 ix86_expand_rint (rtx operand0, rtx operand1)
29715   /* C code for the stuff we're doing below:
29716	xa = fabs (operand1);
29717	if (!isless (xa, 2**52))
29718	  return operand1;
29719	xa = xa + 2**52 - 2**52;
29720	return copysign (xa, operand1);
29721   */
29722 enum machine_mode mode = GET_MODE (operand0);
29723 rtx res, xa, label, TWO52, mask;
29725 res = gen_reg_rtx (mode);
29726 emit_move_insn (res, operand1);
29728 /* xa = abs (operand1) */
29729 xa = ix86_expand_sse_fabs (res, &mask);
29731 /* if (!isless (xa, TWO52)) goto label; */
29732 TWO52 = ix86_gen_TWO52 (mode);
29733 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29735 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29736 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29738 ix86_sse_copysign_to_positive (res, xa, res, mask);
29740 emit_label (label);
29741 LABEL_NUSES (label) = 1;
29743 emit_move_insn (operand0, res);
29746 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1,
29747    storing into OPERAND0.  */
29749 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29751   /* C code for the stuff we expand below.
29752	double xa = fabs (x), x2;
29753	if (!isless (xa, TWO52))
29754	  return x;
29755	xa = xa + TWO52 - TWO52;
29756	x2 = copysign (xa, x);
29757      Compensate.  Floor:
29758	if (x2 > x)
29759	  x2 -= 1;
29760      Compensate.  Ceil:
29761	if (x2 < x)
29762	  x2 += 1;
29763	return x2;
29764   */
29765 enum machine_mode mode = GET_MODE (operand0);
29766 rtx xa, TWO52, tmp, label, one, res, mask;
29768 TWO52 = ix86_gen_TWO52 (mode);
29770 /* Temporary for holding the result, initialized to the input
29771 operand to ease control flow. */
29772 res = gen_reg_rtx (mode);
29773 emit_move_insn (res, operand1);
29775 /* xa = abs (operand1) */
29776 xa = ix86_expand_sse_fabs (res, &mask);
29778 /* if (!isless (xa, TWO52)) goto label; */
29779 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29781 /* xa = xa + TWO52 - TWO52; */
29782 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29783 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29785 /* xa = copysign (xa, operand1) */
29786 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29788 /* generate 1.0 or -1.0 */
29789 one = force_reg (mode,
29790 const_double_from_real_value (do_floor
29791 ? dconst1 : dconstm1, mode));
29793 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29794 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29795 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29796 gen_rtx_AND (mode, one, tmp)));
29797 /* We always need to subtract here to preserve signed zero. */
29798 tmp = expand_simple_binop (mode, MINUS,
29799 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29800 emit_move_insn (res, tmp);
29802 emit_label (label);
29803 LABEL_NUSES (label) = 1;
29805 emit_move_insn (operand0, res);
29808 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1,
29809    storing into OPERAND0.  */
29811 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29813   /* C code for the stuff we expand below.
29814	double xa = fabs (x), x2;
29815	if (!isless (xa, TWO52))
29816	  return x;
29817	x2 = (double)(long)x;
29818      Compensate.  Floor:
29819	if (x2 > x)
29820	  x2 -= 1;
29821      Compensate.  Ceil:
29822	if (x2 < x)
29823	  x2 += 1;
29824	if (HONOR_SIGNED_ZEROS (mode))
29825	  return copysign (x2, x);
29826	return x2;
29827   */
29828 enum machine_mode mode = GET_MODE (operand0);
29829 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29831 TWO52 = ix86_gen_TWO52 (mode);
29833 /* Temporary for holding the result, initialized to the input
29834 operand to ease control flow. */
29835 res = gen_reg_rtx (mode);
29836 emit_move_insn (res, operand1);
29838 /* xa = abs (operand1) */
29839 xa = ix86_expand_sse_fabs (res, &mask);
29841 /* if (!isless (xa, TWO52)) goto label; */
29842 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29844 /* xa = (double)(long)x */
29845 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29846 expand_fix (xi, res, 0);
29847 expand_float (xa, xi, 0);
29850 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29852 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29853 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29854 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29855 gen_rtx_AND (mode, one, tmp)));
29856 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29857 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29858 emit_move_insn (res, tmp);
29860 if (HONOR_SIGNED_ZEROS (mode))
29861 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29863 emit_label (label);
29864 LABEL_NUSES (label) = 1;
29866 emit_move_insn (operand0, res);
29869 /* Expand SSE sequence for computing round from OPERAND1, storing
29870    into OPERAND0.  This sequence works without relying on DImode truncation
29871    via cvttsd2siq, which is only available on 64-bit targets.  */
29873 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29875   /* C code for the stuff we expand below.
29876	double xa = fabs (x), xa2, x2;
29877	if (!isless (xa, TWO52))
29878	  return x;
29879      Using the absolute value and copying back the sign makes
29880      -0.0 -> -0.0 correct.
29881	xa2 = xa + TWO52 - TWO52;
29882	dxa = xa2 - xa;
29883	if (dxa <= -0.5)
29884	  xa2 += 1;
29886	else if (dxa > 0.5)
29887	  xa2 -= 1;
29888	x2 = copysign (xa2, x);
29889	return x2;
29890   */
29891 enum machine_mode mode = GET_MODE (operand0);
29892 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29894 TWO52 = ix86_gen_TWO52 (mode);
29896 /* Temporary for holding the result, initialized to the input
29897 operand to ease control flow. */
29898 res = gen_reg_rtx (mode);
29899 emit_move_insn (res, operand1);
29901 /* xa = abs (operand1) */
29902 xa = ix86_expand_sse_fabs (res, &mask);
29904 /* if (!isless (xa, TWO52)) goto label; */
29905 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29907 /* xa2 = xa + TWO52 - TWO52; */
29908 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29909 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29911 /* dxa = xa2 - xa; */
29912 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29914 /* generate 0.5, 1.0 and -0.5 */
29915 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29916 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29917 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29921 tmp = gen_reg_rtx (mode);
29922 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29923 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29924 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29925 gen_rtx_AND (mode, one, tmp)));
29926 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29927 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29928 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29929 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29930 gen_rtx_AND (mode, one, tmp)));
29931 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29933 /* res = copysign (xa2, operand1) */
29934 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29936 emit_label (label);
29937 LABEL_NUSES (label) = 1;
29939 emit_move_insn (operand0, res);
29942 /* Expand SSE sequence for computing trunc from OPERAND1, storing
29943    into OPERAND0.  */
29945 ix86_expand_trunc (rtx operand0, rtx operand1)
29947   /* C code for SSE variant we expand below.
29948	double xa = fabs (x), x2;
29949	if (!isless (xa, TWO52))
29950	  return x;
29951	x2 = (double)(long)x;
29952	if (HONOR_SIGNED_ZEROS (mode))
29953	  return copysign (x2, x);
29954	return x2;
29955   */
29956 enum machine_mode mode = GET_MODE (operand0);
29957 rtx xa, xi, TWO52, label, res, mask;
29959 TWO52 = ix86_gen_TWO52 (mode);
29961 /* Temporary for holding the result, initialized to the input
29962 operand to ease control flow. */
29963 res = gen_reg_rtx (mode);
29964 emit_move_insn (res, operand1);
29966 /* xa = abs (operand1) */
29967 xa = ix86_expand_sse_fabs (res, &mask);
29969 /* if (!isless (xa, TWO52)) goto label; */
29970 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29972 /* x = (double)(long)x */
29973 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29974 expand_fix (xi, res, 0);
29975 expand_float (res, xi, 0);
29977 if (HONOR_SIGNED_ZEROS (mode))
29978 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29980 emit_label (label);
29981 LABEL_NUSES (label) = 1;
29983 emit_move_insn (operand0, res);
29984 }
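/* The TWO52 guard exists because any double with magnitude >= 2**52 has
   no fraction bits left, so it is already its own truncation, and the
   fix/float round trip would not be safe over the full double range.
   A rough C model, assuming a 64-bit long:

     static double
     trunc_df_model (double x)
     {
       double xa = fabs (x);
       if (!(xa < 0x1p52))
         return x;                     // already integral
       double x2 = (double) (long) x;  // truncates toward zero
       return copysign (x2, x);        // keeps -0.3 -> -0.0 correct
     }
*/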
29986 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
29987 OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation. */
29988 void
29989 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29990 {
29991 enum machine_mode mode = GET_MODE (operand0);
29992 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29994 /* C code for SSE variant we expand below.
29995 double xa = fabs (x), x2;
29996 if (!isless (xa, TWO52))
29997 return x;
29998 xa2 = xa + TWO52 - TWO52;
29999 Compensate:
30000 if (xa2 > xa)
30001 xa2 -= 1;
30002 x2 = copysign (xa2, x);
30003 return x2;
30004 */
30006 TWO52 = ix86_gen_TWO52 (mode);
30008 /* Temporary for holding the result, initialized to the input
30009 operand to ease control flow. */
30010 res = gen_reg_rtx (mode);
30011 emit_move_insn (res, operand1);
30013 /* xa = abs (operand1) */
30014 xa = ix86_expand_sse_fabs (res, &smask);
30016 /* if (!isless (xa, TWO52)) goto label; */
30017 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30019 /* res = xa + TWO52 - TWO52; */
30020 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
30021 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
30022 emit_move_insn (res, tmp);
30024 /* generate 1.0 */
30025 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
30027 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
30028 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
30029 emit_insn (gen_rtx_SET (VOIDmode, mask,
30030 gen_rtx_AND (mode, mask, one)));
30031 tmp = expand_simple_binop (mode, MINUS,
30032 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
30033 emit_move_insn (res, tmp);
30035 /* res = copysign (res, operand1) */
30036 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
30038 emit_label (label);
30039 LABEL_NUSES (label) = 1;
30041 emit_move_insn (operand0, res);
30042 }
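/* Worked example for the compensation above, assuming round-to-nearest:
   for x = 2.7, xa + TWO52 - TWO52 rounds 2.7 up to 3.0; since 3.0 > 2.7
   the mask is all ones, 1.0 is subtracted, and res becomes the truncated
   value 2.0.  For x = 2.2 the addition already rounds down to 2.0 and
   nothing is subtracted.  */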
30044 /* Expand SSE sequence for computing round from OPERAND1 storing
30045 into OPERAND0. */
30046 void
30047 ix86_expand_round (rtx operand0, rtx operand1)
30048 {
30049 /* C code for the stuff we're doing below:
30050 double xa = fabs (x);
30051 if (!isless (xa, TWO52))
30052 return x;
30053 xa = (double)(long)(xa + nextafter (0.5, 0.0));
30054 return copysign (xa, x);
30055 */
30056 enum machine_mode mode = GET_MODE (operand0);
30057 rtx res, TWO52, xa, label, xi, half, mask;
30058 const struct real_format *fmt;
30059 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
30061 /* Temporary for holding the result, initialized to the input
30062 operand to ease control flow. */
30063 res = gen_reg_rtx (mode);
30064 emit_move_insn (res, operand1);
30066 TWO52 = ix86_gen_TWO52 (mode);
30067 xa = ix86_expand_sse_fabs (res, &mask);
30068 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
30070 /* load nextafter (0.5, 0.0) */
30071 fmt = REAL_MODE_FORMAT (mode);
30072 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
30073 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
30075 /* xa = xa + 0.5 */
30076 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
30077 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
30079 /* xa = (double)(int64_t)xa */
30080 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
30081 expand_fix (xi, xa, 0);
30082 expand_float (xa, xi, 0);
30084 /* res = copysign (xa, operand1) */
30085 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
30087 emit_label (label);
30088 LABEL_NUSES (label) = 1;
30090 emit_move_insn (operand0, res);
30091 }
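/* Worked example for pred_half: in DFmode the precision p is 53, so
   half_minus_pred_half is 2**-54 and pred_half = 0.5 - 2**-54, the
   largest double strictly below 0.5.  Adding a plain 0.5 would misround
   x = 0.49999999999999994 (that same value): x + 0.5 would round up to
   exactly 1.0 and the truncation would yield 1 instead of 0.  With
   pred_half the sum stays below 1.0 and the result is the correct 0.  */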
30094 /* Table of valid machine attributes. */
30095 static const struct attribute_spec ix86_attribute_table[] =
30096 {
30097 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
30098 /* Stdcall attribute says callee is responsible for popping arguments
30099 if they are not variable. */
30100 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30101 /* Fastcall attribute says callee is responsible for popping arguments
30102 if they are not variable. */
30103 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30104 /* Thiscall attribute says callee is responsible for popping arguments
30105 if they are not variable. */
30106 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30107 /* Cdecl attribute says the callee is a normal C declaration */
30108 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30109 /* Regparm attribute specifies how many integer arguments are to be
30110 passed in registers. */
30111 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
30112 /* Sseregparm attribute says we are using x86_64 calling conventions
30113 for FP arguments. */
30114 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
30115 /* force_align_arg_pointer says this function realigns the stack at entry. */
30116 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
30117 false, true, true, ix86_handle_cconv_attribute },
30118 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
30119 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
30120 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
30121 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
30123 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30124 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
30125 #ifdef SUBTARGET_ATTRIBUTE_TABLE
30126 SUBTARGET_ATTRIBUTE_TABLE,
30127 #endif
30128 /* ms_abi and sysv_abi calling convention function attributes. */
30129 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30130 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
30131 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
30133 { NULL, 0, 0, false, false, false, NULL }
30134 };
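/* For reference, these are used as type and declaration attributes in
   user code, e.g.:

     int  __attribute__((fastcall))    f (int a, int b);
     int  __attribute__((regparm (3))) g (int a, int b, int c);
     struct s { char c; int i; } __attribute__((ms_struct));

   The calling-convention entries have decl_req false and type_req true,
   so they attach to the function type rather than the declaration.  */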
30136 /* Implement targetm.vectorize.builtin_vectorization_cost. */
30137 static int
30138 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
30139 tree vectype ATTRIBUTE_UNUSED,
30140 int misalign ATTRIBUTE_UNUSED)
30141 {
30142 switch (type_of_cost)
30143 {
30144 case scalar_stmt:
30145 return ix86_cost->scalar_stmt_cost;
30147 case scalar_load:
30148 return ix86_cost->scalar_load_cost;
30150 case scalar_store:
30151 return ix86_cost->scalar_store_cost;
30153 case vector_stmt:
30154 return ix86_cost->vec_stmt_cost;
30156 case vector_load:
30157 return ix86_cost->vec_align_load_cost;
30159 case vector_store:
30160 return ix86_cost->vec_store_cost;
30162 case vec_to_scalar:
30163 return ix86_cost->vec_to_scalar_cost;
30165 case scalar_to_vec:
30166 return ix86_cost->scalar_to_vec_cost;
30168 case unaligned_load:
30169 case unaligned_store:
30170 return ix86_cost->vec_unalign_load_cost;
30172 case cond_branch_taken:
30173 return ix86_cost->cond_taken_branch_cost;
30175 case cond_branch_not_taken:
30176 return ix86_cost->cond_not_taken_branch_cost;
30178 case vec_perm:
30179 return 1;
30181 default:
30182 gcc_unreachable ();
30183 }
30184 }
30187 /* Implement targetm.vectorize.builtin_vec_perm. */
30189 static tree
30190 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
30191 {
30192 tree itype = TREE_TYPE (vec_type);
30193 bool u = TYPE_UNSIGNED (itype);
30194 enum machine_mode vmode = TYPE_MODE (vec_type);
30195 enum ix86_builtins fcode;
30196 bool ok = TARGET_SSE2;
30198 switch (vmode)
30199 {
30200 case V4DFmode:
30201 ok = TARGET_AVX;
30202 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30203 goto get_di;
30204 case V2DFmode:
30205 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30206 get_di:
30207 itype = ix86_get_builtin_type (IX86_BT_DI);
30208 break;
30210 case V8SFmode:
30211 ok = TARGET_AVX;
30212 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30213 goto get_si;
30214 case V4SFmode:
30215 ok = TARGET_SSE;
30216 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30217 get_si:
30218 itype = ix86_get_builtin_type (IX86_BT_SI);
30219 break;
30221 case V2DImode:
30222 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30223 break;
30224 case V4SImode:
30225 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30226 break;
30227 case V8HImode:
30228 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30229 break;
30230 case V16QImode:
30231 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30232 break;
30234 default:
30235 ok = false;
30236 break;
30237 }
30239 if (!ok)
30240 return NULL_TREE;
30241 *mask_type = itype;
30242 return ix86_builtins[(int) fcode];
30243 }
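/* For example, for a V4SFmode vector type this returns the decl for
   IX86_BUILTIN_VEC_PERM_V4SF and reports SImode selector elements via
   *mask_type; the V2DF/V4DF cases report DImode selector elements, and
   the 256-bit modes additionally require TARGET_AVX.  */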
30245 /* Return a vector mode with twice as many elements as VMODE. */
30246 /* ??? Consider moving this to a table generated by genmodes.c. */
30248 static enum machine_mode
30249 doublesize_vector_mode (enum machine_mode vmode)
30250 {
30251 switch (vmode)
30252 {
30253 case V2SFmode: return V4SFmode;
30254 case V1DImode: return V2DImode;
30255 case V2SImode: return V4SImode;
30256 case V4HImode: return V8HImode;
30257 case V8QImode: return V16QImode;
30259 case V2DFmode: return V4DFmode;
30260 case V4SFmode: return V8SFmode;
30261 case V2DImode: return V4DImode;
30262 case V4SImode: return V8SImode;
30263 case V8HImode: return V16HImode;
30264 case V16QImode: return V32QImode;
30266 case V4DFmode: return V8DFmode;
30267 case V8SFmode: return V16SFmode;
30268 case V4DImode: return V8DImode;
30269 case V8SImode: return V16SImode;
30270 case V16HImode: return V32HImode;
30271 case V32QImode: return V64QImode;
30273 default:
30274 gcc_unreachable ();
30275 }
30276 }
30278 /* Construct (set target (vec_select op0 (parallel perm))) and
30279 return true if that's a valid instruction in the active ISA. */
30281 static bool
30282 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30283 {
30284 rtx rperm[MAX_VECT_LEN], x;
30285 unsigned i;
30287 for (i = 0; i < nelt; ++i)
30288 rperm[i] = GEN_INT (perm[i]);
30290 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30291 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30292 x = gen_rtx_SET (VOIDmode, target, x);
30294 x = emit_insn (x);
30295 if (recog_memoized (x) < 0)
30296 {
30297 remove_insn (x);
30298 return false;
30299 }
30300 return true;
30301 }
30303 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30305 static bool
30306 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30307 const unsigned char *perm, unsigned nelt)
30308 {
30309 enum machine_mode v2mode;
30310 rtx x;
30312 v2mode = doublesize_vector_mode (GET_MODE (op0));
30313 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30314 return expand_vselect (target, x, perm, nelt);
30315 }
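/* For example, for V4SFmode with perm = { 0, 4, 1, 5 } this builds

     (set (reg:V4SF target)
          (vec_select:V4SF
            (vec_concat:V8SF (reg:V4SF op0) (reg:V4SF op1))
            (parallel [0 4 1 5])))

   which recog recognizes as unpcklps.  */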
30317 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30318 in terms of blendp[sd] / pblendw / pblendvb. */
30320 static bool
30321 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30322 {
30323 enum machine_mode vmode = d->vmode;
30324 unsigned i, mask, nelt = d->nelt;
30325 rtx target, op0, op1, x;
30327 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30328 return false;
30329 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30330 return false;
30332 /* This is a blend, not a permute. Elements must stay in their
30333 respective lanes. */
30334 for (i = 0; i < nelt; ++i)
30335 {
30336 unsigned e = d->perm[i];
30337 if (!(e == i || e == i + nelt))
30338 return false;
30339 }
30344 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30345 decision should be extracted elsewhere, so that we only try that
30346 sequence once all budget==3 options have been tried. */
30348 /* For bytes, see if bytes move in pairs so we can use pblendw with
30349 an immediate argument, rather than pblendvb with a vector argument. */
30350 if (vmode == V16QImode)
30351 {
30352 bool pblendw_ok = true;
30353 for (i = 0; i < 16 && pblendw_ok; i += 2)
30354 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30356 if (!pblendw_ok)
30357 {
30358 rtx rperm[16], vperm;
30360 for (i = 0; i < nelt; ++i)
30361 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30363 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30364 vperm = force_reg (V16QImode, vperm);
30366 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30367 return true;
30368 }
30369 }
30371 target = d->target;
30372 op0 = d->op0;
30373 op1 = d->op1;
30374 mask = 0;
30376 switch (vmode)
30377 {
30378 case V4DFmode:
30379 case V8SFmode:
30380 case V2DFmode:
30381 case V4SFmode:
30382 case V8HImode:
30383 for (i = 0; i < nelt; ++i)
30384 mask |= (d->perm[i] >= nelt) << i;
30385 break;
30387 case V2DImode:
30388 for (i = 0; i < 2; ++i)
30389 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30390 goto do_subreg;
30392 case V4SImode:
30393 for (i = 0; i < 4; ++i)
30394 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30395 goto do_subreg;
30397 case V16QImode:
30398 for (i = 0; i < 8; ++i)
30399 mask |= (d->perm[i * 2] >= 16) << i;
30401 do_subreg:
30402 vmode = V8HImode;
30403 target = gen_lowpart (vmode, target);
30404 op0 = gen_lowpart (vmode, op0);
30405 op1 = gen_lowpart (vmode, op1);
30406 break;
30408 default:
30409 gcc_unreachable ();
30410 }
30412 /* This matches five different patterns with the different modes. */
30413 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30414 x = gen_rtx_SET (VOIDmode, target, x);
30415 emit_insn (x);
30417 return true;
30418 }
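/* Worked example: for V8HImode with perm = { 0, 9, 2, 11, 4, 13, 6, 15 },
   elements 1, 3, 5 and 7 come from op1, so the first switch arm above
   builds mask = 0xaa and the VEC_MERGE matches pblendw $0xaa.  */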
30420 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30421 in terms of the variable form of vpermilps.
30423 Note that we will have already failed the immediate input vpermilps,
30424 which requires that the high and low part shuffle be identical; the
30425 variable form doesn't require that. */
30427 static bool
30428 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30429 {
30430 rtx rperm[8], vperm;
30431 unsigned i;
30433 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30434 return false;
30436 /* We can only permute within the 128-bit lane. */
30437 for (i = 0; i < 8; ++i)
30438 {
30439 unsigned e = d->perm[i];
30440 if (i < 4 ? e >= 4 : e < 4)
30441 return false;
30442 }
30447 for (i = 0; i < 8; ++i)
30448 {
30449 unsigned e = d->perm[i];
30451 /* Within each 128-bit lane, the elements of op0 are numbered
30452 from 0 and the elements of op1 are numbered from 4. */
30453 if (e >= 8 + 4)
30454 e -= 8;
30455 else if (e >= 4)
30456 e -= 4;
30458 rperm[i] = GEN_INT (e);
30459 }
30461 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30462 vperm = force_reg (V8SImode, vperm);
30463 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30464 return true;
30465 }
30468 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30469 in terms of pshufb or vpperm. */
30471 static bool
30472 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30473 {
30474 unsigned i, nelt, eltsz;
30475 rtx rperm[16], vperm, target, op0, op1;
30477 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30478 return false;
30479 if (GET_MODE_SIZE (d->vmode) != 16)
30480 return false;
30485 nelt = d->nelt;
30486 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30488 for (i = 0; i < nelt; ++i)
30490 unsigned j, e = d->perm[i];
30491 for (j = 0; j < eltsz; ++j)
30492 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30495 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30496 vperm = force_reg (V16QImode, vperm);
30498 target = gen_lowpart (V16QImode, d->target);
30499 op0 = gen_lowpart (V16QImode, d->op0);
30500 if (d->op0 == d->op1)
30501 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30502 else
30503 {
30504 op1 = gen_lowpart (V16QImode, d->op1);
30505 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
30506 }
30508 return true;
30509 }
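/* Worked example: a one-operand V4SImode permutation { 2, 3, 0, 1 } has
   eltsz = 4, so the control vector built above is the byte sequence
   { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 } and a single
   pshufb performs the rotation.  */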
30511 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30512 in a single instruction. */
30514 static bool
30515 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30516 {
30517 unsigned i, nelt = d->nelt;
30518 unsigned char perm2[MAX_VECT_LEN];
30520 /* Check plain VEC_SELECT first, because AVX has instructions that could
30521 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30522 input where SEL+CONCAT may not. */
30523 if (d->op0 == d->op1)
30524 {
30525 int mask = nelt - 1;
30527 for (i = 0; i < nelt; i++)
30528 perm2[i] = d->perm[i] & mask;
30530 if (expand_vselect (d->target, d->op0, perm2, nelt))
30531 return true;
30533 /* There are plenty of patterns in sse.md that are written for
30534 SEL+CONCAT and are not replicated for a single op. Perhaps
30535 that should be changed, to avoid the nastiness here. */
30537 /* Recognize interleave style patterns, which means incrementing
30538 every other permutation operand. */
30539 for (i = 0; i < nelt; i += 2)
30540 {
30541 perm2[i] = d->perm[i] & mask;
30542 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30543 }
30544 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30545 return true;
30547 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30548 if (nelt >= 4)
30549 {
30550 for (i = 0; i < nelt; i += 4)
30551 {
30552 perm2[i + 0] = d->perm[i + 0] & mask;
30553 perm2[i + 1] = d->perm[i + 1] & mask;
30554 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30555 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30556 }
30558 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30559 return true;
30560 }
30561 }
30563 /* Finally, try the fully general two operand permute. */
30564 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30565 return true;
30567 /* Recognize interleave style patterns with reversed operands. */
30568 if (d->op0 != d->op1)
30569 {
30570 for (i = 0; i < nelt; ++i)
30571 {
30572 unsigned e = d->perm[i];
30573 if (e >= nelt)
30574 e -= nelt;
30575 else
30576 e += nelt;
30577 perm2[i] = e;
30578 }
30580 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30581 return true;
30582 }
30584 /* Try the SSE4.1 blend variable merge instructions. */
30585 if (expand_vec_perm_blend (d))
30586 return true;
30588 /* Try one of the AVX vpermil variable permutations. */
30589 if (expand_vec_perm_vpermil (d))
30590 return true;
30592 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30593 if (expand_vec_perm_pshufb (d))
30594 return true;
30596 return false;
30597 }
30599 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30600 in terms of a pair of pshuflw + pshufhw instructions. */
30602 static bool
30603 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30604 {
30605 unsigned char perm2[MAX_VECT_LEN];
30606 unsigned i;
30607 bool ok;
30609 if (d->vmode != V8HImode || d->op0 != d->op1)
30610 return false;
30612 /* The two permutations only operate in 64-bit lanes. */
30613 for (i = 0; i < 4; ++i)
30614 if (d->perm[i] >= 4)
30615 return false;
30616 for (i = 4; i < 8; ++i)
30617 if (d->perm[i] < 4)
30618 return false;
30623 /* Emit the pshuflw. */
30624 memcpy (perm2, d->perm, 4);
30625 for (i = 4; i < 8; ++i)
30626 perm2[i] = i;
30627 ok = expand_vselect (d->target, d->op0, perm2, 8);
30628 gcc_assert (ok);
30630 /* Emit the pshufhw. */
30631 memcpy (perm2 + 4, d->perm + 4, 4);
30632 for (i = 0; i < 4; ++i)
30633 perm2[i] = i;
30634 ok = expand_vselect (d->target, d->target, perm2, 8);
30635 gcc_assert (ok);
30637 return true;
30638 }
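/* Worked example: perm = { 2, 0, 3, 1, 7, 5, 6, 4 } stays within the two
   64-bit halves, so it becomes pshuflw $0x72 (2 | 0<<2 | 3<<4 | 1<<6)
   followed by pshufhw $0x27 (the high indices minus 4:
   3 | 1<<2 | 2<<4 | 0<<6).  */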
30640 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30641 the permutation using the SSSE3 palignr instruction. This succeeds
30642 when all of the elements in PERM fit within one vector and we merely
30643 need to shift them down so that a single vector permutation has a
30644 chance to succeed. */
30646 static bool
30647 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30648 {
30649 unsigned i, nelt = d->nelt;
30650 unsigned min, max;
30651 bool in_order, ok;
30652 rtx shift;
30654 /* Even with AVX, palignr only operates on 128-bit vectors. */
30655 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30656 return false;
30658 min = nelt, max = 0;
30659 for (i = 0; i < nelt; ++i)
30660 {
30661 unsigned e = d->perm[i];
30662 if (e < min)
30663 min = e;
30664 if (e > max)
30665 max = e;
30666 }
30667 if (min == 0 || max - min >= nelt)
30668 return false;
30670 /* Given that we have SSSE3, we know we'll be able to implement the
30671 single operand permutation after the palignr with pshufb. */
30675 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30676 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30677 gen_lowpart (TImode, d->op1),
30678 gen_lowpart (TImode, d->op0), shift));
30680 d->op0 = d->op1 = d->target;
30682 in_order = true;
30683 for (i = 0; i < nelt; ++i)
30684 {
30685 unsigned e = d->perm[i] - min;
30686 if (e != i)
30687 in_order = false;
30688 d->perm[i] = e;
30689 }
30691 /* Test for the degenerate case where the alignment by itself
30692 produces the desired permutation. */
30693 if (in_order)
30694 return true;
30696 ok = expand_vec_perm_1 (d);
30697 gcc_assert (ok);
30699 return ok;
30700 }
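/* Worked example: for V16QImode with perm = { 3, 4, ..., 18 }, min is 3,
   so palignr $3 shifts the concatenated operands down by three bytes;
   the residual permutation is then the identity, the in_order test
   succeeds, and no pshufb is needed.  */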
30702 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30703 a two vector permutation into a single vector permutation by using
30704 an interleave operation to merge the vectors. */
30706 static bool
30707 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30708 {
30709 struct expand_vec_perm_d dremap, dfinal;
30710 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30711 unsigned contents, h1, h2, h3, h4;
30712 unsigned char remap[2 * MAX_VECT_LEN];
30713 rtx seq;
30714 bool ok;
30716 if (d->op0 == d->op1)
30717 return false;
30719 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
30720 lanes. We can use similar techniques with the vperm2f128 instruction,
30721 but it requires slightly different logic. */
30722 if (GET_MODE_SIZE (d->vmode) != 16)
30723 return false;
30725 /* Examine from whence the elements come. */
30726 contents = 0;
30727 for (i = 0; i < nelt; ++i)
30728 contents |= 1u << d->perm[i];
30730 /* Split the two input vectors into 4 halves. */
30731 h1 = (1u << nelt2) - 1;
30732 h2 = h1 << nelt2;
30733 h3 = h2 << nelt2;
30734 h4 = h3 << nelt2;
30736 memset (remap, 0xff, sizeof (remap));
30737 dremap = *d;
30739 /* If the elements are from the low halves, use interleave low; similarly
30740 for interleave high. If the elements are from mis-matched halves, we
30741 can use shufps for V4SF/V4SI or do a DImode shuffle. */
30742 if ((contents & (h1 | h3)) == contents)
30743 {
30744 for (i = 0; i < nelt2; ++i)
30745 {
30746 remap[i] = i * 2;
30747 remap[i + nelt] = i * 2 + 1;
30748 dremap.perm[i * 2] = i;
30749 dremap.perm[i * 2 + 1] = i + nelt;
30750 }
30751 }
30752 else if ((contents & (h2 | h4)) == contents)
30753 {
30754 for (i = 0; i < nelt2; ++i)
30755 {
30756 remap[i + nelt2] = i * 2;
30757 remap[i + nelt + nelt2] = i * 2 + 1;
30758 dremap.perm[i * 2] = i + nelt2;
30759 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
30760 }
30761 }
30762 else if ((contents & (h1 | h4)) == contents)
30763 {
30764 for (i = 0; i < nelt2; ++i)
30765 {
30766 remap[i] = i;
30767 remap[i + nelt + nelt2] = i + nelt2;
30768 dremap.perm[i] = i;
30769 dremap.perm[i + nelt2] = i + nelt + nelt2;
30770 }
30771 if (nelt != 4)
30772 {
30773 dremap.vmode = V2DImode;
30774 dremap.nelt = 2;
30775 dremap.perm[0] = 0;
30776 dremap.perm[1] = 3;
30777 }
30778 }
30779 else if ((contents & (h2 | h3)) == contents)
30780 {
30781 for (i = 0; i < nelt2; ++i)
30782 {
30783 remap[i + nelt2] = i;
30784 remap[i + nelt] = i + nelt2;
30785 dremap.perm[i] = i + nelt2;
30786 dremap.perm[i + nelt2] = i + nelt;
30787 }
30788 if (nelt != 4)
30789 {
30790 dremap.vmode = V2DImode;
30791 dremap.nelt = 2;
30792 dremap.perm[0] = 1;
30793 dremap.perm[1] = 2;
30794 }
30795 }
30796 else
30797 return false;
30799 /* Use the remapping array set up above to move the elements from their
30800 swizzled locations into their final destinations. */
30801 dfinal = *d;
30802 for (i = 0; i < nelt; ++i)
30803 {
30804 unsigned e = remap[d->perm[i]];
30805 gcc_assert (e < nelt);
30806 dfinal.perm[i] = e;
30807 }
30808 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
30809 dfinal.op1 = dfinal.op0;
30810 dremap.target = dfinal.op0;
30812 /* Test if the final remap can be done with a single insn. For V4SFmode or
30813 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
30814 start_sequence ();
30815 ok = expand_vec_perm_1 (&dfinal);
30816 seq = get_insns ();
30817 end_sequence ();
30819 if (!ok)
30820 return false;
30822 if (dremap.vmode != dfinal.vmode)
30823 {
30824 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
30825 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
30826 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
30827 }
30829 ok = expand_vec_perm_1 (&dremap);
30830 gcc_assert (ok);
30832 emit_insn (seq);
30834 return true;
30835 }
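/* Worked example: a V4SFmode permutation { 0, 1, 4, 5 } draws only on
   the two low halves (contents is within h1 | h3), so dremap becomes
   the interleave { 0, 4, 1, 5 } (unpcklps) and dfinal reduces to the
   one-operand shuffle { 0, 2, 1, 3 }, which expand_vec_perm_1 emits as
   a single shufps/pshufd.  */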
30836 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
30837 permutation with two pshufb insns and an ior. We should have already
30838 failed all two instruction sequences. */
30840 static bool
30841 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
30842 {
30843 rtx rperm[2][16], vperm, l, h, op, m128;
30844 unsigned int i, nelt, eltsz;
30846 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30847 return false;
30848 gcc_assert (d->op0 != d->op1);
30850 nelt = d->nelt;
30851 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30853 /* Generate two permutation masks. If the required element is within
30854 the given vector it is shuffled into the proper lane. If the required
30855 element is in the other vector, force a zero into the lane by setting
30856 bit 7 in the permutation mask. */
30857 m128 = GEN_INT (-128);
30858 for (i = 0; i < nelt; ++i)
30859 {
30860 unsigned j, e = d->perm[i];
30861 unsigned which = (e >= nelt);
30862 if (which)
30863 e -= nelt;
30865 for (j = 0; j < eltsz; ++j)
30866 {
30867 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
30868 rperm[1-which][i*eltsz + j] = m128;
30869 }
30870 }
30872 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
30873 vperm = force_reg (V16QImode, vperm);
30875 l = gen_reg_rtx (V16QImode);
30876 op = gen_lowpart (V16QImode, d->op0);
30877 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
30879 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
30880 vperm = force_reg (V16QImode, vperm);
30882 h = gen_reg_rtx (V16QImode);
30883 op = gen_lowpart (V16QImode, d->op1);
30884 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
30886 op = gen_lowpart (V16QImode, d->target);
30887 emit_insn (gen_iorv16qi3 (op, l, h));
30889 return true;
30890 }
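/* Worked example: for a V16QImode element with perm[i] = 17 (byte 1 of
   op1), which = 1, so rperm[1][i] selects byte 1 while rperm[0][i] is
   -128; bit 7 in a pshufb control byte forces the result byte to zero,
   so the final ior merges the two partial shuffles without overlap.  */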
30892 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
30893 and extract-odd permutations. */
30895 static bool
30896 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
30897 {
30898 rtx t1, t2, t3, t4;
30900 switch (d->vmode)
30901 {
30902 case V4DFmode:
30903 t1 = gen_reg_rtx (V4DFmode);
30904 t2 = gen_reg_rtx (V4DFmode);
30906 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
30907 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
30908 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
30910 /* Now an unpck[lh]pd will produce the result required. */
30911 if (odd)
30912 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
30913 else
30914 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
30915 emit_insn (t3);
30916 break;
30918 case V8SFmode:
30919 {
30920 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
30921 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
30922 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
30924 t1 = gen_reg_rtx (V8SFmode);
30925 t2 = gen_reg_rtx (V8SFmode);
30926 t3 = gen_reg_rtx (V8SFmode);
30927 t4 = gen_reg_rtx (V8SFmode);
30929 /* Shuffle within the 128-bit lanes to produce:
30930 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
30931 expand_vselect (t1, d->op0, perm1, 8);
30932 expand_vselect (t2, d->op1, perm1, 8);
30934 /* Shuffle the lanes around to produce:
30935 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
30936 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
30937 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
30939 /* Now a vpermil2p will produce the result required. */
30940 /* ??? The vpermil2p requires a vector constant. Another option
30941 is a unpck[lh]ps to merge the two vectors to produce
30942 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
30943 vpermilps to get the elements into the final order. */
30946 memcpy (d->perm, odd ? permo: perme, 8);
30947 expand_vec_perm_vpermil (d);
30948 return true;
30949 }
30950 case V2DFmode:
30951 case V2DImode:
30952 case V4SFmode:
30953 case V4SImode:
30955 /* These are always directly implementable by expand_vec_perm_1. */
30956 gcc_unreachable ();
30958 case V8HImode:
30959 if (TARGET_SSSE3)
30960 return expand_vec_perm_pshufb2 (d);
30961 else
30962 {
30963 /* We need 2*log2(N)-1 operations to achieve odd/even
30964 with interleave. */
30965 t1 = gen_reg_rtx (V8HImode);
30966 t2 = gen_reg_rtx (V8HImode);
30967 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
30968 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
30969 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
30970 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
30971 if (odd)
30972 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
30973 else
30974 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
30975 emit_insn (t3);
30976 }
30977 break;
30979 case V16QImode:
30980 if (TARGET_SSSE3)
30981 return expand_vec_perm_pshufb2 (d);
30982 else
30983 {
30984 t1 = gen_reg_rtx (V16QImode);
30985 t2 = gen_reg_rtx (V16QImode);
30986 t3 = gen_reg_rtx (V16QImode);
30987 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
30988 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
30989 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
30990 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
30991 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
30992 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
30993 if (odd)
30994 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
30995 else
30996 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
30997 emit_insn (t3);
30998 }
30999 break;
31001 default:
31002 gcc_unreachable ();
31003 }
31005 return true;
31006 }
31008 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31009 extract-even and extract-odd permutations. */
31011 static bool
31012 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
31013 {
31014 unsigned i, odd, nelt = d->nelt;
31016 odd = d->perm[0];
31017 if (odd != 0 && odd != 1)
31018 return false;
31020 for (i = 1; i < nelt; ++i)
31021 if (d->perm[i] != 2 * i + odd)
31022 return false;
31024 return expand_vec_perm_even_odd_1 (d, odd);
31025 }
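/* For example, perm = { 0, 2, 4, 6 } (odd == 0) or { 1, 3, 5, 7 }
   (odd == 1) across two V4SImode operands is recognized here as the
   extract-even/extract-odd permutation.  */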
31027 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
31028 permutations. We assume that expand_vec_perm_1 has already failed. */
31030 static bool
31031 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
31032 {
31033 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
31034 enum machine_mode vmode = d->vmode;
31035 unsigned char perm2[4];
31036 rtx op0 = d->op0;
31037 bool ok;
31039 switch (vmode)
31040 {
31041 case V2DFmode:
31042 case V4SFmode:
31043 /* These are special-cased in sse.md so that we can optionally
31044 use the vbroadcast instruction. They expand to two insns
31045 if the input happens to be in a register. */
31046 gcc_unreachable ();
31048 case V2DImode:
31049 case V4SImode:
31052 /* These are always implementable using standard shuffle patterns. */
31053 gcc_unreachable ();
31055 case V8HImode:
31056 case V16QImode:
31057 /* These can be implemented via interleave. We save one insn by
31058 stopping once we have promoted to V4SImode and then use pshufd. */
31059 do
31060 {
31061 optab otab = vec_interleave_low_optab;
31063 if (elt >= nelt2)
31064 {
31065 otab = vec_interleave_high_optab;
31066 elt -= nelt2;
31067 }
31068 nelt2 /= 2;
31070 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
31071 vmode = get_mode_wider_vector (vmode);
31072 op0 = gen_lowpart (vmode, op0);
31073 }
31074 while (vmode != V4SImode);
31076 memset (perm2, elt, 4);
31077 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
31078 gcc_assert (ok);
31079 return true;
31081 default:
31082 gcc_unreachable ();
31083 }
31084 }
31086 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
31087 broadcast permutations. */
31089 static bool
31090 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
31091 {
31092 unsigned i, elt, nelt = d->nelt;
31094 if (d->op0 != d->op1)
31095 return false;
31097 elt = d->perm[0];
31098 for (i = 1; i < nelt; ++i)
31099 if (d->perm[i] != elt)
31100 return false;
31102 return expand_vec_perm_broadcast_1 (d);
31103 }
31105 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
31106 With all of the interface bits taken care of, perform the expansion
31107 in D and return true on success. */
31109 static bool
31110 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
31111 {
31112 /* Try a single instruction expansion. */
31113 if (expand_vec_perm_1 (d))
31114 return true;
31116 /* Try sequences of two instructions. */
31118 if (expand_vec_perm_pshuflw_pshufhw (d))
31119 return true;
31121 if (expand_vec_perm_palignr (d))
31122 return true;
31124 if (expand_vec_perm_interleave2 (d))
31125 return true;
31127 if (expand_vec_perm_broadcast (d))
31128 return true;
31130 /* Try sequences of three instructions. */
31132 if (expand_vec_perm_pshufb2 (d))
31133 return true;
31135 /* ??? Look for narrow permutations whose element orderings would
31136 allow the promotion to a wider mode. */
31138 /* ??? Look for sequences of interleave or a wider permute that place
31139 the data into the correct lanes for a half-vector shuffle like
31140 pshuf[lh]w or vpermilps. */
31142 /* ??? Look for sequences of interleave that produce the desired results.
31143 The combinatorics of punpck[lh] get pretty ugly... */
31145 if (expand_vec_perm_even_odd (d))
31146 return true;
31148 return false;
31149 }
31151 /* Extract the values from the vector CST into the permutation array in D.
31152 Return 0 on error, 1 if all values from the permutation come from the
31153 first vector, 2 if all values from the second vector, and 3 otherwise. */
31155 static int
31156 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
31157 {
31158 tree list = TREE_VECTOR_CST_ELTS (cst);
31159 unsigned i, nelt = d->nelt;
31160 int ret = 0;
31162 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
31163 {
31164 unsigned HOST_WIDE_INT e;
31166 if (!host_integerp (TREE_VALUE (list), 1))
31167 return 0;
31168 e = tree_low_cst (TREE_VALUE (list), 1);
31169 if (e >= 2 * nelt)
31170 return 0;
31172 ret |= (e < nelt ? 1 : 2);
31173 d->perm[i] = e;
31174 }
31175 gcc_assert (list == NULL);
31177 /* For all elements from second vector, fold the elements to first. */
31178 if (ret == 2)
31179 for (i = 0; i < nelt; ++i)
31180 d->perm[i] -= nelt;
31182 return ret;
31183 }
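/* For example, with nelt = 4 the constant { 4, 6, 5, 7 } selects only
   from the second vector: the function returns 2 and folds the
   permutation to { 0, 2, 1, 3 } over the single remaining input.  */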
31185 rtx
31186 ix86_expand_vec_perm_builtin (tree exp)
31187 {
31188 struct expand_vec_perm_d d;
31189 tree arg0, arg1, arg2;
31191 arg0 = CALL_EXPR_ARG (exp, 0);
31192 arg1 = CALL_EXPR_ARG (exp, 1);
31193 arg2 = CALL_EXPR_ARG (exp, 2);
31195 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31196 d.nelt = GET_MODE_NUNITS (d.vmode);
31197 d.testing_p = false;
31198 gcc_assert (VECTOR_MODE_P (d.vmode));
31200 if (TREE_CODE (arg2) != VECTOR_CST)
31201 {
31202 error_at (EXPR_LOCATION (exp),
31203 "vector permutation requires vector constant");
31204 goto exit_error;
31205 }
31207 switch (extract_vec_perm_cst (&d, arg2))
31208 {
31209 default:
31210 gcc_unreachable ();
31212 case 0:
31213 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31214 goto exit_error;
31216 case 3:
31217 if (!operand_equal_p (arg0, arg1, 0))
31218 {
31219 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31220 d.op0 = force_reg (d.vmode, d.op0);
31221 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31222 d.op1 = force_reg (d.vmode, d.op1);
31223 break;
31224 }
31226 /* The elements of PERM do not suggest that only the first operand
31227 is used, but both operands are identical. Allow easier matching
31228 of the permutation by folding the permutation into the single
31229 input vector. */
31230 {
31231 unsigned i, nelt = d.nelt;
31232 for (i = 0; i < nelt; ++i)
31233 if (d.perm[i] >= nelt)
31234 d.perm[i] -= nelt;
31235 }
31236 /* FALLTHRU */
31238 case 1:
31239 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31240 d.op0 = force_reg (d.vmode, d.op0);
31241 d.op1 = d.op0;
31242 break;
31244 case 2:
31245 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31246 d.op0 = force_reg (d.vmode, d.op0);
31247 d.op1 = d.op0;
31248 break;
31249 }
31251 d.target = gen_reg_rtx (d.vmode);
31252 if (ix86_expand_vec_perm_builtin_1 (&d))
31253 return d.target;
31255 /* For compiler generated permutations, we should never get here, because
31256 the compiler should also be checking the ok hook. But since this is a
31257 builtin the user has access to, don't abort. */
31258 switch (d.nelt)
31259 {
31260 case 2:
31261 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31262 break;
31263 case 4:
31264 sorry ("vector permutation (%d %d %d %d)",
31265 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31266 break;
31267 case 8:
31268 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31269 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31270 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31271 break;
31272 case 16:
31273 sorry ("vector permutation "
31274 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31275 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31276 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31277 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31278 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31279 break;
31280 default:
31281 gcc_unreachable ();
31282 }
31283 exit_error:
31284 return CONST0_RTX (d.vmode);
31285 }
31287 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31289 static bool
31290 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31291 {
31292 struct expand_vec_perm_d d;
31293 int vec_mask;
31294 bool ret, one_vec;
31296 d.vmode = TYPE_MODE (vec_type);
31297 d.nelt = GET_MODE_NUNITS (d.vmode);
31298 d.testing_p = true;
31300 /* Given sufficient ISA support we can just return true here
31301 for selected vector modes. */
31302 if (GET_MODE_SIZE (d.vmode) == 16)
31303 {
31304 /* All implementable with a single vpperm insn. */
31305 if (TARGET_XOP)
31306 return true;
31307 /* All implementable with 2 pshufb + 1 ior. */
31308 if (TARGET_SSSE3)
31309 return true;
31310 /* All implementable with shufpd or unpck[lh]pd. */
31311 if (d.nelt == 2)
31312 return true;
31313 }
31315 vec_mask = extract_vec_perm_cst (&d, mask);
31317 /* This hook cannot be called in response to something that the
31318 user does (unlike the builtin expander), so we should never see
31319 an error generated from the extract. */
31320 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31321 one_vec = (vec_mask != 3);
31323 /* Implementable with shufps or pshufd. */
31324 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31325 return true;
31327 /* Otherwise we have to go through the motions and see if we can
31328 figure out how to generate the requested permutation. */
31329 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31330 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31331 if (!one_vec)
31332 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31334 start_sequence ();
31335 ret = ix86_expand_vec_perm_builtin_1 (&d);
31336 end_sequence ();
31338 return ret;
31339 }
31341 void
31342 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31343 {
31344 struct expand_vec_perm_d d;
31345 unsigned i, nelt;
31347 d.target = targ;
31348 d.op0 = op0;
31349 d.op1 = op1;
31350 d.vmode = GET_MODE (targ);
31351 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31352 d.testing_p = false;
31354 for (i = 0; i < nelt; ++i)
31355 d.perm[i] = i * 2 + odd;
31357 /* We'll either be able to implement the permutation directly... */
31358 if (expand_vec_perm_1 (&d))
31359 return;
31361 /* ... or we use the special-case patterns. */
31362 expand_vec_perm_even_odd_1 (&d, odd);
31363 }
31365 /* This function returns the calling abi specific va_list type node.
31366 It returns the FNDECL specific va_list type. */
31368 static tree
31369 ix86_fn_abi_va_list (tree fndecl)
31370 {
31371 if (!TARGET_64BIT)
31372 return va_list_type_node;
31373 gcc_assert (fndecl != NULL_TREE);
31375 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31376 return ms_va_list_type_node;
31377 else
31378 return sysv_va_list_type_node;
31379 }
31381 /* Returns the canonical va_list type specified by TYPE. If there
31382 is no valid TYPE provided, it returns NULL_TREE. */
31384 static tree
31385 ix86_canonical_va_list_type (tree type)
31386 {
31387 tree wtype, htype;
31389 /* Resolve references and pointers to va_list type. */
31390 if (TREE_CODE (type) == MEM_REF)
31391 type = TREE_TYPE (type);
31392 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
31393 type = TREE_TYPE (type);
31394 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31395 type = TREE_TYPE (type);
31397 if (TARGET_64BIT)
31398 {
31399 wtype = va_list_type_node;
31400 gcc_assert (wtype != NULL_TREE);
31401 htype = type;
31402 if (TREE_CODE (wtype) == ARRAY_TYPE)
31403 {
31404 /* If va_list is an array type, the argument may have decayed
31405 to a pointer type, e.g. by being passed to another function.
31406 In that case, unwrap both types so that we can compare the
31407 underlying records. */
31408 if (TREE_CODE (htype) == ARRAY_TYPE
31409 || POINTER_TYPE_P (htype))
31410 {
31411 wtype = TREE_TYPE (wtype);
31412 htype = TREE_TYPE (htype);
31413 }
31414 }
31415 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31416 return va_list_type_node;
31417 wtype = sysv_va_list_type_node;
31418 gcc_assert (wtype != NULL_TREE);
31419 htype = type;
31420 if (TREE_CODE (wtype) == ARRAY_TYPE)
31421 {
31422 /* If va_list is an array type, the argument may have decayed
31423 to a pointer type, e.g. by being passed to another function.
31424 In that case, unwrap both types so that we can compare the
31425 underlying records. */
31426 if (TREE_CODE (htype) == ARRAY_TYPE
31427 || POINTER_TYPE_P (htype))
31428 {
31429 wtype = TREE_TYPE (wtype);
31430 htype = TREE_TYPE (htype);
31431 }
31432 }
31433 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31434 return sysv_va_list_type_node;
31435 wtype = ms_va_list_type_node;
31436 gcc_assert (wtype != NULL_TREE);
31437 htype = type;
31438 if (TREE_CODE (wtype) == ARRAY_TYPE)
31439 {
31440 /* If va_list is an array type, the argument may have decayed
31441 to a pointer type, e.g. by being passed to another function.
31442 In that case, unwrap both types so that we can compare the
31443 underlying records. */
31444 if (TREE_CODE (htype) == ARRAY_TYPE
31445 || POINTER_TYPE_P (htype))
31446 {
31447 wtype = TREE_TYPE (wtype);
31448 htype = TREE_TYPE (htype);
31449 }
31450 }
31451 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31452 return ms_va_list_type_node;
31453 }
31455 return std_canonical_va_list_type (type);
31456 }
31458 /* Iterate through the target-specific builtin types for va_list.
31459 IDX denotes the iterator, *PTREE is set to the result type of
31460 the va_list builtin, and *PNAME to its internal type.
31461 Returns zero if there is no element for this index, otherwise
31462 IDX should be increased upon the next call.
31463 Note, do not iterate a base builtin's name like __builtin_va_list.
31464 Used from c_common_nodes_and_builtins. */
31466 static int
31467 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31468 {
31469 if (!TARGET_64BIT)
31470 return 0;
31472 switch (idx)
31473 {
31474 default:
31475 break;
31476 case 0:
31477 *ptree = ms_va_list_type_node;
31478 *pname = "__builtin_ms_va_list";
31479 return 1;
31481 case 1:
31482 *ptree = sysv_va_list_type_node;
31483 *pname = "__builtin_sysv_va_list";
31484 return 1;
31485 }
31487 return 0;
31488 }
31491 /* Initialize the GCC target structure. */
31492 #undef TARGET_RETURN_IN_MEMORY
31493 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
31495 #undef TARGET_LEGITIMIZE_ADDRESS
31496 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
31498 #undef TARGET_ATTRIBUTE_TABLE
31499 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
31500 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31501 # undef TARGET_MERGE_DECL_ATTRIBUTES
31502 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
31503 #endif
31505 #undef TARGET_COMP_TYPE_ATTRIBUTES
31506 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
31508 #undef TARGET_INIT_BUILTINS
31509 #define TARGET_INIT_BUILTINS ix86_init_builtins
31510 #undef TARGET_BUILTIN_DECL
31511 #define TARGET_BUILTIN_DECL ix86_builtin_decl
31512 #undef TARGET_EXPAND_BUILTIN
31513 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
31515 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
31516 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
31517 ix86_builtin_vectorized_function
31519 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
31520 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
31522 #undef TARGET_BUILTIN_RECIPROCAL
31523 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
31525 #undef TARGET_ASM_FUNCTION_EPILOGUE
31526 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
31528 #undef TARGET_ENCODE_SECTION_INFO
31529 #ifndef SUBTARGET_ENCODE_SECTION_INFO
31530 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
31531 #else
31532 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
31533 #endif
31535 #undef TARGET_ASM_OPEN_PAREN
31536 #define TARGET_ASM_OPEN_PAREN ""
31537 #undef TARGET_ASM_CLOSE_PAREN
31538 #define TARGET_ASM_CLOSE_PAREN ""
31540 #undef TARGET_ASM_BYTE_OP
31541 #define TARGET_ASM_BYTE_OP ASM_BYTE
31543 #undef TARGET_ASM_ALIGNED_HI_OP
31544 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
31545 #undef TARGET_ASM_ALIGNED_SI_OP
31546 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
31547 #ifdef ASM_QUAD
31548 #undef TARGET_ASM_ALIGNED_DI_OP
31549 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
31550 #endif
31552 #undef TARGET_PROFILE_BEFORE_PROLOGUE
31553 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
31555 #undef TARGET_ASM_UNALIGNED_HI_OP
31556 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
31557 #undef TARGET_ASM_UNALIGNED_SI_OP
31558 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
31559 #undef TARGET_ASM_UNALIGNED_DI_OP
31560 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
31562 #undef TARGET_PRINT_OPERAND
31563 #define TARGET_PRINT_OPERAND ix86_print_operand
31564 #undef TARGET_PRINT_OPERAND_ADDRESS
31565 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
31566 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
31567 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
31568 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
31569 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra
31571 #undef TARGET_SCHED_ADJUST_COST
31572 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
31573 #undef TARGET_SCHED_ISSUE_RATE
31574 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
31575 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
31576 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
31577 ia32_multipass_dfa_lookahead
31579 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
31580 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
31583 #undef TARGET_HAVE_TLS
31584 #define TARGET_HAVE_TLS true
31586 #undef TARGET_CANNOT_FORCE_CONST_MEM
31587 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
31588 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
31589 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
31591 #undef TARGET_DELEGITIMIZE_ADDRESS
31592 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
31594 #undef TARGET_MS_BITFIELD_LAYOUT_P
31595 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
31597 #if TARGET_MACHO
31598 #undef TARGET_BINDS_LOCAL_P
31599 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
31600 #endif
31601 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31602 #undef TARGET_BINDS_LOCAL_P
31603 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
31604 #endif
31606 #undef TARGET_ASM_OUTPUT_MI_THUNK
31607 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
31608 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
31609 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
31611 #undef TARGET_ASM_FILE_START
31612 #define TARGET_ASM_FILE_START x86_file_start
31614 #undef TARGET_DEFAULT_TARGET_FLAGS
31615 #define TARGET_DEFAULT_TARGET_FLAGS \
31616 (TARGET_DEFAULT \
31617 | TARGET_SUBTARGET_DEFAULT \
31618 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
31619 | MASK_FUSED_MADD)
31621 #undef TARGET_HANDLE_OPTION
31622 #define TARGET_HANDLE_OPTION ix86_handle_option
31624 #undef TARGET_REGISTER_MOVE_COST
31625 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
31626 #undef TARGET_MEMORY_MOVE_COST
31627 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
31628 #undef TARGET_RTX_COSTS
31629 #define TARGET_RTX_COSTS ix86_rtx_costs
31630 #undef TARGET_ADDRESS_COST
31631 #define TARGET_ADDRESS_COST ix86_address_cost
31633 #undef TARGET_FIXED_CONDITION_CODE_REGS
31634 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
31635 #undef TARGET_CC_MODES_COMPATIBLE
31636 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
31638 #undef TARGET_MACHINE_DEPENDENT_REORG
31639 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
31641 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
31642 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
31644 #undef TARGET_BUILD_BUILTIN_VA_LIST
31645 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
31647 #undef TARGET_ENUM_VA_LIST_P
31648 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
31650 #undef TARGET_FN_ABI_VA_LIST
31651 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
31653 #undef TARGET_CANONICAL_VA_LIST_TYPE
31654 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
31656 #undef TARGET_EXPAND_BUILTIN_VA_START
31657 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
31659 #undef TARGET_MD_ASM_CLOBBERS
31660 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
31662 #undef TARGET_PROMOTE_PROTOTYPES
31663 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
31664 #undef TARGET_STRUCT_VALUE_RTX
31665 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
31666 #undef TARGET_SETUP_INCOMING_VARARGS
31667 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
31668 #undef TARGET_MUST_PASS_IN_STACK
31669 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
31670 #undef TARGET_FUNCTION_ARG_ADVANCE
31671 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
31672 #undef TARGET_FUNCTION_ARG
31673 #define TARGET_FUNCTION_ARG ix86_function_arg
31674 #undef TARGET_PASS_BY_REFERENCE
31675 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
31676 #undef TARGET_INTERNAL_ARG_POINTER
31677 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
31678 #undef TARGET_UPDATE_STACK_BOUNDARY
31679 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
31680 #undef TARGET_GET_DRAP_RTX
31681 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
31682 #undef TARGET_STRICT_ARGUMENT_NAMING
31683 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
31684 #undef TARGET_STATIC_CHAIN
31685 #define TARGET_STATIC_CHAIN ix86_static_chain
31686 #undef TARGET_TRAMPOLINE_INIT
31687 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
31688 #undef TARGET_RETURN_POPS_ARGS
31689 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
31691 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
31692 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
31694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
31695 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
31697 #undef TARGET_VECTOR_MODE_SUPPORTED_P
31698 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
31700 #undef TARGET_C_MODE_FOR_SUFFIX
31701 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
31704 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
31705 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
31708 #ifdef SUBTARGET_INSERT_ATTRIBUTES
31709 #undef TARGET_INSERT_ATTRIBUTES
31710 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
31713 #undef TARGET_MANGLE_TYPE
31714 #define TARGET_MANGLE_TYPE ix86_mangle_type
31716 #undef TARGET_STACK_PROTECT_FAIL
31717 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
31719 #undef TARGET_FUNCTION_VALUE
31720 #define TARGET_FUNCTION_VALUE ix86_function_value
31722 #undef TARGET_FUNCTION_VALUE_REGNO_P
31723 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
31725 #undef TARGET_SECONDARY_RELOAD
31726 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
31728 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
31729 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
31730 ix86_builtin_vectorization_cost
31731 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
31732 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
31733 ix86_vectorize_builtin_vec_perm
31734 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
31735 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
31736 ix86_vectorize_builtin_vec_perm_ok
31738 #undef TARGET_SET_CURRENT_FUNCTION
31739 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
31741 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
31742 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
31744 #undef TARGET_OPTION_SAVE
31745 #define TARGET_OPTION_SAVE ix86_function_specific_save
31747 #undef TARGET_OPTION_RESTORE
31748 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
31750 #undef TARGET_OPTION_PRINT
31751 #define TARGET_OPTION_PRINT ix86_function_specific_print
31753 #undef TARGET_CAN_INLINE_P
31754 #define TARGET_CAN_INLINE_P ix86_can_inline_p
31756 #undef TARGET_EXPAND_TO_RTL_HOOK
31757 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
31759 #undef TARGET_LEGITIMATE_ADDRESS_P
31760 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
31762 #undef TARGET_IRA_COVER_CLASSES
31763 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
31765 #undef TARGET_FRAME_POINTER_REQUIRED
31766 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
31768 #undef TARGET_CAN_ELIMINATE
31769 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
31771 #undef TARGET_ASM_CODE_END
31772 #define TARGET_ASM_CODE_END ix86_code_end
31774 struct gcc_target targetm = TARGET_INITIALIZER;
31776 #include "gt-i386.h"