/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"

static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)                \
  ((mode) == QImode ? 0                 \
   : (mode) == HImode ? 1               \
   : (mode) == SImode ? 2               \
   : (mode) == DImode ? 3               \
   : 4)
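
/* Illustrative sketch, not part of the original file: MODE_INDEX selects
   a row of the five-entry mult_init/divide arrays in the cost tables
   below, with index 4 ("other") catching anything wider than DImode.
   The helper name is hypothetical; ix86_cost is the tuning-selected cost
   table declared later in this file.  */
#if 0
static int
example_mult_start_cost (enum machine_mode mode)
{
  /* MODE_INDEX (SImode) == 2, so this reads the SImode entry.  */
  return ix86_cost->mult_init[MODE_INDEX (mode)];
}
#endif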

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
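
/* Worked example of the assumption above (editorial note): with
   COSTS_N_INSNS (N) == (N) * 4, an add is COSTS_N_INSNS (1) == 4 when
   tuning for speed and COSTS_N_BYTES (2) == 4 when tuning for size, so
   the two scales agree on the baseline add.  */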

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
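
/* Illustrative sketch, not part of the original file: each cost table
   below carries two stringop_algs descriptors (memcpy, then memset), and
   each descriptor has a 32-bit and a 64-bit variant; DUMMY_STRINGOP_ALGS
   fills the unused variant on tunings that only target one of the two.
   The {max, alg} pairs are scanned in order: the first pair whose MAX is
   at least the known block size selects the algorithm, and a MAX of -1
   terminates the list, catching all larger sizes.  The function name
   below is hypothetical.  */
#if 0
static enum stringop_alg
example_pick_alg (const struct stringop_algs *algs, HOST_WIDE_INT size)
{
  int i;
  for (i = 0; algs->size[i].max != -1; i++)
    if (size <= algs->size[i].max)
      return algs->size[i].alg;
  return algs->size[i].alg;     /* the terminating {-1, alg} entry */
}
#endif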

const
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),    /* cost of an add instruction */
  COSTS_N_BYTES (3),    /* cost of a lea instruction */
  COSTS_N_BYTES (2),    /* variable shift costs */
  COSTS_N_BYTES (3),    /* constant shift costs */
  {COSTS_N_BYTES (3),   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),    /* cost of movsx */
  COSTS_N_BYTES (3),    /* cost of movzx */
  2,                    /* cost for loading QImode using movzbl */
  {2, 2, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 2},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {2, 2, 2},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  3,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {3, 3},               /* cost of storing MMX registers
                           in SImode and DImode */
  3,                    /* cost of moving SSE register */
  {3, 3, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {3, 3, 3},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_BYTES (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  1,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  1,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {    /* 386 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (6),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),   /* HI */
   COSTS_N_INSNS (6),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (23),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs i486_cost = {    /* 486 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  4,                    /* size of l1 cache.  486 has 8kB cache
                           shared for code and data, so 4kB is
                           not really precise.  */
  4,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  8,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks the inline loop is still a
     noticeable win, for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
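  /* A gloss of the memcpy descriptor above (editorial addition, derived
     from the table itself): with unknown block size use rep movsl; for
     known sizes, blocks up to 128 bytes use an inline loop, up to 1024
     bytes an unrolled loop, up to 8192 bytes rep movsl, and anything
     larger rep movsb.  */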
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (2),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (7),   /* SI */
   COSTS_N_INSNS (7),   /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  1,                    /* cost for loading QImode using movzbl */
  {1, 1, 1},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {1, 1, 1},            /* cost of storing integer registers */
  1,                    /* cost of reg,reg fld/fst */
  {1, 1, 1},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 6, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */

  1,                    /* cost of moving MMX register */
  {1, 1},               /* cost of loading MMX registers
                           in SImode and DImode */
  {1, 1},               /* cost of storing MMX registers
                           in SImode and DImode */
  1,                    /* cost of moving SSE register */
  {1, 1, 1},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {1, 1, 1},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  1,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  128,                  /* size of l2 cache.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  32,                   /* size of l2 cache.  Some models
                           have integrated l2 cache, but
                           optimizing for k6 is not important
                           enough to worry about that.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  COSTS_N_INSNS (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (5),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),   /* HI */
   COSTS_N_INSNS (5),   /* SI */
   COSTS_N_INSNS (5),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) compared to K8.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 3, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* K8 has optimized REP instruction for medium sized blocks, but for very
     small blocks it is better to use a loop.  For large blocks, a libcall
     can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  5,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  3,                    /* vec_unalign_load_cost.  */
  3,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  2,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                             MOVD reg64, xmmreg  Double  FSTORE 4
                             MOVD reg32, xmmreg  Double  FSTORE 4
                           On AMDFAM10:
                             MOVD reg64, xmmreg  Double  FADD 3
                             MOVD reg32, xmmreg  Double  FADD 3  */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                             MOVD reg64, xmmreg  Double  FSTORE 4
                             MOVD reg32, xmmreg  Double  FSTORE 4
                           On AMDFAM10:
                             MOVD reg64, xmmreg  Double  FADD 3
                             MOVD reg32, xmmreg  Double  FADD 3  */
  64,                   /* size of l1 cache.  */
  1024,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it probably is not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */

  /* BDVER1 has optimized REP instruction for medium sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (3),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (4),    /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  12,                   /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  10,                   /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (5),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  3,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  6,                    /* cost of moving MMX register */
  {12, 12},             /* cost of loading MMX registers
                           in SImode and DImode */
  {12, 12},             /* cost of storing MMX registers
                           in SImode and DImode */
  6,                    /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {12, 12, 12},         /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  8,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  1024,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  16,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {6, 6, 6},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {6, 6},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {6, 6, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

static const
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,        /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
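
/* Illustrative sketch, not part of the original file: ix86_tune_features
   is derived at option-override time by masking each entry of
   initial_ix86_tune_features (below) with the bit of the CPU selected by
   -mtune, roughly:

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   so e.g. X86_TUNE_USE_LEAVE tests true when tuning for CORE2 because
   m_CORE2 is part of that entry's mask.  */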
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1484 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_ADD_ESP_8 */
1488 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1489 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1491 /* X86_TUNE_SUB_ESP_4 */
  m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
  | m_GENERIC,
1495 /* X86_TUNE_SUB_ESP_8 */
1496 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1497 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict between PPro/Pentium4-based chips that treat 128-bit SSE
     registers as single units and K8-based chips that divide SSE registers
     into two 64-bit halves.  This knob promotes all store destinations to
     be 128-bit so as to allow register renaming on 128-bit SSE units, but
     usually results in one extra microop on 64-bit SSE units.  Experimental
     results show that disabling this option on P4 brings over a 20% SPECfp
     regression, while enabling it on K8 brings roughly a 2.4% regression
     that can be partly masked by careful scheduling of moves.  */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and
     dependencies are resolved on SSE register parts instead of whole
     registers, so we may maintain just the lower part of scalar values
     in the proper format, leaving the upper part undefined.  */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in a 16-byte window.  */
  m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
  | m_GENERIC,
1566 /* X86_TUNE_SCHEDULE */
  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
  | m_GENERIC,
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
1583 /* X86_TUNE_SHORTEN_X87_SSE */
1586 /* X86_TUNE_AVOID_VECTOR_DECODE */
  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiplies, but the 386 and 486 do HImode multiplies
     faster.  */
  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
     a vector path on AMD machines.  */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path
     on AMD machines.  */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than a MOV.  */
  m_PENT,
  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,
  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a
     memory operand that cannot be represented using a modRM byte.  The
     XOR replacement is long decoded, so this split helps here as well.  */
  m_K6,
  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1619 from integer to FP. */
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
1624 compare-and-branch uop. */
  /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit.  This flag
     will impact LEA instruction selection.  */
  m_ATOM,
};
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486.  */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium.  */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486.  */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486.  */
  ~m_386,
};
1654 static const unsigned int x86_accumulate_outgoing_args
  = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC;
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
1662 static enum stringop_alg stringop_alg = no_stringop;
/* In case the average insn count for a single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
1679 /* ax, dx, cx, bx */
1680 AREG, DREG, CREG, BREG,
1681 /* si, di, bp, sp */
1682 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1684 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1685 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1688 /* flags, fpsr, fpcr, frame */
1689 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1691 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1694 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1699 /* SSE REX registers */
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
1704 /* The "default" register map used in 32bit mode. */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
1708 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1709 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1710 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1711 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1712 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
1717 /* The "default" register map used in 64bit mode. */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
1721 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1722 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1723 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1724 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1725 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point, then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
1758 particular, and even for DWARF regno 11, SDB only seems to under-
1759 stand that it should say that a variable lives in %st(0) (when
1760 asked via an `=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers:
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
1782 18 for %st(7) (gcc regno = 15)
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
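/* Illustrative sketch (not from the original file): translating a gcc
   register number into the SVR4 DWARF numbering is a plain table lookup,
   e.g. gcc regno 1 (%edx) maps to DWARF 2 and gcc regno 8 (%st(0)) maps to
   DWARF 11, while -1 marks registers with no DWARF encoding.  The helper
   name is hypothetical.  */
#if 0	/* Example only, never compiled.  */
static int
example_svr4_dwarf_regno (int gcc_regno)
{
  gcc_assert (gcc_regno >= 0 && gcc_regno < FIRST_PSEUDO_REGISTER);
  return svr4_dbx_register_map[gcc_regno];	/* -1 if unrepresentable */
}
#endif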
1795 /* Define parameter passing and return registers. */
static int const x86_64_int_parameter_registers[6] =
{
  DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};

static int const x86_64_ms_abi_int_parameter_registers[4] =
{
  CX_REG, DX_REG, R8_REG, R9_REG
};

static int const x86_64_int_return_registers[4] =
{
  AX_REG, DX_REG, DI_REG, SI_REG
};
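/* Illustrative sketch (not from the original file): under the SysV x86-64
   ABI the first six integer arguments travel in rdi, rsi, rdx, rcx, r8, r9,
   while the MS ABI uses only rcx, rdx, r8, r9; anything beyond that goes on
   the stack.  The helper below is hypothetical and only shows how the two
   tables above would be indexed.  */
#if 0	/* Example only, never compiled.  */
static int
example_int_arg_regno (enum calling_abi abi, int argno)
{
  if (abi == MS_ABI)
    return argno < 4 ? x86_64_ms_abi_int_parameter_registers[argno] : -1;
  return argno < 6 ? x86_64_int_parameter_registers[argno] : -1;
}
#endif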
1812 /* Define the structure for the machine field in struct function. */
struct GTY(()) stack_local_entry {
  unsigned short mode;
  unsigned short n;
  rtx rtl;
  struct stack_local_entry *next;
};
/* Structure describing the stack frame layout.
   The stack grows downward:

   [saved static chain]			if ix86_static_chain_on_stack
   [saved frame pointer]		if frame_pointer_needed
					<- HARD_FRAME_POINTER
   [register save area]			<- sse_regs_save_offset
   [va_arg registers]  |
   [padding2]	       | = to_allocate  */

struct ix86_frame
{
  int outgoing_arguments_size;
  HOST_WIDE_INT frame;
1856 /* The offsets relative to ARG_POINTER. */
1857 HOST_WIDE_INT frame_pointer_offset;
1858 HOST_WIDE_INT hard_frame_pointer_offset;
1859 HOST_WIDE_INT stack_pointer_offset;
1860 HOST_WIDE_INT reg_save_offset;
1861 HOST_WIDE_INT sse_reg_save_offset;
  /* When save_regs_using_mov is set, emit the prologue using
     move instead of push instructions.  */
  bool save_regs_using_mov;
};
1868 /* Code model option. */
1869 enum cmodel ix86_cmodel;
1871 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1873 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1875 /* Which unit we are generating floating point math for. */
1876 enum fpmath_unit ix86_fpmath;
1878 /* Which cpu are we scheduling for. */
1879 enum attr_cpu ix86_schedule;
1881 /* Which cpu are we optimizing for. */
1882 enum processor_type ix86_tune;
1884 /* Which instruction set architecture to use. */
1885 enum processor_type ix86_arch;
/* True if the SSE prefetch instruction is not a NOP.  */
1888 int x86_prefetch_sse;
1890 /* ix86_regparm_string as a number */
1891 static int ix86_regparm;
1893 /* -mstackrealign option */
1894 extern int ix86_force_align_arg_pointer;
1895 static const char ix86_force_align_arg_pointer_string[]
1896 = "force_align_arg_pointer";
1898 static rtx (*ix86_gen_leave) (void);
1899 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1900 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1901 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1902 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1903 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1904 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1905 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1906 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1909 /* Preferred alignment for stack boundary in bits. */
1910 unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits, as specified on the
   command line.  */
static unsigned int ix86_user_incoming_stack_boundary;
1916 /* Default alignment for incoming stack boundary in bits. */
1917 static unsigned int ix86_default_incoming_stack_boundary;
1919 /* Alignment for incoming stack boundary in bits. */
1920 unsigned int ix86_incoming_stack_boundary;
/* The ABI used by the target.  */
1923 enum calling_abi ix86_abi;
1925 /* Values 1-5: see jump.c */
1926 int ix86_branch_cost;
1928 /* Calling abi specific va_list type nodes. */
1929 static GTY(()) tree sysv_va_list_type_node;
1930 static GTY(()) tree ms_va_list_type_node;
1932 /* Variables which are this size or smaller are put in the data/bss
1933 or ldata/lbss sections. */
1935 int ix86_section_threshold = 65536;
1937 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1938 char internal_label_prefix[16];
1939 int internal_label_prefix_len;
/* Fence to use after a loop using movnt.  */
tree x86_mfence;
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class: gcc
   will just use an SFmode or DFmode move instead of a DImode move to
   avoid reformatting penalties.

   Similarly we play games with the INTEGERSI_CLASS to use cheaper SImode
   moves whenever possible (the upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };
1966 #define MAX_CLASSES 4
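/* Illustrative note (not from the original file): classification works on
   64-bit chunks ("eightbytes"), so MAX_CLASSES of 4 covers aggregates up
   to 32 bytes.  For example, under the psABI a struct { double d; int i; }
   occupies two eightbytes classified as X86_64_SSEDF_CLASS and
   X86_64_INTEGERSI_CLASS, so the double travels in an SSE register and
   the int in a general-purpose register.  */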
/* Table of constants used by fldpi, fldln2, etc.  */
1969 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1970 static bool ext_80387_constants_init = 0;
1973 static struct machine_function * ix86_init_machine_status (void);
1974 static rtx ix86_function_value (const_tree, const_tree, bool);
1975 static bool ix86_function_value_regno_p (const unsigned int);
1976 static rtx ix86_static_chain (const_tree, bool);
1977 static int ix86_function_regparm (const_tree, const_tree);
1978 static void ix86_compute_frame_layout (struct ix86_frame *);
static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
						 rtx, rtx, int);
1981 static void ix86_add_new_builtins (int);
1982 static rtx ix86_expand_vec_perm_builtin (tree);
1983 static tree ix86_canonical_va_list_type (tree);
enum ix86_function_specific_strings
{
  IX86_FUNCTION_SPECIFIC_ARCH,
  IX86_FUNCTION_SPECIFIC_TUNE,
  IX86_FUNCTION_SPECIFIC_FPMATH,
  IX86_FUNCTION_SPECIFIC_MAX
};
1993 static char *ix86_target_string (int, int, const char *, const char *,
1994 const char *, bool);
1995 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1996 static void ix86_function_specific_save (struct cl_target_option *);
1997 static void ix86_function_specific_restore (struct cl_target_option *);
1998 static void ix86_function_specific_print (FILE *, int,
1999 struct cl_target_option *);
2000 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2001 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2002 static bool ix86_can_inline_p (tree, tree);
2003 static void ix86_set_current_function (tree);
2004 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2006 static enum calling_abi ix86_function_abi (const_tree);
2009 #ifndef SUBTARGET32_DEFAULT_CPU
#define SUBTARGET32_DEFAULT_CPU "i386"
#endif
/* The svr4 ABI for the i386 says that records and unions are returned
   in memory.  */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
2019 /* Whether -mtune= or -march= were specified */
2020 static int ix86_tune_defaulted;
2021 static int ix86_arch_specified;
2023 /* Bit flags that specify the ISA we are compiling for. */
2024 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2026 /* A mask of ix86_isa_flags that includes bit X if X
2027 was set or cleared on the command line. */
2028 static int ix86_isa_flags_explicit;
2030 /* Define a set of ISAs which are available when a given ISA is
2031 enabled. MMX and SSE ISAs are handled separately. */
2033 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2034 #define OPTION_MASK_ISA_3DNOW_SET \
2035 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2037 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2038 #define OPTION_MASK_ISA_SSE2_SET \
2039 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2040 #define OPTION_MASK_ISA_SSE3_SET \
2041 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2042 #define OPTION_MASK_ISA_SSSE3_SET \
2043 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2044 #define OPTION_MASK_ISA_SSE4_1_SET \
2045 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2046 #define OPTION_MASK_ISA_SSE4_2_SET \
2047 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2048 #define OPTION_MASK_ISA_AVX_SET \
2049 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2050 #define OPTION_MASK_ISA_FMA_SET \
2051 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
/* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
   as -msse4.2.  */
#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2057 #define OPTION_MASK_ISA_SSE4A_SET \
2058 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2059 #define OPTION_MASK_ISA_FMA4_SET \
2060 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2061 | OPTION_MASK_ISA_AVX_SET)
2062 #define OPTION_MASK_ISA_XOP_SET \
2063 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
#define OPTION_MASK_ISA_LWP_SET OPTION_MASK_ISA_LWP
2067 /* AES and PCLMUL need SSE2 because they use xmm registers */
2068 #define OPTION_MASK_ISA_AES_SET \
2069 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2070 #define OPTION_MASK_ISA_PCLMUL_SET \
2071 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2073 #define OPTION_MASK_ISA_ABM_SET \
2074 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2076 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2077 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2078 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2079 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2080 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2082 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2083 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2084 #define OPTION_MASK_ISA_F16C_SET \
2085 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
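/* Illustrative note (not from the original file): the *_SET macros chain,
   so a single -msse4.2 pulls in every earlier SSE generation.  After full
   macro expansion,

     OPTION_MASK_ISA_SSE4_2_SET
       == OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1
	| OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3
	| OPTION_MASK_ISA_SSE2  | OPTION_MASK_ISA_SSE

   which is what ix86_handle_option ORs into ix86_isa_flags below.  */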
2087 /* Define a set of ISAs which aren't available when a given ISA is
2088 disabled. MMX and SSE ISAs are handled separately. */
2090 #define OPTION_MASK_ISA_MMX_UNSET \
2091 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2092 #define OPTION_MASK_ISA_3DNOW_UNSET \
2093 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2094 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2096 #define OPTION_MASK_ISA_SSE_UNSET \
2097 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2098 #define OPTION_MASK_ISA_SSE2_UNSET \
2099 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2100 #define OPTION_MASK_ISA_SSE3_UNSET \
2101 (OPTION_MASK_ISA_SSE3 \
2102 | OPTION_MASK_ISA_SSSE3_UNSET \
2103 | OPTION_MASK_ISA_SSE4A_UNSET )
2104 #define OPTION_MASK_ISA_SSSE3_UNSET \
2105 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2106 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2107 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2108 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2109 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2110 #define OPTION_MASK_ISA_AVX_UNSET \
2111 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2112 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2113 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
   as -mno-sse4.1.  */
#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2119 #define OPTION_MASK_ISA_SSE4A_UNSET \
2120 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2122 #define OPTION_MASK_ISA_FMA4_UNSET \
2123 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2124 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2125 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2127 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2128 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2129 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2130 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2131 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2132 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2133 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2134 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2136 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2137 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2138 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
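/* Illustrative note (not from the original file): the *_UNSET macros chain
   in the opposite direction of the *_SET macros, so -mno-sse3 clears SSE3
   together with everything that needs it.  After full expansion,

     OPTION_MASK_ISA_SSE3_UNSET
       covers SSE3 | SSSE3 | SSE4_1 | SSE4_2 | AVX | FMA | FMA4
	| F16C | XOP | SSE4A

   which keeps ix86_isa_flags free of ISAs whose prerequisites were just
   disabled.  */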
2140 /* Vectorization library interface and handlers. */
2141 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2143 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2144 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
/* Processor target table, indexed by processor number.  */
struct ptt
{
  const struct processor_costs *cost;	/* Processor costs */
  const int align_loop;			/* Default alignments.  */
  const int align_loop_max_skip;
  const int align_jump;
  const int align_jump_max_skip;
  const int align_func;
};

static const struct ptt processor_target_table[PROCESSOR_max] =
{
2159 {&i386_cost, 4, 3, 4, 3, 4},
2160 {&i486_cost, 16, 15, 16, 15, 16},
2161 {&pentium_cost, 16, 7, 16, 7, 16},
2162 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2163 {&geode_cost, 0, 0, 0, 0, 0},
2164 {&k6_cost, 32, 7, 32, 7, 32},
2165 {&athlon_cost, 16, 7, 16, 7, 16},
2166 {&pentium4_cost, 0, 0, 0, 0, 0},
2167 {&k8_cost, 16, 7, 16, 7, 16},
2168 {&nocona_cost, 0, 0, 0, 0, 0},
2169 {&core2_cost, 16, 10, 16, 10, 16},
2170 {&generic32_cost, 16, 7, 16, 7, 16},
2171 {&generic64_cost, 16, 10, 16, 10, 16},
2172 {&amdfam10_cost, 32, 24, 32, 7, 32},
2173 {&bdver1_cost, 32, 24, 32, 7, 32},
  {&atom_cost, 16, 7, 16, 7, 16}
};
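/* Illustrative sketch (not from the original file): the table above is
   indexed by the active -mtune processor, which is how the alignment
   defaults are picked up later in override_options.  The function name is
   hypothetical.  */
#if 0	/* Example only, never compiled.  */
static int
example_default_loop_align (enum processor_type tune)
{
  /* e.g. PROCESSOR_K8 yields 16, PROCESSOR_AMDFAM10 yields 32.  */
  return processor_target_table[tune].align_loop;
}
#endif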
2177 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
/* Return true if a red-zone is in use.  */

static inline bool
ix86_using_red_zone (void)
{
  return TARGET_RED_ZONE && !TARGET_64BIT_MS_ABI;
}
/* Implement TARGET_HANDLE_OPTION.  */

static bool
ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
{
  switch (code)
    {
    case OPT_mmmx:
      if (value)
	{
2222 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2223 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2227 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2235 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2236 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2240 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2241 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2251 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2252 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2256 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2264 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2265 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2269 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2277 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2278 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2282 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2290 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2291 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2295 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2303 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2304 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2308 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2316 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2317 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2321 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2329 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2330 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2334 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2342 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2343 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2347 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2348 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2353 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2354 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2358 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2359 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2365 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2366 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2370 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2371 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2378 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2379 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2383 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2384 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2391 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2392 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2396 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2397 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2404 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2405 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2409 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2410 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2417 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2418 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2422 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2423 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2430 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2431 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2435 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2436 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2443 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2444 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2448 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2449 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2456 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2457 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2461 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2462 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2469 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2470 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2474 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2475 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2482 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2483 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2487 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2488 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2495 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2496 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2500 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2501 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2508 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2509 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2513 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2514 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2521 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2522 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2526 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2527 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2534 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2535 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2539 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2540 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2547 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2548 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2552 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
	  ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
	}
      return true;

    default:
      return true;
    }
}
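/* Illustrative note (not from the original file): every case in
   ix86_handle_option follows the same shape, shown here for a hypothetical
   OPT_mfoo built from FOO_SET/FOO_UNSET macros like the ones defined above:

     case OPT_mfoo:
       if (value)
	 {
	   ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
	 }
       else
	 {
	   ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
	   ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
	 }
       return true;

   Recording the bits in ix86_isa_flags_explicit is what later lets
   override_options avoid re-enabling an ISA the user turned off.  */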
2562 /* Return a string that documents the current -m options. The caller is
2563 responsible for freeing the string. */
static char *
ix86_target_string (int isa, int flags, const char *arch, const char *tune,
		    const char *fpmath, bool add_nl_p)
{
  struct ix86_target_opts
  {
    const char *option;		/* option string */
    int mask;			/* isa mask options */
  };
  /* This table is ordered so that options like -msse4.2 that imply
     preceding options match first.  */
  static struct ix86_target_opts isa_opts[] =
  {
2579 { "-m64", OPTION_MASK_ISA_64BIT },
2580 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2581 { "-mfma", OPTION_MASK_ISA_FMA },
2582 { "-mxop", OPTION_MASK_ISA_XOP },
2583 { "-mlwp", OPTION_MASK_ISA_LWP },
2584 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2585 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2586 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2587 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2588 { "-msse3", OPTION_MASK_ISA_SSE3 },
2589 { "-msse2", OPTION_MASK_ISA_SSE2 },
2590 { "-msse", OPTION_MASK_ISA_SSE },
2591 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2592 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2593 { "-mmmx", OPTION_MASK_ISA_MMX },
2594 { "-mabm", OPTION_MASK_ISA_ABM },
2595 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2596 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2597 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2598 { "-maes", OPTION_MASK_ISA_AES },
2599 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2600 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2601 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2602 { "-mf16c", OPTION_MASK_ISA_F16C },
2606 static struct ix86_target_opts flag_opts[] =
2608 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2609 { "-m80387", MASK_80387 },
2610 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2611 { "-malign-double", MASK_ALIGN_DOUBLE },
2612 { "-mcld", MASK_CLD },
2613 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2614 { "-mieee-fp", MASK_IEEE_FP },
2615 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2616 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2617 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2618 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2619 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2620 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2621 { "-mno-red-zone", MASK_NO_RED_ZONE },
2622 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2623 { "-mrecip", MASK_RECIP },
2624 { "-mrtd", MASK_RTD },
2625 { "-msseregparm", MASK_SSEREGPARM },
2626 { "-mstack-arg-probe", MASK_STACK_PROBE },
2627 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2630 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
  char isa_other[40];
  char target_other[40];
  unsigned num = 0;
  unsigned i, j;
  char *ret;
  char *ptr;
  size_t len;
  size_t line_len;
  size_t sep_len;
2642 memset (opts, '\0', sizeof (opts));
  /* Add -march= option.  */
  if (arch)
    {
      opts[num][0] = "-march=";
      opts[num++][1] = arch;
    }

  /* Add -mtune= option.  */
  if (tune)
    {
      opts[num][0] = "-mtune=";
      opts[num++][1] = tune;
    }
  /* Pick out the options in isa options.  */
  for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
    {
      if ((isa & isa_opts[i].mask) != 0)
	{
	  opts[num++][0] = isa_opts[i].option;
	  isa &= ~ isa_opts[i].mask;
	}
    }

  if (isa && add_nl_p)
    {
      opts[num++][0] = isa_other;
      sprintf (isa_other, "(other isa: %#x)", isa);
    }
  /* Add flag options.  */
  for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
    {
      if ((flags & flag_opts[i].mask) != 0)
	{
	  opts[num++][0] = flag_opts[i].option;
	  flags &= ~ flag_opts[i].mask;
	}
    }

  if (flags && add_nl_p)
    {
      opts[num++][0] = target_other;
      sprintf (target_other, "(other flags: %#x)", flags);
    }
  /* Add -fpmath= option.  */
  if (fpmath)
    {
      opts[num][0] = "-mfpmath=";
      opts[num++][1] = fpmath;
    }
2701 gcc_assert (num < ARRAY_SIZE (opts));
  /* Size the string.  */
  len = 0;
  sep_len = (add_nl_p) ? 3 : 1;
  for (i = 0; i < num; i++)
    {
      len += sep_len;
      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  len += strlen (opts[i][j]);
    }
  /* Build the string.  */
  ret = ptr = (char *) xmalloc (len);
  line_len = 0;

  for (i = 0; i < num; i++)
    {
      size_t len2[2];

      for (j = 0; j < 2; j++)
	len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;

      if (i != 0)
	{
	  *ptr++ = ' ';
	  line_len++;

	  if (add_nl_p && line_len + len2[0] + len2[1] > 70)
	    {
	      *ptr++ = '\\';
	      *ptr++ = '\n';
	      line_len = 0;
	    }
	}

      for (j = 0; j < 2; j++)
	if (opts[i][j])
	  {
	    memcpy (ptr, opts[i][j], len2[j]);
	    ptr += len2[j];
	    line_len += len2[j];
	  }
    }

  *ptr = '\0';
  gcc_assert (ret + len >= ptr);

  return ret;
}
/* Return TRUE if software prefetching is beneficial for the
   function being compiled.  */

static bool
software_prefetching_beneficial_p (void)
{
  switch (ix86_tune)
    {
    case PROCESSOR_GEODE:
    case PROCESSOR_K6:
    case PROCESSOR_ATHLON:
    case PROCESSOR_K8:
    case PROCESSOR_AMDFAM10:
    case PROCESSOR_BDVER1:
      return true;

    default:
      return false;
    }
}
/* Return true if profiling code should be emitted before the prologue,
   false otherwise.
   Note: for x86 with "hotfix", a sorry () diagnostic is issued.  */

static bool
ix86_profile_before_prologue (void)
{
  return flag_fentry != 0;
}
/* Function that is callable from the debugger to print the current
   options.  */
void
ix86_debug_options (void)
{
  char *opts = ix86_target_string (ix86_isa_flags, target_flags,
				   ix86_arch_string, ix86_tune_string,
				   ix86_fpmath_string, true);

  if (opts)
    {
      fprintf (stderr, "%s\n\n", opts);
      free (opts);
    }
  else
    fputs ("<no options>\n\n", stderr);

  return;
}
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */
static void
override_options (bool main_args_p)
{
  int i;
2815 unsigned int ix86_arch_mask, ix86_tune_mask;
  const bool ix86_tune_specified = (ix86_tune_string != NULL);
  const char *prefix;
  const char *suffix;
  const char *sw;
2821 /* Comes from final.c -- no real reason to change it. */
2822 #define MAX_CODE_ALIGN 16
  enum pta_flags
    {
      PTA_SSE = 1 << 0,
      PTA_SSE2 = 1 << 1,
      PTA_SSE3 = 1 << 2,
      PTA_MMX = 1 << 3,
      PTA_PREFETCH_SSE = 1 << 4,
      PTA_3DNOW = 1 << 5,
      PTA_3DNOW_A = 1 << 6,
      PTA_64BIT = 1 << 7,
      PTA_SSSE3 = 1 << 8,
      PTA_CX16 = 1 << 9,
      PTA_POPCNT = 1 << 10,
      PTA_ABM = 1 << 11,
      PTA_SSE4A = 1 << 12,
      PTA_NO_SAHF = 1 << 13,
      PTA_SSE4_1 = 1 << 14,
      PTA_SSE4_2 = 1 << 15,
      PTA_AES = 1 << 16,
      PTA_PCLMUL = 1 << 17,
      PTA_AVX = 1 << 18,
      PTA_FMA = 1 << 19,
      PTA_MOVBE = 1 << 20,
      PTA_FMA4 = 1 << 21,
      PTA_XOP = 1 << 22,
      PTA_LWP = 1 << 23,
      PTA_FSGSBASE = 1 << 24,
      PTA_RDRND = 1 << 25,
      PTA_F16C = 1 << 26
    };

  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum attr_cpu schedule;
      const unsigned /*enum pta_flags*/ flags;
    }
  const processor_alias_table[] =
    {
2864 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2865 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2866 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2867 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2868 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2869 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2870 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2871 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2872 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2873 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2874 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2875 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2876 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2878 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2880 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2881 PTA_MMX | PTA_SSE | PTA_SSE2},
2882 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
	PTA_MMX | PTA_SSE | PTA_SSE2},
2884 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2885 PTA_MMX | PTA_SSE | PTA_SSE2},
2886 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2887 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2888 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2889 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2890 | PTA_CX16 | PTA_NO_SAHF},
2891 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2892 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2893 | PTA_SSSE3 | PTA_CX16},
2894 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2895 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2896 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2897 {"geode", PROCESSOR_GEODE, CPU_GEODE,
	PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2899 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2900 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2901 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2902 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2903 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2904 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2905 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2906 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2907 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2908 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2909 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2910 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2911 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2912 {"x86-64", PROCESSOR_K8, CPU_K8,
2913 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2914 {"k8", PROCESSOR_K8, CPU_K8,
2915 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2916 | PTA_SSE2 | PTA_NO_SAHF},
2917 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2918 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2919 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2920 {"opteron", PROCESSOR_K8, CPU_K8,
2921 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2922 | PTA_SSE2 | PTA_NO_SAHF},
2923 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2924 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2925 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2926 {"athlon64", PROCESSOR_K8, CPU_K8,
2927 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2928 | PTA_SSE2 | PTA_NO_SAHF},
2929 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2930 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2931 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2932 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2933 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2934 | PTA_SSE2 | PTA_NO_SAHF},
2935 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2936 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2937 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2938 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2939 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2940 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2941 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2942 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2943 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2944 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2945 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2946 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2947 0 /* flags are only used for -march switch. */ },
2948 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
	PTA_64BIT /* flags are only used for -march switch.  */ },
    };
2952 int const pta_size = ARRAY_SIZE (processor_alias_table);
  /* Set up prefix/suffix so the error messages refer to either the command
     line argument, or the attribute(target).  */
  if (main_args_p)
    {
      prefix = "-m";
      suffix = "";
      sw = "switch";
    }
  else
    {
      prefix = "option(\"";
      suffix = "\")";
      sw = "attribute";
    }
2969 #ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif
2973 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
  SUBSUBTARGET_OVERRIDE_OPTIONS;
#endif
  /* -fPIC is the default for x86_64.  */
  if (TARGET_MACHO && TARGET_64BIT)
    flag_pic = 2;
2981 /* Need to check -mtune=generic first. */
  if (ix86_tune_string)
    {
      if (!strcmp (ix86_tune_string, "generic")
2985 || !strcmp (ix86_tune_string, "i686")
2986 /* As special support for cross compilers we read -mtune=native
2987 as -mtune=generic. With native compilers we won't see the
2988 -mtune=native, as it was changed by the driver. */
2989 || !strcmp (ix86_tune_string, "native"))
2992 ix86_tune_string = "generic64";
2994 ix86_tune_string = "generic32";
2996 /* If this call is for setting the option attribute, allow the
2997 generic32/generic64 that was previously set. */
2998 else if (!main_args_p
2999 && (!strcmp (ix86_tune_string, "generic32")
3000 || !strcmp (ix86_tune_string, "generic64")))
3002 else if (!strncmp (ix86_tune_string, "generic", 7))
3003 error ("bad value (%s) for %stune=%s %s",
3004 ix86_tune_string, prefix, suffix, sw);
3005 else if (!strcmp (ix86_tune_string, "x86-64"))
3006 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3007 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3008 prefix, suffix, prefix, suffix, prefix, suffix);
    }
  else
    {
      if (ix86_arch_string)
	ix86_tune_string = ix86_arch_string;
      if (!ix86_tune_string)
	{
	  ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
	  ix86_tune_defaulted = 1;
	}
3020 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3021 need to use a sensible tune option. */
3022 if (!strcmp (ix86_tune_string, "generic")
3023 || !strcmp (ix86_tune_string, "x86-64")
3024 || !strcmp (ix86_tune_string, "i686"))
3027 ix86_tune_string = "generic64";
3029 ix86_tune_string = "generic32";
  if (ix86_stringop_string)
    {
      if (!strcmp (ix86_stringop_string, "rep_byte"))
	stringop_alg = rep_prefix_1_byte;
      else if (!strcmp (ix86_stringop_string, "libcall"))
	stringop_alg = libcall;
      else if (!strcmp (ix86_stringop_string, "rep_4byte"))
	stringop_alg = rep_prefix_4_byte;
      else if (!strcmp (ix86_stringop_string, "rep_8byte")
	       && TARGET_64BIT)
	/* rep; movq isn't available in 32-bit code.  */
	stringop_alg = rep_prefix_8_byte;
      else if (!strcmp (ix86_stringop_string, "byte_loop"))
	stringop_alg = loop_1_byte;
      else if (!strcmp (ix86_stringop_string, "loop"))
	stringop_alg = loop;
      else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
	stringop_alg = unrolled_loop;
      else
	error ("bad value (%s) for %sstringop-strategy=%s %s",
	       ix86_stringop_string, prefix, suffix, sw);
    }
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
  else
    ix86_arch_specified = 1;
  /* Validate -mabi= value.  */
  if (ix86_abi_string)
    {
      if (strcmp (ix86_abi_string, "sysv") == 0)
	ix86_abi = SYSV_ABI;
      else if (strcmp (ix86_abi_string, "ms") == 0)
	ix86_abi = MS_ABI;
      else
	error ("unknown ABI (%s) for %sabi=%s %s",
	       ix86_abi_string, prefix, suffix, sw);
    }
  else
    ix86_abi = DEFAULT_ABI;
  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (!strcmp (ix86_cmodel_string, "medium"))
	ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large"))
	ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
      else if (flag_pic)
	error ("code model %s does not support PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else
	error ("bad value (%s) for %scmodel=%s %s",
	       ix86_cmodel_string, prefix, suffix, sw);
    }
  else
    {
3095 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3096 use of rip-relative addressing. This eliminates fixups that
3097 would otherwise be needed if this object is to be placed in a
3098 DLL, and is essentially just as efficient as direct addressing. */
3099 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3100 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3101 else if (TARGET_64BIT)
3102 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else
	ix86_cmodel = CM_32;
    }
  if (ix86_asm_string != 0)
    {
      if (! TARGET_MACHO
	  && !strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for %sasm=%s %s",
	       ix86_asm_string, prefix, suffix, sw);
    }
3117 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3118 error ("code model %qs not supported in the %s bit mode",
3119 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3120 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3121 sorry ("%i-bit mode not compiled in",
3122 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3124 for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
3127 ix86_schedule = processor_alias_table[i].schedule;
3128 ix86_arch = processor_alias_table[i].processor;
3129 /* Default cpu tuning to the architecture. */
3130 ix86_tune = ix86_arch;
3132 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3133 error ("CPU you selected does not support x86-64 "
3136 if (processor_alias_table[i].flags & PTA_MMX
3137 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3138 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3139 if (processor_alias_table[i].flags & PTA_3DNOW
3140 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3141 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3142 if (processor_alias_table[i].flags & PTA_3DNOW_A
3143 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3144 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3145 if (processor_alias_table[i].flags & PTA_SSE
3146 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3147 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3148 if (processor_alias_table[i].flags & PTA_SSE2
3149 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3150 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3151 if (processor_alias_table[i].flags & PTA_SSE3
3152 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3153 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3154 if (processor_alias_table[i].flags & PTA_SSSE3
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3156 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3157 if (processor_alias_table[i].flags & PTA_SSE4_1
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3159 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3160 if (processor_alias_table[i].flags & PTA_SSE4_2
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3162 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3163 if (processor_alias_table[i].flags & PTA_AVX
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3165 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3166 if (processor_alias_table[i].flags & PTA_FMA
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3168 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3169 if (processor_alias_table[i].flags & PTA_SSE4A
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3171 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3172 if (processor_alias_table[i].flags & PTA_FMA4
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3174 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3175 if (processor_alias_table[i].flags & PTA_XOP
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3177 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3178 if (processor_alias_table[i].flags & PTA_LWP
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3180 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3181 if (processor_alias_table[i].flags & PTA_ABM
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3183 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3184 if (processor_alias_table[i].flags & PTA_CX16
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3186 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3187 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3189 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3190 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3192 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3193 if (processor_alias_table[i].flags & PTA_MOVBE
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3195 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3196 if (processor_alias_table[i].flags & PTA_AES
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3198 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3199 if (processor_alias_table[i].flags & PTA_PCLMUL
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3201 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3202 if (processor_alias_table[i].flags & PTA_FSGSBASE
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3204 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3205 if (processor_alias_table[i].flags & PTA_RDRND
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3207 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3208 if (processor_alias_table[i].flags & PTA_F16C
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3210 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
	if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
	  x86_prefetch_sse = true;

	break;
      }
3217 if (!strcmp (ix86_arch_string, "generic"))
3218 error ("generic CPU can be used only for %stune=%s %s",
3219 prefix, suffix, sw);
3220 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3221 error ("bad value (%s) for %sarch=%s %s",
3222 ix86_arch_string, prefix, suffix, sw);
3224 ix86_arch_mask = 1u << ix86_arch;
3225 for (i = 0; i < X86_ARCH_LAST; ++i)
3226 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3228 for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
3231 ix86_schedule = processor_alias_table[i].schedule;
3232 ix86_tune = processor_alias_table[i].processor;
	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
	  {
	    if (ix86_tune_defaulted)
	      {
3237 ix86_tune_string = "x86-64";
3238 for (i = 0; i < pta_size; i++)
3239 if (! strcmp (ix86_tune_string,
3240 processor_alias_table[i].name))
		    break;
		ix86_schedule = processor_alias_table[i].schedule;
		ix86_tune = processor_alias_table[i].processor;
	      }
	    else
	      error ("CPU you selected does not support x86-64 "
		     "instruction set");
	  }
3249 /* Intel CPUs have always interpreted SSE prefetch instructions as
3250 NOPs; so, we can enable SSE prefetch instructions even when
3251 -mtune (rather than -march) points us to a processor that has them.
3252 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3253 higher processors. */
	if (TARGET_CMOVE
	    && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
	  x86_prefetch_sse = true;

	break;
      }
3260 if (ix86_tune_specified && i == pta_size)
3261 error ("bad value (%s) for %stune=%s %s",
3262 ix86_tune_string, prefix, suffix, sw);
3264 ix86_tune_mask = 1u << ix86_tune;
3265 for (i = 0; i < X86_TUNE_LAST; ++i)
3266 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3268 #ifndef USE_IX86_FRAME_POINTER
#define USE_IX86_FRAME_POINTER 0
#endif
3272 /* Set the default values for switches whose default depends on TARGET_64BIT
3273 in case they weren't overwritten by command line options. */
  if (TARGET_64BIT)
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = 1;
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = 1;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = 0;
    }
  else
    {
      if (flag_omit_frame_pointer == 2)
	flag_omit_frame_pointer = !(USE_IX86_FRAME_POINTER || optimize_size);
      if (flag_asynchronous_unwind_tables == 2)
	flag_asynchronous_unwind_tables = !USE_IX86_FRAME_POINTER;
      if (flag_pcc_struct_return == 2)
	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
    }
  if (optimize_size)
    ix86_cost = &ix86_size_cost;
  else
    ix86_cost = processor_target_table[ix86_tune].cost;
3302 /* Arrange to set up i386_stack_locals for all functions. */
3303 init_machine_status = ix86_init_machine_status;
3305 /* Validate -mregparm= value. */
  if (ix86_regparm_string)
    {
      if (TARGET_64BIT)
	warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("%sregparm=%d%s is not between 0 and %d",
	       prefix, i, suffix, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;
3320 /* If the user has provided any of the -malign-* options,
3321 warn and use that value only if -falign-* is not set.
3322 Remove this code in GCC 3.2 or later. */
  if (ix86_align_loops_string)
    {
      warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
	       prefix, suffix, suffix);
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-loops=%d%s is not between 0 and %d",
		   prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }
  if (ix86_align_jumps_string)
    {
      warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
	       prefix, suffix, suffix);
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-jumps=%d%s is not between 0 and %d",
		   prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }
  if (ix86_align_funcs_string)
    {
      warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
	       prefix, suffix, suffix);
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("%salign-functions=%d%s is not between 0 and %d",
		   prefix, i, suffix, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }
  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_tune].align_loop;
      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_tune].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_tune].align_func;
    }
3384 /* Validate -mbranch-cost= value, or provide default. */
3385 ix86_branch_cost = ix86_cost->branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
      else
	ix86_branch_cost = i;
    }
  if (ix86_section_threshold_string)
    {
      i = atoi (ix86_section_threshold_string);
      if (i < 0)
	error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
      else
	ix86_section_threshold = i;
    }
  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU2;
      else
	error ("bad value (%s) for %stls-dialect=%s %s",
	       ix86_tls_dialect_string, prefix, suffix, sw);
    }
  if (ix87_precision_string)
    {
      i = atoi (ix87_precision_string);
      if (i != 32 && i != 64 && i != 80)
	error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
    }
  if (TARGET_64BIT)
    {
      target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3425 /* Enable by default the SSE and MMX builtins. Do allow the user to
3426 explicitly disable any of these. In particular, disabling SSE and
3427 MMX for kernel code is extremely useful. */
      if (!ix86_arch_specified)
	ix86_isa_flags
	  |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
	       | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3434 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3438 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3440 if (!ix86_arch_specified)
3442 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3444 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3445 when the programmer takes care to keep the stack from being clobbered. */
3446 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3447 target_flags |= MASK_NO_RED_ZONE;
3450 /* Keep nonleaf frame pointers. */
3451 if (flag_omit_frame_pointer)
3452 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3453 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3454 flag_omit_frame_pointer = 1;
3456 /* If we're doing fast math, we don't care about comparison order
3457 wrt NaNs. This lets us use a shorter comparison sequence. */
3458 if (flag_finite_math_only)
3459 target_flags &= ~MASK_IEEE_FP;
3461 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3462 since the insns won't need emulation. */
3463 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3464 target_flags &= ~MASK_NO_FANCY_MATH_387;
3466 /* Likewise, if the target doesn't have a 387, or we've specified
3467 software floating point, don't use 387 inline intrinsics. */
3469 target_flags |= MASK_NO_FANCY_MATH_387;
3471 /* Turn on MMX builtins for -msse. */
3474 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3475 x86_prefetch_sse = true;
3478 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3479 if (TARGET_SSE4_2 || TARGET_ABM)
3480 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3482 /* Validate -mpreferred-stack-boundary= value or default it to
3483 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3484 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3485 if (ix86_preferred_stack_boundary_string)
3487 i = atoi (ix86_preferred_stack_boundary_string);
3488 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3489 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3490 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3492 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
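/* Illustrative note (not from the original sources): the option value is an
   exponent, not a byte count.  For example, on a 32-bit target
       gcc -mpreferred-stack-boundary=4
   yields (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
   alignment, while the minimum accepted value of 2 gives the 4-byte
   alignment the i386 ABI requires.  */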
3495 /* Set the default value for -mstackrealign. */
3496 if (ix86_force_align_arg_pointer == -1)
3497 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3499 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3501 /* Validate -mincoming-stack-boundary= value or default it to
3502 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3503 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3504 if (ix86_incoming_stack_boundary_string)
3506 i = atoi (ix86_incoming_stack_boundary_string);
3507 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3508 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3509 i, TARGET_64BIT ? 4 : 2);
3512 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3513 ix86_incoming_stack_boundary
3514 = ix86_user_incoming_stack_boundary;
3518 /* Accept -msseregparm only if at least SSE support is enabled. */
3519 if (TARGET_SSEREGPARM
3521 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3523 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3524 if (ix86_fpmath_string != 0)
3526 if (! strcmp (ix86_fpmath_string, "387"))
3527 ix86_fpmath = FPMATH_387;
3528 else if (! strcmp (ix86_fpmath_string, "sse"))
3532 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3533 ix86_fpmath = FPMATH_387;
3536 ix86_fpmath = FPMATH_SSE;
3538 else if (! strcmp (ix86_fpmath_string, "387,sse")
3539 || ! strcmp (ix86_fpmath_string, "387+sse")
3540 || ! strcmp (ix86_fpmath_string, "sse,387")
3541 || ! strcmp (ix86_fpmath_string, "sse+387")
3542 || ! strcmp (ix86_fpmath_string, "both"))
3546 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3547 ix86_fpmath = FPMATH_387;
3549 else if (!TARGET_80387)
3551 warning (0, "387 instruction set disabled, using SSE arithmetics");
3552 ix86_fpmath = FPMATH_SSE;
3555 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3558 error ("bad value (%s) for %sfpmath=%s %s",
3559 ix86_fpmath_string, prefix, suffix, sw);
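/* Usage sketch (illustrative): the accepted -mfpmath spellings map as
       -mfpmath=387      =>  FPMATH_387
       -mfpmath=sse      =>  FPMATH_SSE, falling back to 387 with a
                             warning when SSE is disabled
       -mfpmath=sse,387  =>  FPMATH_SSE | FPMATH_387 ("both", "387+sse"
                             and the other spellings above are synonyms),
                             provided both units are actually available.  */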
3562 /* If the i387 is disabled, then do not return values in it. */
3564 target_flags &= ~MASK_FLOAT_RETURNS;
3566 /* Use external vectorized library in vectorizing intrinsics. */
3567 if (ix86_veclibabi_string)
3569 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3570 ix86_veclib_handler = ix86_veclibabi_svml;
3571 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3572 ix86_veclib_handler = ix86_veclibabi_acml;
3574 error ("unknown vectorization library ABI type (%s) for "
3575 "%sveclibabi=%s %s", ix86_veclibabi_string,
3576 prefix, suffix, sw);
3579 if ((!USE_IX86_FRAME_POINTER
3580 || (x86_accumulate_outgoing_args & ix86_tune_mask))
3581 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3583 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3585 /* ??? Unwind info is not correct around the CFG unless either a frame
3586 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3587 unwind info generation to be aware of the CFG and propagating states around edges. */
3589 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3590 || flag_exceptions || flag_non_call_exceptions)
3591 && flag_omit_frame_pointer
3592 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3594 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3595 warning (0, "unwind tables currently require either a frame pointer "
3596 "or %saccumulate-outgoing-args%s for correctness",
3598 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3601 /* If stack probes are required, the space used for large function
3602 arguments on the stack must also be probed, so enable
3603 -maccumulate-outgoing-args so this happens in the prologue. */
3604 if (TARGET_STACK_PROBE
3605 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3607 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3608 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3609 "for correctness", prefix, suffix);
3610 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3613 /* For sane SSE instruction set generation we need the fcomi instruction.
3614 It is safe to enable all CMOVE instructions. */
3618 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3621 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3622 p = strchr (internal_label_prefix, 'X');
3623 internal_label_prefix_len = p - internal_label_prefix;
3627 /* When no scheduling description is available, disable the scheduler pass
3628 so it won't slow down compilation and make x87 code slower. */
3629 if (!TARGET_SCHEDULE)
3630 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3632 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3633 set_param_value ("simultaneous-prefetches",
3634 ix86_cost->simultaneous_prefetches);
3635 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3636 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3637 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3638 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3639 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3640 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3642 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial. */
3643 if (flag_prefetch_loop_arrays < 0
3646 && software_prefetching_beneficial_p ())
3647 flag_prefetch_loop_arrays = 1;
3649 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3650 can be optimized to ap = __builtin_next_arg (0). */
3652 targetm.expand_builtin_va_start = NULL;
3656 ix86_gen_leave = gen_leave_rex64;
3657 ix86_gen_add3 = gen_adddi3;
3658 ix86_gen_sub3 = gen_subdi3;
3659 ix86_gen_sub3_carry = gen_subdi3_carry;
3660 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3661 ix86_gen_monitor = gen_sse3_monitor64;
3662 ix86_gen_andsp = gen_anddi3;
3663 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3664 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3665 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3669 ix86_gen_leave = gen_leave;
3670 ix86_gen_add3 = gen_addsi3;
3671 ix86_gen_sub3 = gen_subsi3;
3672 ix86_gen_sub3_carry = gen_subsi3_carry;
3673 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3674 ix86_gen_monitor = gen_sse3_monitor;
3675 ix86_gen_andsp = gen_andsi3;
3676 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3677 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3678 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
3682 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3684 target_flags |= MASK_CLD & ~target_flags_explicit;
3687 if (!TARGET_64BIT && flag_pic)
3689 if (flag_fentry > 0)
3690 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3693 if (flag_fentry < 0)
3695 #if defined(PROFILE_BEFORE_PROLOGUE)
3702 /* Save the initial options in case the user uses function-specific options. */
3704 target_option_default_node = target_option_current_node
3705 = build_target_option_node ();
3708 /* Update register usage after having seen the compiler flags. */
3711 ix86_conditional_register_usage (void)
3716 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3718 if (fixed_regs[i] > 1)
3719 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3720 if (call_used_regs[i] > 1)
3721 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3724 /* The PIC register, if it exists, is fixed. */
3725 j = PIC_OFFSET_TABLE_REGNUM;
3726 if (j != INVALID_REGNUM)
3727 fixed_regs[j] = call_used_regs[j] = 1;
3729 /* The MS_ABI changes the set of call-used registers. */
3730 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3732 call_used_regs[SI_REG] = 0;
3733 call_used_regs[DI_REG] = 0;
3734 call_used_regs[XMM6_REG] = 0;
3735 call_used_regs[XMM7_REG] = 0;
3736 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3737 call_used_regs[i] = 0;
3740 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3741 other call-clobbered regs for 64-bit. */
3744 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3746 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3747 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3748 && call_used_regs[i])
3749 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3752 /* If MMX is disabled, squash the registers. */
3754 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3755 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3756 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3758 /* If SSE is disabled, squash the registers. */
3760 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3761 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3762 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3764 /* If the FPU is disabled, squash the registers. */
3765 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3766 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3767 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3768 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3770 /* If 32-bit, squash the 64-bit registers. */
3773 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3775 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3781 /* Save the current options */
3784 ix86_function_specific_save (struct cl_target_option *ptr)
3786 ptr->arch = ix86_arch;
3787 ptr->schedule = ix86_schedule;
3788 ptr->tune = ix86_tune;
3789 ptr->fpmath = ix86_fpmath;
3790 ptr->branch_cost = ix86_branch_cost;
3791 ptr->tune_defaulted = ix86_tune_defaulted;
3792 ptr->arch_specified = ix86_arch_specified;
3793 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3794 ptr->target_flags_explicit = target_flags_explicit;
3796 /* The fields are char but the variables are not; make sure the
3797 values fit in the fields. */
3798 gcc_assert (ptr->arch == ix86_arch);
3799 gcc_assert (ptr->schedule == ix86_schedule);
3800 gcc_assert (ptr->tune == ix86_tune);
3801 gcc_assert (ptr->fpmath == ix86_fpmath);
3802 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3805 /* Restore the current options */
3808 ix86_function_specific_restore (struct cl_target_option *ptr)
3810 enum processor_type old_tune = ix86_tune;
3811 enum processor_type old_arch = ix86_arch;
3812 unsigned int ix86_arch_mask, ix86_tune_mask;
3815 ix86_arch = (enum processor_type) ptr->arch;
3816 ix86_schedule = (enum attr_cpu) ptr->schedule;
3817 ix86_tune = (enum processor_type) ptr->tune;
3818 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3819 ix86_branch_cost = ptr->branch_cost;
3820 ix86_tune_defaulted = ptr->tune_defaulted;
3821 ix86_arch_specified = ptr->arch_specified;
3822 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3823 target_flags_explicit = ptr->target_flags_explicit;
3825 /* Recreate the arch feature tests if the arch changed */
3826 if (old_arch != ix86_arch)
3828 ix86_arch_mask = 1u << ix86_arch;
3829 for (i = 0; i < X86_ARCH_LAST; ++i)
3830 ix86_arch_features[i]
3831 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3834 /* Recreate the tune optimization tests */
3835 if (old_tune != ix86_tune)
3837 ix86_tune_mask = 1u << ix86_tune;
3838 for (i = 0; i < X86_TUNE_LAST; ++i)
3839 ix86_tune_features[i]
3840 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3844 /* Print the current options */
3847 ix86_function_specific_print (FILE *file, int indent,
3848 struct cl_target_option *ptr)
3851 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3852 NULL, NULL, NULL, false);
3854 fprintf (file, "%*sarch = %d (%s)\n",
3857 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3858 ? cpu_names[ptr->arch]
3861 fprintf (file, "%*stune = %d (%s)\n",
3864 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3865 ? cpu_names[ptr->tune]
3868 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3869 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3870 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3871 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3875 fprintf (file, "%*s%s\n", indent, "", target_string);
3876 free (target_string);
3881 /* Inner function to process the attribute((target(...))), take an argument and
3882 set the current options from the argument. If we have a list, recursively go over each element of the list. */
3886 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3891 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3892 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3893 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3894 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3909 enum ix86_opt_type type;
3914 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3915 IX86_ATTR_ISA ("abm", OPT_mabm),
3916 IX86_ATTR_ISA ("aes", OPT_maes),
3917 IX86_ATTR_ISA ("avx", OPT_mavx),
3918 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3919 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3920 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3921 IX86_ATTR_ISA ("sse", OPT_msse),
3922 IX86_ATTR_ISA ("sse2", OPT_msse2),
3923 IX86_ATTR_ISA ("sse3", OPT_msse3),
3924 IX86_ATTR_ISA ("sse4", OPT_msse4),
3925 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3926 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3927 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3928 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3929 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3930 IX86_ATTR_ISA ("xop", OPT_mxop),
3931 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3932 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3933 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3934 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3936 /* string options */
3937 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3938 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3939 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3942 IX86_ATTR_YES ("cld",
3946 IX86_ATTR_NO ("fancy-math-387",
3947 OPT_mfancy_math_387,
3948 MASK_NO_FANCY_MATH_387),
3950 IX86_ATTR_YES ("ieee-fp",
3954 IX86_ATTR_YES ("inline-all-stringops",
3955 OPT_minline_all_stringops,
3956 MASK_INLINE_ALL_STRINGOPS),
3958 IX86_ATTR_YES ("inline-stringops-dynamically",
3959 OPT_minline_stringops_dynamically,
3960 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3962 IX86_ATTR_NO ("align-stringops",
3963 OPT_mno_align_stringops,
3964 MASK_NO_ALIGN_STRINGOPS),
3966 IX86_ATTR_YES ("recip",
3972 /* If this is a list, recurse to get the options. */
3973 if (TREE_CODE (args) == TREE_LIST)
3977 for (; args; args = TREE_CHAIN (args))
3978 if (TREE_VALUE (args)
3979 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3985 else if (TREE_CODE (args) != STRING_CST)
3988 /* Handle multiple arguments separated by commas. */
3989 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3991 while (next_optstr && *next_optstr != '\0')
3993 char *p = next_optstr;
3995 char *comma = strchr (next_optstr, ',');
3996 const char *opt_string;
3997 size_t len, opt_len;
4002 enum ix86_opt_type type = ix86_opt_unknown;
4008 len = comma - next_optstr;
4009 next_optstr = comma + 1;
4017 /* Recognize no-xxx. */
4018 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4027 /* Find the option. */
4030 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4032 type = attrs[i].type;
4033 opt_len = attrs[i].len;
4034 if (ch == attrs[i].string[0]
4035 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4036 && memcmp (p, attrs[i].string, opt_len) == 0)
4039 mask = attrs[i].mask;
4040 opt_string = attrs[i].string;
4045 /* Process the option. */
4048 error ("attribute(target(\"%s\")) is unknown", orig_p);
4052 else if (type == ix86_opt_isa)
4053 ix86_handle_option (opt, p, opt_set_p);
4055 else if (type == ix86_opt_yes || type == ix86_opt_no)
4057 if (type == ix86_opt_no)
4058 opt_set_p = !opt_set_p;
4061 target_flags |= mask;
4063 target_flags &= ~mask;
4066 else if (type == ix86_opt_str)
4070 error ("option(\"%s\") was already specified", opt_string);
4074 p_strings[opt] = xstrdup (p + opt_len);
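/* Illustrative input for the parser above (hypothetical user code, not part
   of this file):
       __attribute__((target ("no-sse3,arch=core2")))
       void f (void);
   The string is split at each ',', a leading "no-" inverts opt_set_p, ISA
   entries are routed through ix86_handle_option, and string options such as
   "arch=" are stashed in p_strings for override_options to consume.  */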
4084 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4087 ix86_valid_target_attribute_tree (tree args)
4089 const char *orig_arch_string = ix86_arch_string;
4090 const char *orig_tune_string = ix86_tune_string;
4091 const char *orig_fpmath_string = ix86_fpmath_string;
4092 int orig_tune_defaulted = ix86_tune_defaulted;
4093 int orig_arch_specified = ix86_arch_specified;
4094 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4097 struct cl_target_option *def
4098 = TREE_TARGET_OPTION (target_option_default_node);
4100 /* Process each of the options on the chain. */
4101 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4104 /* If the changed options are different from the default, rerun override_options,
4105 and then save the options away. The string options are attribute options,
4106 and will be undone when we copy the save structure. */
4107 if (ix86_isa_flags != def->ix86_isa_flags
4108 || target_flags != def->target_flags
4109 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4110 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4111 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4113 /* If we are using the default tune= or arch=, undo the string assigned,
4114 and use the default. */
4115 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4116 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4117 else if (!orig_arch_specified)
4118 ix86_arch_string = NULL;
4120 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4121 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4122 else if (orig_tune_defaulted)
4123 ix86_tune_string = NULL;
4125 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4126 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4127 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4128 else if (!TARGET_64BIT && TARGET_SSE)
4129 ix86_fpmath_string = "sse,387";
4131 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4132 override_options (false);
4134 /* Add any builtin functions with the new isa if any. */
4135 ix86_add_new_builtins (ix86_isa_flags);
4137 /* Save the current options unless we are validating options for
4139 t = build_target_option_node ();
4141 ix86_arch_string = orig_arch_string;
4142 ix86_tune_string = orig_tune_string;
4143 ix86_fpmath_string = orig_fpmath_string;
4145 /* Free up memory allocated to hold the strings */
4146 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4147 if (option_strings[i])
4148 free (option_strings[i]);
4154 /* Hook to validate attribute((target("string"))). */
4157 ix86_valid_target_attribute_p (tree fndecl,
4158 tree ARG_UNUSED (name),
4160 int ARG_UNUSED (flags))
4162 struct cl_target_option cur_target;
4164 tree old_optimize = build_optimization_node ();
4165 tree new_target, new_optimize;
4166 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4168 /* If the function changed the optimization levels as well as setting target
4169 options, start with the optimizations specified. */
4170 if (func_optimize && func_optimize != old_optimize)
4171 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4173 /* The target attributes may also change some optimization flags, so update
4174 the optimization options if necessary. */
4175 cl_target_option_save (&cur_target);
4176 new_target = ix86_valid_target_attribute_tree (args);
4177 new_optimize = build_optimization_node ();
4184 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4186 if (old_optimize != new_optimize)
4187 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4190 cl_target_option_restore (&cur_target);
4192 if (old_optimize != new_optimize)
4193 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
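/* Example of the attribute this hook validates (illustrative):
       __attribute__((target ("sse4.2,popcnt")))
       int use_sse42 (int x) { return __builtin_popcount (x); }
   The function receives its own target-option node, while the surrounding
   cl_target_option save/restore keeps the command-line options in force for
   the rest of the translation unit.  */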
4199 /* Hook to determine if one function can safely inline another. */
4202 ix86_can_inline_p (tree caller, tree callee)
4205 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4206 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4208 /* If callee has no option attributes, then it is ok to inline. */
4212 /* If caller has no option attributes, but callee does then it is not ok to
4214 else if (!caller_tree)
4219 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4220 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4222 /* The callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4223 can inline an SSE2 function but an SSE2 function can't inline an SSE4 function. */
4225 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4226 != callee_opts->ix86_isa_flags)
4229 /* See if we have the same non-isa options. */
4230 else if (caller_opts->target_flags != callee_opts->target_flags)
4233 /* See if arch, tune, etc. are the same. */
4234 else if (caller_opts->arch != callee_opts->arch)
4237 else if (caller_opts->tune != callee_opts->tune)
4240 else if (caller_opts->fpmath != callee_opts->fpmath)
4243 else if (caller_opts->branch_cost != callee_opts->branch_cost)
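/* Worked example of the ISA subset rule (illustrative, hypothetical code):
       __attribute__((target ("sse2")))   static int callee (void) { return 0; }
       __attribute__((target ("sse4.2"))) int caller (void) { return callee (); }
   -msse4.2 implies SSE2, so the callee's ISA flags are a subset of the
   caller's and inlining is permitted; swapping the two attributes makes the
   subset test fail and blocks inlining.  */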
4254 /* Remember the last target of ix86_set_current_function. */
4255 static GTY(()) tree ix86_previous_fndecl;
4257 /* Establish appropriate back-end context for processing the function
4258 FNDECL. The argument might be NULL to indicate processing at top
4259 level, outside of any function scope. */
4261 ix86_set_current_function (tree fndecl)
4263 /* Only change the context if the function changes. This hook is called
4264 several times in the course of compiling a function, and we don't want to
4265 slow things down too much or call target_reinit when it isn't safe. */
4266 if (fndecl && fndecl != ix86_previous_fndecl)
4268 tree old_tree = (ix86_previous_fndecl
4269 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4272 tree new_tree = (fndecl
4273 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4276 ix86_previous_fndecl = fndecl;
4277 if (old_tree == new_tree)
4282 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4288 struct cl_target_option *def
4289 = TREE_TARGET_OPTION (target_option_current_node);
4291 cl_target_option_restore (def);
4298 /* Return true if this goes in large data/bss. */
4301 ix86_in_large_data_p (tree exp)
4303 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4306 /* Functions are never large data. */
4307 if (TREE_CODE (exp) == FUNCTION_DECL)
4310 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4312 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4313 if (strcmp (section, ".ldata") == 0
4314 || strcmp (section, ".lbss") == 0)
4320 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4322 /* If this is an incomplete type with size 0, then we can't put it
4323 in data because it might be too big when completed. */
4324 if (!size || size > ix86_section_threshold)
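/* Illustrative consequence (hypothetical object): under -mcmodel=medium
       static char big[1 << 20];
   exceeds -mlarge-data-threshold and is therefore treated as large data
   (placed in .ldata/.lbss), while smaller objects and all functions stay
   in the ordinary sections.  */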
4331 /* Switch to the appropriate section for output of DECL.
4332 DECL is either a `VAR_DECL' node or a constant of some sort.
4333 RELOC indicates whether forming the initial value of DECL requires
4334 link-time relocations. */
4336 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4340 x86_64_elf_select_section (tree decl, int reloc,
4341 unsigned HOST_WIDE_INT align)
4343 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4344 && ix86_in_large_data_p (decl))
4346 const char *sname = NULL;
4347 unsigned int flags = SECTION_WRITE;
4348 switch (categorize_decl_for_section (decl, reloc))
4353 case SECCAT_DATA_REL:
4354 sname = ".ldata.rel";
4356 case SECCAT_DATA_REL_LOCAL:
4357 sname = ".ldata.rel.local";
4359 case SECCAT_DATA_REL_RO:
4360 sname = ".ldata.rel.ro";
4362 case SECCAT_DATA_REL_RO_LOCAL:
4363 sname = ".ldata.rel.ro.local";
4367 flags |= SECTION_BSS;
4370 case SECCAT_RODATA_MERGE_STR:
4371 case SECCAT_RODATA_MERGE_STR_INIT:
4372 case SECCAT_RODATA_MERGE_CONST:
4376 case SECCAT_SRODATA:
4383 /* We don't split these for the medium model. Place them into
4384 default sections and hope for the best. */
4389 /* We might get called with string constants, but get_named_section
4390 doesn't like them as they are not DECLs. Also, we need to set
4391 flags in that case. */
4393 return get_section (sname, flags, NULL);
4394 return get_named_section (decl, sname, reloc);
4397 return default_elf_select_section (decl, reloc, align);
4400 /* Build up a unique section name, expressed as a
4401 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4402 RELOC indicates whether the initial value of EXP requires
4403 link-time relocations. */
4405 static void ATTRIBUTE_UNUSED
4406 x86_64_elf_unique_section (tree decl, int reloc)
4408 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4409 && ix86_in_large_data_p (decl))
4411 const char *prefix = NULL;
4412 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4413 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4415 switch (categorize_decl_for_section (decl, reloc))
4418 case SECCAT_DATA_REL:
4419 case SECCAT_DATA_REL_LOCAL:
4420 case SECCAT_DATA_REL_RO:
4421 case SECCAT_DATA_REL_RO_LOCAL:
4422 prefix = one_only ? ".ld" : ".ldata";
4425 prefix = one_only ? ".lb" : ".lbss";
4428 case SECCAT_RODATA_MERGE_STR:
4429 case SECCAT_RODATA_MERGE_STR_INIT:
4430 case SECCAT_RODATA_MERGE_CONST:
4431 prefix = one_only ? ".lr" : ".lrodata";
4433 case SECCAT_SRODATA:
4440 /* We don't split these for the medium model. Place them into
4441 default sections and hope for the best. */
4446 const char *name, *linkonce;
4449 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4450 name = targetm.strip_name_encoding (name);
4452 /* If we're using one_only, then there needs to be a .gnu.linkonce
4453 prefix to the section name. */
4454 linkonce = one_only ? ".gnu.linkonce" : "";
4456 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4458 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4462 default_unique_section (decl, reloc);
4465 #ifdef COMMON_ASM_OP
4466 /* This says how to output assembler code to declare an
4467 uninitialized external linkage data object.
4469 For medium model x86-64 we need to use the .largecomm pseudo-op for large objects. */
4472 x86_elf_aligned_common (FILE *file,
4473 const char *name, unsigned HOST_WIDE_INT size,
4476 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4477 && size > (unsigned int)ix86_section_threshold)
4478 fputs (".largecomm\t", file);
4480 fputs (COMMON_ASM_OP, file);
4481 assemble_name (file, name);
4482 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4483 size, align / BITS_PER_UNIT);
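/* Sketch of the assembly emitted above (illustrative symbol names): a large
   common object under the medium model produces
       .largecomm  big_buf,1048576,32
   whereas an object at or below the threshold gets the ordinary
       .comm       small_buf,64,8
   The last operand is the alignment in bytes, i.e. align / BITS_PER_UNIT.  */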
4487 /* Utility function for targets to use in implementing
4488 ASM_OUTPUT_ALIGNED_BSS. */
4491 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4492 const char *name, unsigned HOST_WIDE_INT size,
4495 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4496 && size > (unsigned int)ix86_section_threshold)
4497 switch_to_section (get_named_section (decl, ".lbss", 0));
4499 switch_to_section (bss_section);
4500 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4501 #ifdef ASM_DECLARE_OBJECT_NAME
4502 last_assemble_variable_decl = decl;
4503 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4505 /* The standard thing is to just output a label for the object. */
4506 ASM_OUTPUT_LABEL (file, name);
4507 #endif /* ASM_DECLARE_OBJECT_NAME */
4508 ASM_OUTPUT_SKIP (file, size ? size : 1);
4512 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4514 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4515 make the problem with not enough registers even worse. */
4516 #ifdef INSN_SCHEDULING
4518 flag_schedule_insns = 0;
4522 /* The Darwin libraries never set errno, so we might as well
4523 avoid calling them when that's the only reason we would. */
4524 flag_errno_math = 0;
4526 /* The default values of these switches depend on TARGET_64BIT,
4527 which is not known at this moment. Mark these values with 2 and
4528 let the user override them. In case there is no command line option
4529 specifying them, we will set the defaults in override_options. */
4531 flag_omit_frame_pointer = 2;
4533 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4537 flag_pcc_struct_return = 2;
4538 flag_asynchronous_unwind_tables = 2;
4539 flag_vect_cost_model = 1;
4540 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4541 SUBTARGET_OPTIMIZATION_OPTIONS;
4545 /* Decide whether we must probe the stack before any space allocation
4546 on this target. It's essentially TARGET_STACK_PROBE except when
4547 -fstack-check causes the stack to be already probed differently. */
4550 ix86_target_stack_probe (void)
4552 /* Do not probe the stack twice if static stack checking is enabled. */
4553 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4556 return TARGET_STACK_PROBE;
4559 /* Decide whether we can make a sibling call to a function. DECL is the
4560 declaration of the function being targeted by the call and EXP is the
4561 CALL_EXPR representing the call. */
4564 ix86_function_ok_for_sibcall (tree decl, tree exp)
4566 tree type, decl_or_type;
4569 /* If we are generating position-independent code, we cannot sibcall
4570 optimize any indirect call, or a direct call to a global function,
4571 as the PLT requires %ebx be live. */
4572 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4575 /* If we need to align the outgoing stack, then sibcalling would
4576 unalign the stack, which may break the called function. */
4577 if (ix86_minimum_incoming_stack_boundary (true)
4578 < PREFERRED_STACK_BOUNDARY)
4583 decl_or_type = decl;
4584 type = TREE_TYPE (decl);
4588 /* We're looking at the CALL_EXPR, we need the type of the function. */
4589 type = CALL_EXPR_FN (exp); /* pointer expression */
4590 type = TREE_TYPE (type); /* pointer type */
4591 type = TREE_TYPE (type); /* function type */
4592 decl_or_type = type;
4595 /* Check that the return value locations are the same. Like
4596 if we are returning floats on the 80387 register stack, we cannot
4597 make a sibcall from a function that doesn't return a float to a
4598 function that does or, conversely, from a function that does return
4599 a float to a function that doesn't; the necessary stack adjustment
4600 would not be executed. This is also the place we notice
4601 differences in the return value ABI. Note that it is ok for one
4602 of the functions to have void return type as long as the return
4603 value of the other is passed in a register. */
4604 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4605 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4607 if (STACK_REG_P (a) || STACK_REG_P (b))
4609 if (!rtx_equal_p (a, b))
4612 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4614 else if (!rtx_equal_p (a, b))
4619 /* The SYSV ABI has more call-clobbered registers;
4620 disallow sibcalls from MS to SYSV. */
4621 if (cfun->machine->call_abi == MS_ABI
4622 && ix86_function_type_abi (type) == SYSV_ABI)
4627 /* If this call is indirect, we'll need to be able to use a
4628 call-clobbered register for the address of the target function.
4629 Make sure that all such registers are not used for passing
4630 parameters. Note that DLLIMPORT functions are indirect. */
4632 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4634 if (ix86_function_regparm (type, NULL) >= 3)
4636 /* ??? Need to count the actual number of registers to be used,
4637 not the possible number of registers. Fix later. */
4643 /* Otherwise okay. That also includes certain types of indirect calls. */
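/* Illustrative case for the PIC rule above (hypothetical user code):
       extern int global_fn (int);
       int tail (int x) { return global_fn (x); }
   With -m32 -fpic, global_fn does not bind locally, so the call must go
   through the PLT with %ebx live and the sibcall optimization is refused;
   the 64-bit compiler is free to emit a tail jump for the same code.  */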
4647 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4648 and "sseregparm" calling convention attributes;
4649 arguments as in struct attribute_spec.handler. */
4652 ix86_handle_cconv_attribute (tree *node, tree name,
4654 int flags ATTRIBUTE_UNUSED,
4657 if (TREE_CODE (*node) != FUNCTION_TYPE
4658 && TREE_CODE (*node) != METHOD_TYPE
4659 && TREE_CODE (*node) != FIELD_DECL
4660 && TREE_CODE (*node) != TYPE_DECL)
4662 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4664 *no_add_attrs = true;
4668 /* Can combine regparm with all attributes but fastcall. */
4669 if (is_attribute_p ("regparm", name))
4673 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4675 error ("fastcall and regparm attributes are not compatible");
4678 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4680 error ("regparm and thiscall attributes are not compatible");
4683 cst = TREE_VALUE (args);
4684 if (TREE_CODE (cst) != INTEGER_CST)
4686 warning (OPT_Wattributes,
4687 "%qE attribute requires an integer constant argument",
4689 *no_add_attrs = true;
4691 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4693 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4695 *no_add_attrs = true;
4703 /* Do not warn when emulating the MS ABI. */
4704 if ((TREE_CODE (*node) != FUNCTION_TYPE
4705 && TREE_CODE (*node) != METHOD_TYPE)
4706 || ix86_function_type_abi (*node) != MS_ABI)
4707 warning (OPT_Wattributes, "%qE attribute ignored",
4709 *no_add_attrs = true;
4713 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4714 if (is_attribute_p ("fastcall", name))
4716 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4718 error ("fastcall and cdecl attributes are not compatible");
4720 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4722 error ("fastcall and stdcall attributes are not compatible");
4724 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4726 error ("fastcall and regparm attributes are not compatible");
4728 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4730 error ("fastcall and thiscall attributes are not compatible");
4734 /* Can combine stdcall with fastcall (redundant), regparm and
4736 else if (is_attribute_p ("stdcall", name))
4738 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4740 error ("stdcall and cdecl attributes are not compatible");
4742 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4744 error ("stdcall and fastcall attributes are not compatible");
4746 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4748 error ("stdcall and thiscall attributes are not compatible");
4752 /* Can combine cdecl with regparm and sseregparm. */
4753 else if (is_attribute_p ("cdecl", name))
4755 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4757 error ("stdcall and cdecl attributes are not compatible");
4759 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4761 error ("fastcall and cdecl attributes are not compatible");
4763 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4765 error ("cdecl and thiscall attributes are not compatible");
4768 else if (is_attribute_p ("thiscall", name))
4770 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4771 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4773 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4775 error ("stdcall and thiscall attributes are not compatible");
4777 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4779 error ("fastcall and thiscall attributes are not compatible");
4781 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4783 error ("cdecl and thiscall attributes are not compatible");
4787 /* Can combine sseregparm with all attributes. */
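/* Illustrative accepts and rejects for the handler above (hypothetical
   declarations):
       void __attribute__((fastcall, regparm (2))) f (int, int);   (rejected)
       void __attribute__((stdcall, sseregparm)) g (float);        (accepted)
   fastcall already fixes the register arguments (%ecx, %edx), so a regparm
   count cannot be combined with it, while sseregparm merely moves FP
   arguments into SSE registers and composes with everything.  */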
4792 /* Return 0 if the attributes for two types are incompatible, 1 if they
4793 are compatible, and 2 if they are nearly compatible (which causes a
4794 warning to be generated). */
4797 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4799 /* Check for mismatch of non-default calling convention. */
4800 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4802 if (TREE_CODE (type1) != FUNCTION_TYPE
4803 && TREE_CODE (type1) != METHOD_TYPE)
4806 /* Check for mismatched fastcall/regparm types. */
4807 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4808 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4809 || (ix86_function_regparm (type1, NULL)
4810 != ix86_function_regparm (type2, NULL)))
4813 /* Check for mismatched sseregparm types. */
4814 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4815 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4818 /* Check for mismatched thiscall types. */
4819 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4820 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4823 /* Check for mismatched return types (cdecl vs stdcall). */
4824 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4825 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4831 /* Return the regparm value for a function with the indicated TYPE and DECL.
4832 DECL may be NULL when calling function indirectly
4833 or considering a libcall. */
4836 ix86_function_regparm (const_tree type, const_tree decl)
4842 return (ix86_function_type_abi (type) == SYSV_ABI
4843 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4845 regparm = ix86_regparm;
4846 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4849 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4853 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4856 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4859 /* Use register calling convention for local functions when possible. */
4861 && TREE_CODE (decl) == FUNCTION_DECL
4863 && !(profile_flag && !flag_fentry))
4865 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4866 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4869 int local_regparm, globals = 0, regno;
4871 /* Make sure no regparm register is taken by a
4872 fixed register variable. */
4873 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4874 if (fixed_regs[local_regparm])
4877 /* We don't want to use regparm(3) for nested functions as
4878 these use a static chain pointer in the third argument. */
4879 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4882 /* Each fixed register usage increases register pressure,
4883 so less registers should be used for argument passing.
4884 This functionality can be overridden by an explicit regparm value. */
4886 for (regno = 0; regno <= DI_REG; regno++)
4887 if (fixed_regs[regno])
4891 = globals < local_regparm ? local_regparm - globals : 0;
4893 if (local_regparm > regparm)
4894 regparm = local_regparm;
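/* Illustrative effect (hypothetical declaration):
       int __attribute__((regparm (3))) add3 (int a, int b, int c);
   passes the three integer arguments in %eax, %edx and %ecx instead of on
   the stack; the loop above likewise promotes a local (static, non-escaping)
   function to register passing when no fixed register stands in the way.  */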
4901 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4902 DFmode (2) arguments in SSE registers for a function with the
4903 indicated TYPE and DECL. DECL may be NULL when calling function
4904 indirectly or considering a libcall. Otherwise return 0. */
4907 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4909 gcc_assert (!TARGET_64BIT);
4911 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4912 by the sseregparm attribute. */
4913 if (TARGET_SSEREGPARM
4914 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4921 error ("calling %qD with attribute sseregparm without "
4922 "SSE/SSE2 enabled", decl);
4924 error ("calling %qT with attribute sseregparm without "
4925 "SSE/SSE2 enabled", type);
4933 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4934 (and DFmode for SSE2) arguments in SSE registers. */
4935 if (decl && TARGET_SSE_MATH && optimize
4936 && !(profile_flag && !flag_fentry))
4938 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4939 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4941 return TARGET_SSE2 ? 2 : 1;
4947 /* Return true if EAX is live at the start of the function. Used by
4948 ix86_expand_prologue to determine if we need special help before
4949 calling allocate_stack_worker. */
4952 ix86_eax_live_at_start_p (void)
4954 /* Cheat. Don't bother working forward from ix86_function_regparm
4955 to the function type to whether an actual argument is located in
4956 eax. Instead just look at cfg info, which is still close enough
4957 to correct at this point. This gives false positives for broken
4958 functions that might use uninitialized data that happens to be
4959 allocated in eax, but who cares? */
4960 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4963 /* Value is the number of bytes of arguments automatically
4964 popped when returning from a subroutine call.
4965 FUNDECL is the declaration node of the function (as a tree),
4966 FUNTYPE is the data type of the function (as a tree),
4967 or for a library call it is an identifier node for the subroutine name.
4968 SIZE is the number of bytes of arguments passed on the stack.
4970 On the 80386, the RTD insn may be used to pop them if the number
4971 of args is fixed, but if the number is variable then the caller
4972 must pop them all. RTD can't be used for library calls now
4973 because the library is compiled with the Unix compiler.
4974 Use of RTD is a selectable option, since it is incompatible with
4975 standard Unix calling sequences. If the option is not selected,
4976 the caller must always pop the args.
4978 The attribute stdcall is equivalent to RTD on a per module basis. */
4981 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4985 /* None of the 64-bit ABIs pop arguments. */
4989 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4991 /* Cdecl functions override -mrtd, and never pop the stack. */
4992 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4994 /* Stdcall and fastcall functions will pop the stack if not
4996 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4997 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
4998 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
5001 if (rtd && ! stdarg_p (funtype))
5005 /* Lose any fake structure return argument if it is passed on the stack. */
5006 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
5007 && !KEEP_AGGREGATE_RETURN_POINTER)
5009 int nregs = ix86_function_regparm (funtype, fundecl);
5011 return GET_MODE_SIZE (Pmode);
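/* Worked example (illustrative): for
       void __attribute__((stdcall)) f (int, int);
   this returns 8 on ia32, so the callee's "ret $8" pops both arguments; a
   plain cdecl declaration yields 0 and leaves the cleanup to the caller.  A
   stdcall function declared (int, ...) is stdarg and also yields 0, since
   only the caller knows how much was pushed.  */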
5017 /* Argument support functions. */
5019 /* Return true when register may be used to pass function parameters. */
5021 ix86_function_arg_regno_p (int regno)
5024 const int *parm_regs;
5029 return (regno < REGPARM_MAX
5030 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5032 return (regno < REGPARM_MAX
5033 || (TARGET_MMX && MMX_REGNO_P (regno)
5034 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5035 || (TARGET_SSE && SSE_REGNO_P (regno)
5036 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5041 if (SSE_REGNO_P (regno) && TARGET_SSE)
5046 if (TARGET_SSE && SSE_REGNO_P (regno)
5047 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5051 /* TODO: The function should depend on current function ABI but
5052 builtins.c would need updating then. Therefore we use the default ABI. */
5055 /* RAX is used as hidden argument to va_arg functions. */
5056 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5059 if (ix86_abi == MS_ABI)
5060 parm_regs = x86_64_ms_abi_int_parameter_registers;
5062 parm_regs = x86_64_int_parameter_registers;
5063 for (i = 0; i < (ix86_abi == MS_ABI
5064 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5065 if (regno == parm_regs[i])
5070 /* Return true if we do not know how to pass TYPE solely in registers. */
5073 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5075 if (must_pass_in_stack_var_size_or_pad (mode, type))
5078 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5079 The layout_type routine is crafty and tries to trick us into passing
5080 currently unsupported vector types on the stack by using TImode. */
5081 return (!TARGET_64BIT && mode == TImode
5082 && type && TREE_CODE (type) != VECTOR_TYPE);
5085 /* Return the size, in bytes, of the area reserved for arguments passed
5086 in registers for the function represented by FNDECL, depending on the ABI used. */
5089 ix86_reg_parm_stack_space (const_tree fndecl)
5091 enum calling_abi call_abi = SYSV_ABI;
5092 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5093 call_abi = ix86_function_abi (fndecl);
5095 call_abi = ix86_function_type_abi (fndecl);
5096 if (call_abi == MS_ABI)
5101 /* Returns SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call ABI used. */
5104 ix86_function_type_abi (const_tree fntype)
5106 if (TARGET_64BIT && fntype != NULL)
5108 enum calling_abi abi = ix86_abi;
5109 if (abi == SYSV_ABI)
5111 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5114 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5122 ix86_function_ms_hook_prologue (const_tree fn)
5124 if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
5126 if (decl_function_context (fn) != NULL_TREE)
5127 error_at (DECL_SOURCE_LOCATION (fn),
5128 "ms_hook_prologue is not compatible with nested function");
5135 static enum calling_abi
5136 ix86_function_abi (const_tree fndecl)
5140 return ix86_function_type_abi (TREE_TYPE (fndecl));
5143 /* Returns SYSV_ABI or MS_ABI, depending on cfun, specifying the call ABI of the current function. */
5146 ix86_cfun_abi (void)
5148 if (! cfun || ! TARGET_64BIT)
5150 return cfun->machine->call_abi;
5153 /* Write the extra assembler code needed to declare a function properly. */
5156 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5159 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5163 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5164 unsigned int filler_cc = 0xcccccccc;
5166 for (i = 0; i < filler_count; i += 4)
5167 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5170 ASM_OUTPUT_LABEL (asm_out_file, fname);
5172 /* Output magic byte marker, if hot-patch attribute is set. */
5177 /* leaq [%rsp + 0], %rsp */
5178 asm_fprintf (asm_out_file, ASM_BYTE
5179 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5183 /* movl.s %edi, %edi
5185 movl.s %esp, %ebp */
5186 asm_fprintf (asm_out_file, ASM_BYTE
5187 "0x8b, 0xff, 0x55, 0x8b, 0xec\n");
5193 extern void init_regs (void);
5195 /* Implementation of call abi switching target hook. Specific to FNDECL
5196 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
5197 for more details. */
5199 ix86_call_abi_override (const_tree fndecl)
5201 if (fndecl == NULL_TREE)
5202 cfun->machine->call_abi = ix86_abi;
5204 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5207 /* The MS and SYSV ABIs have different sets of call-used registers. Avoid expensive
5208 re-initialization of init_regs each time we switch function context since
5209 this is needed only during RTL expansion. */
5211 ix86_maybe_switch_abi (void)
5214 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5218 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5219 for a call to a function whose data type is FNTYPE.
5220 For a library call, FNTYPE is 0. */
5223 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5224 tree fntype, /* tree ptr for function decl */
5225 rtx libname, /* SYMBOL_REF of library name or 0 */
5228 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5229 memset (cum, 0, sizeof (*cum));
5232 cum->call_abi = ix86_function_abi (fndecl);
5234 cum->call_abi = ix86_function_type_abi (fntype);
5235 /* Set up the number of registers to use for passing arguments. */
5237 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5238 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5239 "or subtarget optimization implying it");
5240 cum->nregs = ix86_regparm;
5243 cum->nregs = (cum->call_abi == SYSV_ABI
5244 ? X86_64_REGPARM_MAX
5245 : X86_64_MS_REGPARM_MAX);
5249 cum->sse_nregs = SSE_REGPARM_MAX;
5252 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5253 ? X86_64_SSE_REGPARM_MAX
5254 : X86_64_MS_SSE_REGPARM_MAX);
5258 cum->mmx_nregs = MMX_REGPARM_MAX;
5259 cum->warn_avx = true;
5260 cum->warn_sse = true;
5261 cum->warn_mmx = true;
5263 /* Because the type might mismatch between caller and callee, we need to
5264 use the actual type of the function for local calls.
5265 FIXME: cgraph_analyze can be told to actually record whether a function uses
5266 va_start, so for local functions maybe_vaarg can be made more aggressive.
5268 FIXME: once the type system is fixed, we won't need this code anymore. */
5270 fntype = TREE_TYPE (fndecl);
5271 cum->maybe_vaarg = (fntype
5272 ? (!prototype_p (fntype) || stdarg_p (fntype))
5277 /* If there are variable arguments, then we won't pass anything
5278 in registers in 32-bit mode. */
5279 if (stdarg_p (fntype))
5290 /* Use ecx and edx registers if function has fastcall attribute,
5291 else look for regparm information. */
5294 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5297 cum->fastcall = 1; /* Same first register as in fastcall. */
5299 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5305 cum->nregs = ix86_function_regparm (fntype, fndecl);
5308 /* Set up the number of SSE registers used for passing SFmode
5309 and DFmode arguments. Warn for mismatching ABI. */
5310 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
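/* Illustrative register budgets established above (values as defined by the
   respective ABIs): a 64-bit SYSV call gets X86_64_REGPARM_MAX (6) integer
   and X86_64_SSE_REGPARM_MAX (8) SSE argument registers, the 64-bit MS ABI
   gets 4 of each, and a 32-bit fastcall function gets 2 integer registers
   (%ecx, %edx).  */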
5314 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5315 But in the case of vector types, it is some vector mode.
5317 When we have only some of our vector isa extensions enabled, then there
5318 are some modes for which vector_mode_supported_p is false. For these
5319 modes, the generic vector support in gcc will choose some non-vector mode
5320 in order to implement the type. By computing the natural mode, we'll
5321 select the proper ABI location for the operand and not depend on whatever
5322 the middle-end decides to do with these vector types.
5324 The middle-end can't deal with vector types larger than 16 bytes. In this
5325 case, we return the original mode and warn of the ABI change if CUM isn't NULL. */
5328 static enum machine_mode
5329 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5331 enum machine_mode mode = TYPE_MODE (type);
5333 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5335 HOST_WIDE_INT size = int_size_in_bytes (type);
5336 if ((size == 8 || size == 16 || size == 32)
5337 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5338 && TYPE_VECTOR_SUBPARTS (type) > 1)
5340 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5342 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5343 mode = MIN_MODE_VECTOR_FLOAT;
5345 mode = MIN_MODE_VECTOR_INT;
5347 /* Get the mode which has this inner mode and number of units. */
5348 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5349 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5350 && GET_MODE_INNER (mode) == innermode)
5352 if (size == 32 && !TARGET_AVX)
5354 static bool warnedavx;
5361 warning (0, "AVX vector argument without AVX "
5362 "enabled changes the ABI");
5364 return TYPE_MODE (type);
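/* Illustrative trigger for the warning above (hypothetical user code):
       typedef float v8sf __attribute__((vector_size (32)));
       v8sf f (v8sf x);
   compiled without -mavx: the 32-byte vector needs V8SFmode, so the code
   falls back to TYPE_MODE and the "AVX vector argument" ABI warning fires
   once per compilation.  */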
5377 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5378 this may not agree with the mode that the type system has chosen for the
5379 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5380 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5383 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5388 if (orig_mode != BLKmode)
5389 tmp = gen_rtx_REG (orig_mode, regno);
5392 tmp = gen_rtx_REG (mode, regno);
5393 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5394 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5400 /* x86-64 register passing implementation. See the x86-64 ABI for details. The goal
5401 of this code is to classify each 8-byte chunk (eightbyte) of an incoming argument
5402 by register class and assign registers accordingly. */
5404 /* Return the union class of CLASS1 and CLASS2.
5405 See the x86-64 PS ABI for details. */
5407 static enum x86_64_reg_class
5408 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5410 /* Rule #1: If both classes are equal, this is the resulting class. */
5411 if (class1 == class2)
5414 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5416 if (class1 == X86_64_NO_CLASS)
5418 if (class2 == X86_64_NO_CLASS)
5421 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5422 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5423 return X86_64_MEMORY_CLASS;
5425 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5426 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5427 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5428 return X86_64_INTEGERSI_CLASS;
5429 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5430 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5431 return X86_64_INTEGER_CLASS;
5433 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5435 if (class1 == X86_64_X87_CLASS
5436 || class1 == X86_64_X87UP_CLASS
5437 || class1 == X86_64_COMPLEX_X87_CLASS
5438 || class2 == X86_64_X87_CLASS
5439 || class2 == X86_64_X87UP_CLASS
5440 || class2 == X86_64_COMPLEX_X87_CLASS)
5441 return X86_64_MEMORY_CLASS;
5443 /* Rule #6: Otherwise class SSE is used. */
5444 return X86_64_SSE_CLASS;
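/* Worked example of the rules above (illustrative): classifying
       struct s { int i; float f; };
   both fields land in the same eightbyte; the int yields
   X86_64_INTEGERSI_CLASS and the float X86_64_SSESF_CLASS, which rule #4
   merges to X86_64_INTEGERSI_CLASS, so the whole 8-byte struct is passed in
   a single general-purpose register.  */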
5447 /* Classify the argument of type TYPE and mode MODE.
5448 CLASSES will be filled by the register class used to pass each word
5449 of the operand. The number of words is returned. In case the parameter
5450 should be passed in memory, 0 is returned. As a special case for zero
5451 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5453 BIT_OFFSET is used internally for handling records; it specifies the
5454 offset in bits modulo 256 to avoid overflow cases.
5456 See the x86-64 PS ABI for details.
5460 classify_argument (enum machine_mode mode, const_tree type,
5461 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5463 HOST_WIDE_INT bytes =
5464 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5465 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5467 /* Variable sized entities are always passed/returned in memory. */
5471 if (mode != VOIDmode
5472 && targetm.calls.must_pass_in_stack (mode, type))
5475 if (type && AGGREGATE_TYPE_P (type))
5479 enum x86_64_reg_class subclasses[MAX_CLASSES];
5481 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5485 for (i = 0; i < words; i++)
5486 classes[i] = X86_64_NO_CLASS;
5488 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
5489 signal the memory class, so handle them as a special case. */
5492 classes[0] = X86_64_NO_CLASS;
5496 /* Classify each field of record and merge classes. */
5497 switch (TREE_CODE (type))
5500 /* And now merge the fields of structure. */
5501 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5503 if (TREE_CODE (field) == FIELD_DECL)
5507 if (TREE_TYPE (field) == error_mark_node)
5510 /* Bitfields are always classified as integer. Handle them
5511 early, since later code would consider them to be
5512 misaligned integers. */
5513 if (DECL_BIT_FIELD (field))
5515 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5516 i < ((int_bit_position (field) + (bit_offset % 64))
5517 + tree_low_cst (DECL_SIZE (field), 0)
5520 merge_classes (X86_64_INTEGER_CLASS,
5527 type = TREE_TYPE (field);
5529 /* Flexible array member is ignored. */
5530 if (TYPE_MODE (type) == BLKmode
5531 && TREE_CODE (type) == ARRAY_TYPE
5532 && TYPE_SIZE (type) == NULL_TREE
5533 && TYPE_DOMAIN (type) != NULL_TREE
5534 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5539 if (!warned && warn_psabi)
5542 inform (input_location,
5543 "The ABI of passing struct with"
5544 " a flexible array member has"
5545 " changed in GCC 4.4");
5549 num = classify_argument (TYPE_MODE (type), type,
5551 (int_bit_position (field)
5552 + bit_offset) % 256);
5555 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5556 for (i = 0; i < num && (i + pos) < words; i++)
5558 merge_classes (subclasses[i], classes[i + pos]);
5565 /* Arrays are handled as small records. */
5568 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5569 TREE_TYPE (type), subclasses, bit_offset);
5573 /* The partial classes are now full classes. */
5574 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5575 subclasses[0] = X86_64_SSE_CLASS;
5576 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5577 && !((bit_offset % 64) == 0 && bytes == 4))
5578 subclasses[0] = X86_64_INTEGER_CLASS;
5580 for (i = 0; i < words; i++)
5581 classes[i] = subclasses[i % num];
5586 case QUAL_UNION_TYPE:
5587 /* Unions are similar to RECORD_TYPE but offset is always 0.
5589 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5591 if (TREE_CODE (field) == FIELD_DECL)
5595 if (TREE_TYPE (field) == error_mark_node)
5598 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5599 TREE_TYPE (field), subclasses,
5603 for (i = 0; i < num; i++)
5604 classes[i] = merge_classes (subclasses[i], classes[i]);
5615 /* When size > 16 bytes, if the first class isn't
5616 X86_64_SSE_CLASS or any of the others isn't
5617 X86_64_SSEUP_CLASS, everything should be passed in
5619 if (classes[0] != X86_64_SSE_CLASS)
5622 for (i = 1; i < words; i++)
5623 if (classes[i] != X86_64_SSEUP_CLASS)
5627 /* Final merger cleanup. */
5628 for (i = 0; i < words; i++)
5630 /* If one class is MEMORY, everything should be passed in
5632 if (classes[i] == X86_64_MEMORY_CLASS)
5635 /* The X86_64_SSEUP_CLASS should always be preceded by
5636 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5637 if (classes[i] == X86_64_SSEUP_CLASS
5638 && classes[i - 1] != X86_64_SSE_CLASS
5639 && classes[i - 1] != X86_64_SSEUP_CLASS)
5641 /* The first one should never be X86_64_SSEUP_CLASS. */
5642 gcc_assert (i != 0);
5643 classes[i] = X86_64_SSE_CLASS;
5646 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5647 everything should be passed in memory. */
5648 if (classes[i] == X86_64_X87UP_CLASS
5649 && (classes[i - 1] != X86_64_X87_CLASS))
5653 /* The first one should never be X86_64_X87UP_CLASS. */
5654 gcc_assert (i != 0);
5655 if (!warned && warn_psabi)
5658 inform (input_location,
5659 "The ABI of passing union with long double"
5660 " has changed in GCC 4.4");
5668 /* Compute the alignment needed. We align all types to their natural
5669 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5670 if (mode != VOIDmode && mode != BLKmode)
5672 int mode_alignment = GET_MODE_BITSIZE (mode);
5675 mode_alignment = 128;
5676 else if (mode == XCmode)
5677 mode_alignment = 256;
5678 if (COMPLEX_MODE_P (mode))
5679 mode_alignment /= 2;
5680 /* Misaligned fields are always returned in memory. */
5681 if (bit_offset % mode_alignment)
5685 /* For V1xx modes, just use the base mode. */
5686 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5687 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5688 mode = GET_MODE_INNER (mode);
5690 /* Classification of atomic types. */
5695 classes[0] = X86_64_SSE_CLASS;
5698 classes[0] = X86_64_SSE_CLASS;
5699 classes[1] = X86_64_SSEUP_CLASS;
5709 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5713 classes[0] = X86_64_INTEGERSI_CLASS;
5716 else if (size <= 64)
5718 classes[0] = X86_64_INTEGER_CLASS;
5721 else if (size <= 64+32)
5723 classes[0] = X86_64_INTEGER_CLASS;
5724 classes[1] = X86_64_INTEGERSI_CLASS;
5727 else if (size <= 64+64)
5729 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5737 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5741 /* OImode shouldn't be used directly. */
5746 if (!(bit_offset % 64))
5747 classes[0] = X86_64_SSESF_CLASS;
5749 classes[0] = X86_64_SSE_CLASS;
5752 classes[0] = X86_64_SSEDF_CLASS;
5755 classes[0] = X86_64_X87_CLASS;
5756 classes[1] = X86_64_X87UP_CLASS;
5759 classes[0] = X86_64_SSE_CLASS;
5760 classes[1] = X86_64_SSEUP_CLASS;
5763 classes[0] = X86_64_SSE_CLASS;
5764 if (!(bit_offset % 64))
5770 if (!warned && warn_psabi)
5773 inform (input_location,
5774 "The ABI of passing structure with complex float"
5775 " member has changed in GCC 4.4");
5777 classes[1] = X86_64_SSESF_CLASS;
5781 classes[0] = X86_64_SSEDF_CLASS;
5782 classes[1] = X86_64_SSEDF_CLASS;
5785 classes[0] = X86_64_COMPLEX_X87_CLASS;
5788 /* This mode is larger than 16 bytes. */
5796 classes[0] = X86_64_SSE_CLASS;
5797 classes[1] = X86_64_SSEUP_CLASS;
5798 classes[2] = X86_64_SSEUP_CLASS;
5799 classes[3] = X86_64_SSEUP_CLASS;
5807 classes[0] = X86_64_SSE_CLASS;
5808 classes[1] = X86_64_SSEUP_CLASS;
5816 classes[0] = X86_64_SSE_CLASS;
5822 gcc_assert (VECTOR_MODE_P (mode));
5827 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5829 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5830 classes[0] = X86_64_INTEGERSI_CLASS;
5832 classes[0] = X86_64_INTEGER_CLASS;
5833 classes[1] = X86_64_INTEGER_CLASS;
5834 return 1 + (bytes > 8);
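/* A worked example of the classification above (illustrative only,
   assuming the usual SysV x86-64 layout):

       struct s { long l; double d; };

   yields two words: classes[0] = X86_64_INTEGER_CLASS for the long
   and classes[1] = X86_64_SSEDF_CLASS for the double, so the struct
   is split between a general purpose and an SSE register.  */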
5838 /* Examine the argument and set the number of registers required in each
5839 class. Return 0 iff the parameter should be passed in memory. */
5841 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5842 int *int_nregs, int *sse_nregs)
5844 enum x86_64_reg_class regclass[MAX_CLASSES];
5845 int n = classify_argument (mode, type, regclass, 0);
5851 for (n--; n >= 0; n--)
5852 switch (regclass[n])
5854 case X86_64_INTEGER_CLASS:
5855 case X86_64_INTEGERSI_CLASS:
5858 case X86_64_SSE_CLASS:
5859 case X86_64_SSESF_CLASS:
5860 case X86_64_SSEDF_CLASS:
5863 case X86_64_NO_CLASS:
5864 case X86_64_SSEUP_CLASS:
5866 case X86_64_X87_CLASS:
5867 case X86_64_X87UP_CLASS:
5871 case X86_64_COMPLEX_X87_CLASS:
5872 return in_return ? 2 : 0;
5873 case X86_64_MEMORY_CLASS:
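/* Usage sketch: for the { long; double } example above,
   examine_argument (BLKmode, type, 0, &int_nregs, &sse_nregs)
   returns nonzero with int_nregs = 1 and sse_nregs = 1; a zero
   return instead means the parameter lives in memory.  */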
5879 /* Construct container for the argument used by GCC interface. See
5880 FUNCTION_ARG for the detailed description. */
5883 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5884 const_tree type, int in_return, int nintregs, int nsseregs,
5885 const int *intreg, int sse_regno)
5887 /* The following variables hold the static issued_error state. */
5888 static bool issued_sse_arg_error;
5889 static bool issued_sse_ret_error;
5890 static bool issued_x87_ret_error;
5892 enum machine_mode tmpmode;
5894 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5895 enum x86_64_reg_class regclass[MAX_CLASSES];
5899 int needed_sseregs, needed_intregs;
5900 rtx exp[MAX_CLASSES];
5903 n = classify_argument (mode, type, regclass, 0);
5906 if (!examine_argument (mode, type, in_return, &needed_intregs,
5909 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5912 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5913 some less clueful developer tries to use floating-point anyway. */
5914 if (needed_sseregs && !TARGET_SSE)
5918 if (!issued_sse_ret_error)
5920 error ("SSE register return with SSE disabled");
5921 issued_sse_ret_error = true;
5924 else if (!issued_sse_arg_error)
5926 error ("SSE register argument with SSE disabled");
5927 issued_sse_arg_error = true;
5932 /* Likewise, error if the ABI requires us to return values in the
5933 x87 registers and the user specified -mno-80387. */
5934 if (!TARGET_80387 && in_return)
5935 for (i = 0; i < n; i++)
5936 if (regclass[i] == X86_64_X87_CLASS
5937 || regclass[i] == X86_64_X87UP_CLASS
5938 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5940 if (!issued_x87_ret_error)
5942 error ("x87 register return with x87 disabled");
5943 issued_x87_ret_error = true;
5948 /* First construct the simple cases. Avoid SCmode, since we want to use
5949 a single register to pass this type. */
5950 if (n == 1 && mode != SCmode)
5951 switch (regclass[0])
5953 case X86_64_INTEGER_CLASS:
5954 case X86_64_INTEGERSI_CLASS:
5955 return gen_rtx_REG (mode, intreg[0]);
5956 case X86_64_SSE_CLASS:
5957 case X86_64_SSESF_CLASS:
5958 case X86_64_SSEDF_CLASS:
5959 if (mode != BLKmode)
5960 return gen_reg_or_parallel (mode, orig_mode,
5961 SSE_REGNO (sse_regno));
5963 case X86_64_X87_CLASS:
5964 case X86_64_COMPLEX_X87_CLASS:
5965 return gen_rtx_REG (mode, FIRST_STACK_REG);
5966 case X86_64_NO_CLASS:
5967 /* Zero sized array, struct or class. */
5972 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5973 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5974 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5976 && regclass[0] == X86_64_SSE_CLASS
5977 && regclass[1] == X86_64_SSEUP_CLASS
5978 && regclass[2] == X86_64_SSEUP_CLASS
5979 && regclass[3] == X86_64_SSEUP_CLASS
5981 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5984 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5985 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5986 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5987 && regclass[1] == X86_64_INTEGER_CLASS
5988 && (mode == CDImode || mode == TImode || mode == TFmode)
5989 && intreg[0] + 1 == intreg[1])
5990 return gen_rtx_REG (mode, intreg[0]);
5992 /* Otherwise figure out the entries of the PARALLEL. */
5993 for (i = 0; i < n; i++)
5997 switch (regclass[i])
5999 case X86_64_NO_CLASS:
6001 case X86_64_INTEGER_CLASS:
6002 case X86_64_INTEGERSI_CLASS:
6003 /* Merge TImodes on aligned occasions here too. */
6004 if (i * 8 + 8 > bytes)
6005 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
6006 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
6010 /* We've requested 24 bytes that we don't have a mode for. Use DImode. */
6011 if (tmpmode == BLKmode)
6013 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6014 gen_rtx_REG (tmpmode, *intreg),
6018 case X86_64_SSESF_CLASS:
6019 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6020 gen_rtx_REG (SFmode,
6021 SSE_REGNO (sse_regno)),
6025 case X86_64_SSEDF_CLASS:
6026 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6027 gen_rtx_REG (DFmode,
6028 SSE_REGNO (sse_regno)),
6032 case X86_64_SSE_CLASS:
6040 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6050 && regclass[1] == X86_64_SSEUP_CLASS
6051 && regclass[2] == X86_64_SSEUP_CLASS
6052 && regclass[3] == X86_64_SSEUP_CLASS);
6059 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6060 gen_rtx_REG (tmpmode,
6061 SSE_REGNO (sse_regno)),
6070 /* Empty aligned struct, union or class. */
6074 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6075 for (i = 0; i < nexps; i++)
6076 XVECEXP (ret, 0, i) = exp [i];
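/* The PARALLEL assembled above describes a split argument; for the
   { long; double } example it would amount to (a sketch, exact
   registers depend on INTREG and SSE_REGNO):

       (parallel [(expr_list (reg:DI di) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. byte 0 travels in a GPR and byte 8 in an SSE register.  */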
6080 /* Update the data in CUM to advance over an argument of mode MODE
6081 and data type TYPE. (TYPE is null for libcalls where that information
6082 may not be available.) */
6085 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6086 const_tree type, HOST_WIDE_INT bytes,
6087 HOST_WIDE_INT words)
6103 cum->words += words;
6104 cum->nregs -= words;
6105 cum->regno += words;
6107 if (cum->nregs <= 0)
6115 /* OImode shouldn't be used directly. */
6119 if (cum->float_in_sse < 2)
6122 if (cum->float_in_sse < 1)
6139 if (!type || !AGGREGATE_TYPE_P (type))
6141 cum->sse_words += words;
6142 cum->sse_nregs -= 1;
6143 cum->sse_regno += 1;
6144 if (cum->sse_nregs <= 0)
6158 if (!type || !AGGREGATE_TYPE_P (type))
6160 cum->mmx_words += words;
6161 cum->mmx_nregs -= 1;
6162 cum->mmx_regno += 1;
6163 if (cum->mmx_nregs <= 0)
6174 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6175 const_tree type, HOST_WIDE_INT words, bool named)
6177 int int_nregs, sse_nregs;
6179 /* Unnamed 256bit vector mode parameters are passed on stack. */
6180 if (!named && VALID_AVX256_REG_MODE (mode))
6183 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6184 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6186 cum->nregs -= int_nregs;
6187 cum->sse_nregs -= sse_nregs;
6188 cum->regno += int_nregs;
6189 cum->sse_regno += sse_nregs;
6193 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6194 cum->words = (cum->words + align - 1) & ~(align - 1);
6195 cum->words += words;
6200 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6201 HOST_WIDE_INT words)
6203 /* Otherwise, this should be passed indirectly. */
6204 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6206 cum->words += words;
6214 /* Update the data in CUM to advance over an argument of mode MODE and
6215 data type TYPE. (TYPE is null for libcalls where that information
6216 may not be available.) */
6219 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6220 const_tree type, bool named)
6222 HOST_WIDE_INT bytes, words;
6224 if (mode == BLKmode)
6225 bytes = int_size_in_bytes (type);
6227 bytes = GET_MODE_SIZE (mode);
6228 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6231 mode = type_natural_mode (type, NULL);
6233 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6234 function_arg_advance_ms_64 (cum, bytes, words);
6235 else if (TARGET_64BIT)
6236 function_arg_advance_64 (cum, mode, type, words, named);
6238 function_arg_advance_32 (cum, mode, type, bytes, words);
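/* Illustrative: advancing over f (int, double) on 64-bit SysV moves
   cum->regno from 0 to 1 (one GPR consumed) and cum->sse_regno from
   0 to 1 (one SSE register consumed); an unnamed 256-bit vector
   argument is passed on the stack and leaves the register counters
   untouched.  */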
6241 /* Define where to put the arguments to a function.
6242 Value is zero to push the argument on the stack,
6243 or a hard register in which to store the argument.
6245 MODE is the argument's machine mode.
6246 TYPE is the data type of the argument (as a tree).
6247 This is null for libcalls where that information may
6249 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6250 the preceding args and about the function being called.
6251 NAMED is nonzero if this argument is a named parameter
6252 (otherwise it is an extra parameter matching an ellipsis). */
6255 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6256 enum machine_mode orig_mode, const_tree type,
6257 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6259 static bool warnedsse, warnedmmx;
6261 /* Avoid the AL settings for the Unix64 ABI. */
6262 if (mode == VOIDmode)
6278 if (words <= cum->nregs)
6280 int regno = cum->regno;
6282 /* Fastcall allocates the first two DWORD (SImode) or
6283 smaller arguments to ECX and EDX if it isn't an
6289 || (type && AGGREGATE_TYPE_P (type)))
6292 /* ECX, not EAX, is the first allocated register. */
6293 if (regno == AX_REG)
6296 return gen_rtx_REG (mode, regno);
6301 if (cum->float_in_sse < 2)
6304 if (cum->float_in_sse < 1)
6308 /* In 32bit, we pass TImode in xmm registers. */
6315 if (!type || !AGGREGATE_TYPE_P (type))
6317 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6320 warning (0, "SSE vector argument without SSE enabled "
6324 return gen_reg_or_parallel (mode, orig_mode,
6325 cum->sse_regno + FIRST_SSE_REG);
6330 /* OImode shouldn't be used directly. */
6339 if (!type || !AGGREGATE_TYPE_P (type))
6342 return gen_reg_or_parallel (mode, orig_mode,
6343 cum->sse_regno + FIRST_SSE_REG);
6353 if (!type || !AGGREGATE_TYPE_P (type))
6355 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6358 warning (0, "MMX vector argument without MMX enabled "
6362 return gen_reg_or_parallel (mode, orig_mode,
6363 cum->mmx_regno + FIRST_MMX_REG);
6372 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6373 enum machine_mode orig_mode, const_tree type, bool named)
6375 /* Handle a hidden AL argument containing number of registers
6376 for varargs x86-64 functions. */
6377 if (mode == VOIDmode)
6378 return GEN_INT (cum->maybe_vaarg
6379 ? (cum->sse_nregs < 0
6380 ? X86_64_SSE_REGPARM_MAX
6395 /* Unnamed 256bit vector mode parameters are passed on stack. */
6401 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6403 &x86_64_int_parameter_registers [cum->regno],
6408 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6409 enum machine_mode orig_mode, bool named,
6410 HOST_WIDE_INT bytes)
6414 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
6415 We use the value -2 to specify that the current function call is MSABI. */
6416 if (mode == VOIDmode)
6417 return GEN_INT (-2);
6419 /* If we've run out of registers, it goes on the stack. */
6420 if (cum->nregs == 0)
6423 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6425 /* Only floating point modes are passed in anything but integer regs. */
6426 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6429 regno = cum->regno + FIRST_SSE_REG;
6434 /* Unnamed floating parameters are passed in both the
6435 SSE and integer registers. */
6436 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6437 t2 = gen_rtx_REG (mode, regno);
6438 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6439 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6440 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6443 /* Handle aggregate types passed in register. */
6444 if (orig_mode == BLKmode)
6446 if (bytes > 0 && bytes <= 8)
6447 mode = (bytes > 4 ? DImode : SImode);
6448 if (mode == BLKmode)
6452 return gen_reg_or_parallel (mode, orig_mode, regno);
6455 /* Return where to put the arguments to a function.
6456 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6458 MODE is the argument's machine mode. TYPE is the data type of the
6459 argument. It is null for libcalls where that information may not be
6460 available. CUM gives information about the preceding args and about
6461 the function being called. NAMED is nonzero if this argument is a
6462 named parameter (otherwise it is an extra parameter matching an
6466 ix86_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
6467 const_tree type, bool named)
6469 enum machine_mode mode = omode;
6470 HOST_WIDE_INT bytes, words;
6472 if (mode == BLKmode)
6473 bytes = int_size_in_bytes (type);
6475 bytes = GET_MODE_SIZE (mode);
6476 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6478 /* To simplify the code below, represent vector types with a vector mode
6479 even if MMX/SSE are not active. */
6480 if (type && TREE_CODE (type) == VECTOR_TYPE)
6481 mode = type_natural_mode (type, cum);
6483 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6484 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6485 else if (TARGET_64BIT)
6486 return function_arg_64 (cum, mode, omode, type, named);
6488 return function_arg_32 (cum, mode, omode, type, bytes, words);
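/* For illustration, given

       void f (int a, double b, __m128 c);

   the 64-bit SysV path above places a in %edi, b in %xmm0 and c in
   %xmm1, while the MS path uses %ecx for a and %xmm1 for b and, as
   ix86_pass_by_reference below decides, passes c by reference.  */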
6491 /* A C expression that indicates when an argument must be passed by
6492 reference. If nonzero for an argument, a copy of that argument is
6493 made in memory and a pointer to the argument is passed instead of
6494 the argument itself. The pointer is passed in whatever way is
6495 appropriate for passing a pointer to that type. */
6498 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6499 enum machine_mode mode ATTRIBUTE_UNUSED,
6500 const_tree type, bool named ATTRIBUTE_UNUSED)
6502 /* See Windows x64 Software Convention. */
6503 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6505 int msize = (int) GET_MODE_SIZE (mode);
6508 /* Arrays are passed by reference. */
6509 if (TREE_CODE (type) == ARRAY_TYPE)
6512 if (AGGREGATE_TYPE_P (type))
6514 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6515 are passed by reference. */
6516 msize = int_size_in_bytes (type);
6520 /* __m128 is passed by reference. */
6522 case 1: case 2: case 4: case 8:
6528 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
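/* Sketch of the MS rule above: a 12-byte aggregate such as

       struct odd { int a, b, c; };

   is not 1, 2, 4 or 8 bytes, so the caller makes a copy in memory
   and the parameter register carries only its address.  */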
6534 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6537 contains_aligned_value_p (const_tree type)
6539 enum machine_mode mode = TYPE_MODE (type);
6540 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6544 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6546 if (TYPE_ALIGN (type) < 128)
6549 if (AGGREGATE_TYPE_P (type))
6551 /* Walk the aggregates recursively. */
6552 switch (TREE_CODE (type))
6556 case QUAL_UNION_TYPE:
6560 /* Walk all the structure fields. */
6561 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6563 if (TREE_CODE (field) == FIELD_DECL
6564 && contains_aligned_value_p (TREE_TYPE (field)))
6571 /* Just for use if some languages pass arrays by value. */
6572 if (contains_aligned_value_p (TREE_TYPE (type)))
6583 /* Gives the alignment boundary, in bits, of an argument with the
6584 specified mode and type. */
6587 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6592 /* Since the main variant type is used for the call, convert the
6593 type to its main variant. */
6594 type = TYPE_MAIN_VARIANT (type);
6595 align = TYPE_ALIGN (type);
6598 align = GET_MODE_ALIGNMENT (mode);
6599 if (align < PARM_BOUNDARY)
6600 align = PARM_BOUNDARY;
6601 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6602 natural boundaries. */
6603 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6605 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6606 make an exception for SSE modes since these require 128bit
6609 The handling here differs from field_alignment. ICC aligns MMX
6610 arguments to 4 byte boundaries, while structure fields are aligned
6611 to 8 byte boundaries. */
6614 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6615 align = PARM_BOUNDARY;
6619 if (!contains_aligned_value_p (type))
6620 align = PARM_BOUNDARY;
6623 if (align > BIGGEST_ALIGNMENT)
6624 align = BIGGEST_ALIGNMENT;
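/* Examples of the boundaries computed above (illustrative): on
   32-bit, a plain int argument stays at PARM_BOUNDARY (32 bits),
   while an __m128 argument, or a struct that contains one, is pushed
   to a 128 bit boundary by contains_aligned_value_p.  */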
6628 /* Return true if N is a possible register number of function value. */
6631 ix86_function_value_regno_p (const unsigned int regno)
6638 case FIRST_FLOAT_REG:
6639 /* TODO: The function should depend on current function ABI but
6640 builtins.c would need updating then. Therefore we use the
6642 if (TARGET_64BIT && ix86_abi == MS_ABI)
6644 return TARGET_FLOAT_RETURNS_IN_80387;
6650 if (TARGET_MACHO || TARGET_64BIT)
6658 /* Define how to find the value returned by a function.
6659 VALTYPE is the data type of the value (as a tree).
6660 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6661 otherwise, FUNC is 0. */
6664 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6665 const_tree fntype, const_tree fn)
6669 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6670 we normally prevent this case when mmx is not available. However
6671 some ABIs may require the result to be returned like DImode. */
6672 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6673 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6675 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6676 we prevent this case when sse is not available. However some ABIs
6677 may require the result to be returned like integer TImode. */
6678 else if (mode == TImode
6679 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6680 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6682 /* 32-byte vector modes in %ymm0. */
6683 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6684 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6686 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6687 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6688 regno = FIRST_FLOAT_REG;
6690 /* Most things go in %eax. */
6693 /* Override FP return register with %xmm0 for local functions when
6694 SSE math is enabled or for functions with sseregparm attribute. */
6695 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6697 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6698 if ((sse_level >= 1 && mode == SFmode)
6699 || (sse_level == 2 && mode == DFmode))
6700 regno = FIRST_SSE_REG;
6703 /* OImode shouldn't be used directly. */
6704 gcc_assert (mode != OImode);
6706 return gen_rtx_REG (orig_mode, regno);
6710 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6715 /* Handle libcalls, which don't provide a type node. */
6716 if (valtype == NULL)
6728 return gen_rtx_REG (mode, FIRST_SSE_REG);
6731 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6735 return gen_rtx_REG (mode, AX_REG);
6739 ret = construct_container (mode, orig_mode, valtype, 1,
6740 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6741 x86_64_int_return_registers, 0);
6743 /* For zero sized structures, construct_container returns NULL, but we
6744 need to keep the rest of the compiler happy by returning a meaningful value. */
6746 ret = gen_rtx_REG (orig_mode, AX_REG);
6752 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6754 unsigned int regno = AX_REG;
6758 switch (GET_MODE_SIZE (mode))
6761 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6762 && !COMPLEX_MODE_P (mode))
6763 regno = FIRST_SSE_REG;
6767 if (mode == SFmode || mode == DFmode)
6768 regno = FIRST_SSE_REG;
6774 return gen_rtx_REG (orig_mode, regno);
6778 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6779 enum machine_mode orig_mode, enum machine_mode mode)
6781 const_tree fn, fntype;
6784 if (fntype_or_decl && DECL_P (fntype_or_decl))
6785 fn = fntype_or_decl;
6786 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6788 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6789 return function_value_ms_64 (orig_mode, mode);
6790 else if (TARGET_64BIT)
6791 return function_value_64 (orig_mode, mode, valtype);
6793 return function_value_32 (orig_mode, mode, fntype, fn);
6797 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6798 bool outgoing ATTRIBUTE_UNUSED)
6800 enum machine_mode mode, orig_mode;
6802 orig_mode = TYPE_MODE (valtype);
6803 mode = type_natural_mode (valtype, NULL);
6804 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6808 ix86_libcall_value (enum machine_mode mode)
6810 return ix86_function_value_1 (NULL, NULL, mode, mode);
6813 /* Return true iff type is returned in memory. */
6815 static int ATTRIBUTE_UNUSED
6816 return_in_memory_32 (const_tree type, enum machine_mode mode)
6820 if (mode == BLKmode)
6823 size = int_size_in_bytes (type);
6825 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6828 if (VECTOR_MODE_P (mode) || mode == TImode)
6830 /* User-created vectors small enough to fit in EAX. */
6834 /* MMX/3dNow values are returned in MM0,
6835 except when it doesn't exist. */
6837 return (TARGET_MMX ? 0 : 1);
6839 /* SSE values are returned in XMM0, except when it doesn't exist. */
6841 return (TARGET_SSE ? 0 : 1);
6843 /* AVX values are returned in YMM0, except when it doesn't exist. */
6845 return TARGET_AVX ? 0 : 1;
6854 /* OImode shouldn't be used directly. */
6855 gcc_assert (mode != OImode);
6860 static int ATTRIBUTE_UNUSED
6861 return_in_memory_64 (const_tree type, enum machine_mode mode)
6863 int needed_intregs, needed_sseregs;
6864 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6867 static int ATTRIBUTE_UNUSED
6868 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6870 HOST_WIDE_INT size = int_size_in_bytes (type);
6872 /* __m128 is returned in xmm0. */
6873 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6874 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6877 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
6878 return (size != 1 && size != 2 && size != 4 && size != 8);
6882 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6884 #ifdef SUBTARGET_RETURN_IN_MEMORY
6885 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6887 const enum machine_mode mode = type_natural_mode (type, NULL);
6891 if (ix86_function_type_abi (fntype) == MS_ABI)
6892 return return_in_memory_ms_64 (type, mode);
6894 return return_in_memory_64 (type, mode);
6897 return return_in_memory_32 (type, mode);
6901 /* Return true iff TYPE is returned in memory. This version is used
6902 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6903 but differs notably in that when MMX is available, 8-byte vectors
6904 are returned in memory, rather than in MMX registers. */
6907 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6910 enum machine_mode mode = type_natural_mode (type, NULL);
6913 return return_in_memory_64 (type, mode);
6915 if (mode == BLKmode)
6918 size = int_size_in_bytes (type);
6920 if (VECTOR_MODE_P (mode))
6922 /* Return in memory only if MMX registers *are* available. This
6923 seems backwards, but it is consistent with the existing
6930 else if (mode == TImode)
6932 else if (mode == XFmode)
6938 /* When returning SSE vector types, we have a choice of either
6939 (1) being abi incompatible with a -march switch, or
6940 (2) generating an error.
6941 Given no good solution, I think the safest thing is one warning.
6942 The user won't be able to use -Werror, but...
6944 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6945 called in response to actually generating a caller or callee that
6946 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6947 via aggregate_value_p for general type probing from tree-ssa. */
6950 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6952 static bool warnedsse, warnedmmx;
6954 if (!TARGET_64BIT && type)
6956 /* Look at the return type of the function, not the function type. */
6957 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6959 if (!TARGET_SSE && !warnedsse)
6962 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6965 warning (0, "SSE vector return without SSE enabled "
6970 if (!TARGET_MMX && !warnedmmx)
6972 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6975 warning (0, "MMX vector return without MMX enabled "
6985 /* Create the va_list data type. */
6987 /* Returns the calling convention specific va_list data type.
6988 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6991 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6993 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6995 /* For i386 we use a plain pointer to the argument area. */
6996 if (!TARGET_64BIT || abi == MS_ABI)
6997 return build_pointer_type (char_type_node);
6999 record = lang_hooks.types.make_type (RECORD_TYPE);
7000 type_decl = build_decl (BUILTINS_LOCATION,
7001 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7003 f_gpr = build_decl (BUILTINS_LOCATION,
7004 FIELD_DECL, get_identifier ("gp_offset"),
7005 unsigned_type_node);
7006 f_fpr = build_decl (BUILTINS_LOCATION,
7007 FIELD_DECL, get_identifier ("fp_offset"),
7008 unsigned_type_node);
7009 f_ovf = build_decl (BUILTINS_LOCATION,
7010 FIELD_DECL, get_identifier ("overflow_arg_area"),
7012 f_sav = build_decl (BUILTINS_LOCATION,
7013 FIELD_DECL, get_identifier ("reg_save_area"),
7016 va_list_gpr_counter_field = f_gpr;
7017 va_list_fpr_counter_field = f_fpr;
7019 DECL_FIELD_CONTEXT (f_gpr) = record;
7020 DECL_FIELD_CONTEXT (f_fpr) = record;
7021 DECL_FIELD_CONTEXT (f_ovf) = record;
7022 DECL_FIELD_CONTEXT (f_sav) = record;
7024 TREE_CHAIN (record) = type_decl;
7025 TYPE_NAME (record) = type_decl;
7026 TYPE_FIELDS (record) = f_gpr;
7027 DECL_CHAIN (f_gpr) = f_fpr;
7028 DECL_CHAIN (f_fpr) = f_ovf;
7029 DECL_CHAIN (f_ovf) = f_sav;
7031 layout_type (record);
7033 /* The correct type is an array type of one element. */
7034 return build_array_type (record, build_index_type (size_zero_node));
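/* The record built above matches the familiar C-level declaration
   from the SysV x86-64 psABI (shown here only for illustration):

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag;
       typedef __va_list_tag va_list[1];  */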
7037 /* Set up the builtin va_list data type and for 64-bit the additional
7038 calling convention specific va_list data types. */
7041 ix86_build_builtin_va_list (void)
7043 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7045 /* Initialize abi specific va_list builtin types. */
7049 if (ix86_abi == MS_ABI)
7051 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7052 if (TREE_CODE (t) != RECORD_TYPE)
7053 t = build_variant_type_copy (t);
7054 sysv_va_list_type_node = t;
7059 if (TREE_CODE (t) != RECORD_TYPE)
7060 t = build_variant_type_copy (t);
7061 sysv_va_list_type_node = t;
7063 if (ix86_abi != MS_ABI)
7065 t = ix86_build_builtin_va_list_abi (MS_ABI);
7066 if (TREE_CODE (t) != RECORD_TYPE)
7067 t = build_variant_type_copy (t);
7068 ms_va_list_type_node = t;
7073 if (TREE_CODE (t) != RECORD_TYPE)
7074 t = build_variant_type_copy (t);
7075 ms_va_list_type_node = t;
7082 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7085 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7091 /* GPR size of varargs save area. */
7092 if (cfun->va_list_gpr_size)
7093 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7095 ix86_varargs_gpr_size = 0;
7097 /* FPR size of varargs save area. We don't need it if we don't pass
7098 anything in SSE registers. */
7099 if (TARGET_SSE && cfun->va_list_fpr_size)
7100 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7102 ix86_varargs_fpr_size = 0;
7104 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7107 save_area = frame_pointer_rtx;
7108 set = get_varargs_alias_set ();
7110 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7111 if (max > X86_64_REGPARM_MAX)
7112 max = X86_64_REGPARM_MAX;
7114 for (i = cum->regno; i < max; i++)
7116 mem = gen_rtx_MEM (Pmode,
7117 plus_constant (save_area, i * UNITS_PER_WORD));
7118 MEM_NOTRAP_P (mem) = 1;
7119 set_mem_alias_set (mem, set);
7120 emit_move_insn (mem, gen_rtx_REG (Pmode,
7121 x86_64_int_parameter_registers[i]));
7124 if (ix86_varargs_fpr_size)
7126 enum machine_mode smode;
7129 /* Now emit code to save SSE registers. The AX parameter contains the
7130 number of SSE parameter registers used to call this function, though
7131 all we actually check here is the zero/non-zero status. */
7133 label = gen_label_rtx ();
7134 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7135 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7138 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7139 we used movdqa (i.e. TImode) instead? Perhaps even better would
7140 be if we could determine the real mode of the data, via a hook
7141 into pass_stdarg. Ignore all that for now. */
7143 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7144 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7146 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7147 if (max > X86_64_SSE_REGPARM_MAX)
7148 max = X86_64_SSE_REGPARM_MAX;
7150 for (i = cum->sse_regno; i < max; ++i)
7152 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7153 mem = gen_rtx_MEM (smode, mem);
7154 MEM_NOTRAP_P (mem) = 1;
7155 set_mem_alias_set (mem, set);
7156 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7158 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7166 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7168 alias_set_type set = get_varargs_alias_set ();
7171 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7175 mem = gen_rtx_MEM (Pmode,
7176 plus_constant (virtual_incoming_args_rtx,
7177 i * UNITS_PER_WORD));
7178 MEM_NOTRAP_P (mem) = 1;
7179 set_mem_alias_set (mem, set);
7181 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7182 emit_move_insn (mem, reg);
7187 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7188 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7191 CUMULATIVE_ARGS next_cum;
7194 /* This argument doesn't appear to be used anymore. Which is good,
7195 because the old code here didn't suppress rtl generation. */
7196 gcc_assert (!no_rtl);
7201 fntype = TREE_TYPE (current_function_decl);
7203 /* For varargs, we do not want to skip the dummy va_dcl argument.
7204 For stdargs, we do want to skip the last named argument. */
7206 if (stdarg_p (fntype))
7207 ix86_function_arg_advance (&next_cum, mode, type, true);
7209 if (cum->call_abi == MS_ABI)
7210 setup_incoming_varargs_ms_64 (&next_cum);
7212 setup_incoming_varargs_64 (&next_cum);
7215 /* Checks if TYPE is of kind va_list char *. */
7218 is_va_list_char_pointer (tree type)
7222 /* For 32-bit it is always true. */
7225 canonic = ix86_canonical_va_list_type (type);
7226 return (canonic == ms_va_list_type_node
7227 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7230 /* Implement va_start. */
7233 ix86_va_start (tree valist, rtx nextarg)
7235 HOST_WIDE_INT words, n_gpr, n_fpr;
7236 tree f_gpr, f_fpr, f_ovf, f_sav;
7237 tree gpr, fpr, ovf, sav, t;
7240 /* Only 64bit target needs something special. */
7241 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7243 std_expand_builtin_va_start (valist, nextarg);
7247 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7248 f_fpr = DECL_CHAIN (f_gpr);
7249 f_ovf = DECL_CHAIN (f_fpr);
7250 f_sav = DECL_CHAIN (f_ovf);
7252 valist = build_simple_mem_ref (valist);
7253 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7254 /* The following should be folded into the MEM_REF offset. */
7255 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7257 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7259 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7261 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7264 /* Count number of gp and fp argument registers used. */
7265 words = crtl->args.info.words;
7266 n_gpr = crtl->args.info.regno;
7267 n_fpr = crtl->args.info.sse_regno;
7269 if (cfun->va_list_gpr_size)
7271 type = TREE_TYPE (gpr);
7272 t = build2 (MODIFY_EXPR, type,
7273 gpr, build_int_cst (type, n_gpr * 8));
7274 TREE_SIDE_EFFECTS (t) = 1;
7275 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7278 if (TARGET_SSE && cfun->va_list_fpr_size)
7280 type = TREE_TYPE (fpr);
7281 t = build2 (MODIFY_EXPR, type, fpr,
7282 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7283 TREE_SIDE_EFFECTS (t) = 1;
7284 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7287 /* Find the overflow area. */
7288 type = TREE_TYPE (ovf);
7289 t = make_tree (type, crtl->args.internal_arg_pointer);
7291 t = build2 (POINTER_PLUS_EXPR, type, t,
7292 size_int (words * UNITS_PER_WORD));
7293 t = build2 (MODIFY_EXPR, type, ovf, t);
7294 TREE_SIDE_EFFECTS (t) = 1;
7295 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7297 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7299 /* Find the register save area.
7300 The function prologue saves it right above the stack frame. */
7301 type = TREE_TYPE (sav);
7302 t = make_tree (type, frame_pointer_rtx);
7303 if (!ix86_varargs_gpr_size)
7304 t = build2 (POINTER_PLUS_EXPR, type, t,
7305 size_int (-8 * X86_64_REGPARM_MAX));
7306 t = build2 (MODIFY_EXPR, type, sav, t);
7307 TREE_SIDE_EFFECTS (t) = 1;
7308 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
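/* Illustrative values: in

       void g (int a, double b, ...)

   va_start leaves gp_offset = 8 (one GPR consumed by A), fp_offset =
   X86_64_REGPARM_MAX * 8 + 16 (one SSE slot consumed by B), and
   overflow_arg_area pointing just past any named stack words.  */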
7312 /* Implement va_arg. */
7315 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7318 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7319 tree f_gpr, f_fpr, f_ovf, f_sav;
7320 tree gpr, fpr, ovf, sav, t;
7322 tree lab_false, lab_over = NULL_TREE;
7327 enum machine_mode nat_mode;
7328 unsigned int arg_boundary;
7330 /* Only 64bit target needs something special. */
7331 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7332 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7334 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7335 f_fpr = DECL_CHAIN (f_gpr);
7336 f_ovf = DECL_CHAIN (f_fpr);
7337 f_sav = DECL_CHAIN (f_ovf);
7339 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7340 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7341 valist = build_va_arg_indirect_ref (valist);
7342 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7343 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7344 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7346 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7348 type = build_pointer_type (type);
7349 size = int_size_in_bytes (type);
7350 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7352 nat_mode = type_natural_mode (type, NULL);
7361 /* Unnamed 256bit vector mode parameters are passed on stack. */
7362 if (ix86_cfun_abi () == SYSV_ABI)
7369 container = construct_container (nat_mode, TYPE_MODE (type),
7370 type, 0, X86_64_REGPARM_MAX,
7371 X86_64_SSE_REGPARM_MAX, intreg,
7376 /* Pull the value out of the saved registers. */
7378 addr = create_tmp_var (ptr_type_node, "addr");
7382 int needed_intregs, needed_sseregs;
7384 tree int_addr, sse_addr;
7386 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7387 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7389 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7391 need_temp = (!REG_P (container)
7392 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7393 || TYPE_ALIGN (type) > 128));
7395 /* In case we are passing a structure, verify that it is a consecutive
7396 block on the register save area. If not, we need to do moves. */
7397 if (!need_temp && !REG_P (container))
7399 /* Verify that all registers are strictly consecutive. */
7400 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7404 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7406 rtx slot = XVECEXP (container, 0, i);
7407 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7408 || INTVAL (XEXP (slot, 1)) != i * 16)
7416 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7418 rtx slot = XVECEXP (container, 0, i);
7419 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7420 || INTVAL (XEXP (slot, 1)) != i * 8)
7432 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7433 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7436 /* First ensure that we fit completely in registers. */
7439 t = build_int_cst (TREE_TYPE (gpr),
7440 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7441 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7442 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7443 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7444 gimplify_and_add (t, pre_p);
7448 t = build_int_cst (TREE_TYPE (fpr),
7449 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7450 + X86_64_REGPARM_MAX * 8);
7451 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7452 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7453 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7454 gimplify_and_add (t, pre_p);
7457 /* Compute index to start of area used for integer regs. */
7460 /* int_addr = gpr + sav; */
7461 t = fold_convert (sizetype, gpr);
7462 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7463 gimplify_assign (int_addr, t, pre_p);
7467 /* sse_addr = fpr + sav; */
7468 t = fold_convert (sizetype, fpr);
7469 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7470 gimplify_assign (sse_addr, t, pre_p);
7474 int i, prev_size = 0;
7475 tree temp = create_tmp_var (type, "va_arg_tmp");
7478 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7479 gimplify_assign (addr, t, pre_p);
7481 for (i = 0; i < XVECLEN (container, 0); i++)
7483 rtx slot = XVECEXP (container, 0, i);
7484 rtx reg = XEXP (slot, 0);
7485 enum machine_mode mode = GET_MODE (reg);
7491 tree dest_addr, dest;
7492 int cur_size = GET_MODE_SIZE (mode);
7494 if (prev_size + cur_size > size)
7496 cur_size = size - prev_size;
7497 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7498 if (mode == BLKmode)
7501 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7502 if (mode == GET_MODE (reg))
7503 addr_type = build_pointer_type (piece_type);
7505 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7507 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7510 if (SSE_REGNO_P (REGNO (reg)))
7512 src_addr = sse_addr;
7513 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7517 src_addr = int_addr;
7518 src_offset = REGNO (reg) * 8;
7520 src_addr = fold_convert (addr_type, src_addr);
7521 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7522 size_int (src_offset));
7524 dest_addr = fold_convert (daddr_type, addr);
7525 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7526 size_int (INTVAL (XEXP (slot, 1))));
7527 if (cur_size == GET_MODE_SIZE (mode))
7529 src = build_va_arg_indirect_ref (src_addr);
7530 dest = build_va_arg_indirect_ref (dest_addr);
7532 gimplify_assign (dest, src, pre_p);
7537 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7538 3, dest_addr, src_addr,
7539 size_int (cur_size));
7540 gimplify_and_add (copy, pre_p);
7542 prev_size += cur_size;
7548 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7549 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7550 gimplify_assign (gpr, t, pre_p);
7555 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7556 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7557 gimplify_assign (fpr, t, pre_p);
7560 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7562 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7565 /* ... otherwise out of the overflow area. */
7567 /* When we align a parameter on the stack for the caller, a parameter
7568 alignment beyond MAX_SUPPORTED_STACK_ALIGNMENT is clamped to
7569 MAX_SUPPORTED_STACK_ALIGNMENT. We match the callee
7570 here with the caller. */
7571 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7572 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7573 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7575 /* Care for on-stack alignment if needed. */
7576 if (arg_boundary <= 64 || size == 0)
7580 HOST_WIDE_INT align = arg_boundary / 8;
7581 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7582 size_int (align - 1));
7583 t = fold_convert (sizetype, t);
7584 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7586 t = fold_convert (TREE_TYPE (ovf), t);
7589 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7590 gimplify_assign (addr, t, pre_p);
7592 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7593 size_int (rsize * UNITS_PER_WORD));
7594 gimplify_assign (unshare_expr (ovf), t, pre_p);
7597 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7599 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7600 addr = fold_convert (ptrtype, addr);
7603 addr = build_va_arg_indirect_ref (addr);
7604 return build_va_arg_indirect_ref (addr);
7607 /* Return nonzero if OPNUM's MEM should be matched
7608 in movabs* patterns. */
7611 ix86_check_movabs (rtx insn, int opnum)
7615 set = PATTERN (insn);
7616 if (GET_CODE (set) == PARALLEL)
7617 set = XVECEXP (set, 0, 0);
7618 gcc_assert (GET_CODE (set) == SET);
7619 mem = XEXP (set, opnum);
7620 while (GET_CODE (mem) == SUBREG)
7621 mem = SUBREG_REG (mem);
7622 gcc_assert (MEM_P (mem));
7623 return (volatile_ok || !MEM_VOLATILE_P (mem));
7626 /* Initialize the table of extra 80387 mathematical constants. */
7629 init_ext_80387_constants (void)
7631 static const char * cst[5] =
7633 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7634 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7635 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7636 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7637 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7641 for (i = 0; i < 5; i++)
7643 real_from_string (&ext_80387_constants_table[i], cst[i]);
7644 /* Ensure each constant is rounded to XFmode precision. */
7645 real_convert (&ext_80387_constants_table[i],
7646 XFmode, &ext_80387_constants_table[i]);
7649 ext_80387_constants_init = 1;
7652 /* Return true if the constant is something that can be loaded with
7653 a special instruction. */
7656 standard_80387_constant_p (rtx x)
7658 enum machine_mode mode = GET_MODE (x);
7662 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7665 if (x == CONST0_RTX (mode))
7667 if (x == CONST1_RTX (mode))
7670 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7672 /* For XFmode constants, try to find a special 80387 instruction when
7673 optimizing for size or on those CPUs that benefit from them. */
7675 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7679 if (! ext_80387_constants_init)
7680 init_ext_80387_constants ();
7682 for (i = 0; i < 5; i++)
7683 if (real_identical (&r, &ext_80387_constants_table[i]))
7687 /* A load of the constant -0.0 or -1.0 will be split into an
7688 fldz;fchs or fld1;fchs sequence. */
7689 if (real_isnegzero (&r))
7691 if (real_identical (&r, &dconstm1))
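/* So, as a sketch of the returned indices: 1 selects fldz for +0.0,
   2 selects fld1 for +1.0, 3..7 cover fldlg2, fldln2, fldl2e, fldl2t
   and fldpi from the table above, and the negative zero/one cases
   are handled by the fldz;fchs and fld1;fchs splits.  */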
7697 /* Return the opcode of the special instruction to be used to load
7701 standard_80387_constant_opcode (rtx x)
7703 switch (standard_80387_constant_p (x))
7727 /* Return the CONST_DOUBLE representing the 80387 constant that is
7728 loaded by the specified special instruction. The argument IDX
7729 matches the return value from standard_80387_constant_p. */
7732 standard_80387_constant_rtx (int idx)
7736 if (! ext_80387_constants_init)
7737 init_ext_80387_constants ();
7753 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7757 /* Return 1 if X is all 0s and 2 if X is all 1s
7758 in a supported SSE vector mode. */
7761 standard_sse_constant_p (rtx x)
7763 enum machine_mode mode = GET_MODE (x);
7765 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7767 if (vector_all_ones_operand (x, mode))
7783 /* Return the opcode of the special instruction to be used to load
7787 standard_sse_constant_opcode (rtx insn, rtx x)
7789 switch (standard_sse_constant_p (x))
7792 switch (get_attr_mode (insn))
7795 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7797 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7798 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7800 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7802 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7803 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7805 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7807 return "vxorps\t%x0, %x0, %x0";
7809 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7810 return "vxorps\t%x0, %x0, %x0";
7812 return "vxorpd\t%x0, %x0, %x0";
7814 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7815 return "vxorps\t%x0, %x0, %x0";
7817 return "vpxor\t%x0, %x0, %x0";
7822 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
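/* Usage sketch: an all-zero CONST_VECTOR (case 1 above) is cleared
   with an xorps/xorpd/pxor of a register with itself, while an
   operand matched by vector_all_ones_operand (case 2) is produced
   by pcmpeqd of a register with itself, which sets every bit.  */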
7829 /* Returns 1 if OP contains a symbol reference. */
7832 symbolic_reference_mentioned_p (rtx op)
7837 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7840 fmt = GET_RTX_FORMAT (GET_CODE (op));
7841 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7847 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7848 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7852 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7859 /* Return 1 if it is appropriate to emit `ret' instructions in the
7860 body of a function. Do this only if the epilogue is simple, needing a
7861 couple of insns. Prior to reloading, we can't tell how many registers
7862 must be saved, so return 0 then. Return 0 if there is no frame
7863 marker to de-allocate. */
7866 ix86_can_use_return_insn_p (void)
7868 struct ix86_frame frame;
7870 if (! reload_completed || frame_pointer_needed)
7873 /* Don't allow more than 32k pop, since that's all we can do
7874 with one instruction. */
7875 if (crtl->args.pops_args && crtl->args.size >= 32768)
7878 ix86_compute_frame_layout (&frame);
7879 return (frame.stack_pointer_offset == UNITS_PER_WORD
7880 && (frame.nregs + frame.nsseregs) == 0);
7883 /* Value should be nonzero if functions must have frame pointers.
7884 Zero means the frame pointer need not be set up (and parms may
7885 be accessed via the stack pointer) in functions that seem suitable. */
7888 ix86_frame_pointer_required (void)
7890 /* If we accessed previous frames, then the generated code expects
7891 to be able to access the saved ebp value in our frame. */
7892 if (cfun->machine->accesses_prev_frame)
7895 /* Several x86 OSes need a frame pointer for other reasons,
7896 usually pertaining to setjmp. */
7897 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7900 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7901 the frame pointer by default. Turn it back on now if we've not
7902 got a leaf function. */
7903 if (TARGET_OMIT_LEAF_FRAME_POINTER
7904 && (!current_function_is_leaf
7905 || ix86_current_function_calls_tls_descriptor))
7908 if (crtl->profile && !flag_fentry)
7914 /* Record that the current function accesses previous call frames. */
7917 ix86_setup_frame_addresses (void)
7919 cfun->machine->accesses_prev_frame = 1;
7922 #ifndef USE_HIDDEN_LINKONCE
7923 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7924 # define USE_HIDDEN_LINKONCE 1
7926 # define USE_HIDDEN_LINKONCE 0
7930 static int pic_labels_used;
7932 /* Fills in the label name that should be used for a pc thunk for
7933 the given register. */
7936 get_pc_thunk_name (char name[32], unsigned int regno)
7938 gcc_assert (!TARGET_64BIT);
7940 if (USE_HIDDEN_LINKONCE)
7941 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7943 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7947 /* This function generates code for -fpic that loads %ebx with
7948 the return address of the caller and then returns. */
7951 ix86_code_end (void)
7956 for (regno = 0; regno < 8; ++regno)
7961 if (! ((pic_labels_used >> regno) & 1))
7964 get_pc_thunk_name (name, regno);
7966 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7967 get_identifier (name),
7968 build_function_type (void_type_node, void_list_node));
7969 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7970 NULL_TREE, void_type_node);
7971 TREE_PUBLIC (decl) = 1;
7972 TREE_STATIC (decl) = 1;
7977 switch_to_section (darwin_sections[text_coal_section]);
7978 fputs ("\t.weak_definition\t", asm_out_file);
7979 assemble_name (asm_out_file, name);
7980 fputs ("\n\t.private_extern\t", asm_out_file);
7981 assemble_name (asm_out_file, name);
7982 putc ('\n', asm_out_file);
7983 ASM_OUTPUT_LABEL (asm_out_file, name);
7984 DECL_WEAK (decl) = 1;
7988 if (USE_HIDDEN_LINKONCE)
7990 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7992 targetm.asm_out.unique_section (decl, 0);
7993 switch_to_section (get_named_section (decl, NULL, 0));
7995 targetm.asm_out.globalize_label (asm_out_file, name);
7996 fputs ("\t.hidden\t", asm_out_file);
7997 assemble_name (asm_out_file, name);
7998 putc ('\n', asm_out_file);
7999 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
8003 switch_to_section (text_section);
8004 ASM_OUTPUT_LABEL (asm_out_file, name);
8007 DECL_INITIAL (decl) = make_node (BLOCK);
8008 current_function_decl = decl;
8009 init_function_start (decl);
8010 first_function_block_is_cold = false;
8011 /* Make sure unwind info is emitted for the thunk if needed. */
8012 final_start_function (emit_barrier (), asm_out_file, 1);
8014 xops[0] = gen_rtx_REG (Pmode, regno);
8015 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8016 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8017 output_asm_insn ("ret", xops);
8018 final_end_function ();
8019 init_insn_lengths ();
8020 free_after_compilation (cfun);
8022 current_function_decl = NULL;
8026 /* Emit code for the SET_GOT patterns. */
8029 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8035 if (TARGET_VXWORKS_RTP && flag_pic)
8037 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8038 xops[2] = gen_rtx_MEM (Pmode,
8039 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8040 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8042 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8043 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8044 an unadorned address. */
8045 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8046 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8047 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8051 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8053 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8055 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8058 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8061 output_asm_insn ("call\t%a2", xops);
8062 #ifdef DWARF2_UNWIND_INFO
8063 /* The call to the next label acts as a push. */
8064 if (dwarf2out_do_frame ())
8068 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8069 gen_rtx_PLUS (Pmode,
8072 RTX_FRAME_RELATED_P (insn) = 1;
8073 dwarf2out_frame_debug (insn, true);
8080 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8081 is what will be referenced by the Mach-O PIC subsystem. */
8083 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8086 targetm.asm_out.internal_label (asm_out_file, "L",
8087 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8091 output_asm_insn ("pop%z0\t%0", xops);
8092 #ifdef DWARF2_UNWIND_INFO
8093 /* The pop clobbers dest, but doesn't restore it
8094 for unwind info purposes. */
8095 if (dwarf2out_do_frame ())
8099 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8100 dwarf2out_frame_debug (insn, true);
8101 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8102 gen_rtx_PLUS (Pmode,
8105 RTX_FRAME_RELATED_P (insn) = 1;
8106 dwarf2out_frame_debug (insn, true);
8115 get_pc_thunk_name (name, REGNO (dest));
8116 pic_labels_used |= 1 << REGNO (dest);
8118 #ifdef DWARF2_UNWIND_INFO
/* Ensure all queued register saves are flushed before the
   call. */
8121 if (dwarf2out_do_frame ())
8125 insn = emit_barrier ();
8127 dwarf2out_frame_debug (insn, false);
8130 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8131 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8132 output_asm_insn ("call\t%X2", xops);
8133 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8134 is what will be referenced by the Mach-O PIC subsystem. */
8137 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8139 targetm.asm_out.internal_label (asm_out_file, "L",
8140 CODE_LABEL_NUMBER (label));
8147 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8148 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8150 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
/* Generate a "push" pattern for input ARG. */
8160 struct machine_function *m = cfun->machine;
8162 if (m->fs.cfa_reg == stack_pointer_rtx)
8163 m->fs.cfa_offset += UNITS_PER_WORD;
8164 m->fs.sp_offset += UNITS_PER_WORD;
8166 return gen_rtx_SET (VOIDmode,
8168 gen_rtx_PRE_DEC (Pmode,
8169 stack_pointer_rtx)),
/* Generate a "pop" pattern for input ARG. */
8178 return gen_rtx_SET (VOIDmode,
8181 gen_rtx_POST_INC (Pmode,
8182 stack_pointer_rtx)));
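/* Illustration (a sketch, not part of the build): on a 32-bit target,
   gen_push on %eax builds (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax)),
   which is emitted as "pushl %eax", while gen_pop on %eax builds
   (set (reg:SI ax) (mem:SI (post_inc:SI (reg:SI sp)))), i.e. "popl %eax". */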
8185 /* Return >= 0 if there is an unused call-clobbered register available
8186 for the entire function. */
8189 ix86_select_alt_pic_regnum (void)
8191 if (current_function_is_leaf
8193 && !ix86_current_function_calls_tls_descriptor)
8196 /* Can't use the same register for both PIC and DRAP. */
8198 drap = REGNO (crtl->drap_reg);
8201 for (i = 2; i >= 0; --i)
8202 if (i != drap && !df_regs_ever_live_p (i))
8206 return INVALID_REGNUM;
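/* Illustration (hypothetical case): in a leaf PIC function that never
   uses %ecx (regno 2) and has no DRAP, the loop above finds regno 2
   unused, so %ecx can serve as the PIC register and the usual PIC
   register need not be saved at all. */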
8209 /* Return 1 if we need to save REGNO. */
8211 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8213 if (pic_offset_table_rtx
8214 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8215 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8217 || crtl->calls_eh_return
8218 || crtl->uses_const_pool))
8220 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8225 if (crtl->calls_eh_return && maybe_eh_return)
8230 unsigned test = EH_RETURN_DATA_REGNO (i);
8231 if (test == INVALID_REGNUM)
8238 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8241 return (df_regs_ever_live_p (regno)
8242 && !call_used_regs[regno]
8243 && !fixed_regs[regno]
8244 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
/* Return the number of saved general purpose registers. */
8250 ix86_nsaved_regs (void)
8255 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8256 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
/* Return the number of saved SSE registers. */
8264 ix86_nsaved_sseregs (void)
8269 if (ix86_cfun_abi () != MS_ABI)
8271 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8272 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8277 /* Given FROM and TO register numbers, say whether this elimination is
8278 allowed. If stack alignment is needed, we can only replace argument
8279 pointer with hard frame pointer, or replace frame pointer with stack
8280 pointer. Otherwise, frame pointer elimination is automatically
8281 handled and all other eliminations are valid. */
8284 ix86_can_eliminate (const int from, const int to)
8286 if (stack_realign_fp)
8287 return ((from == ARG_POINTER_REGNUM
8288 && to == HARD_FRAME_POINTER_REGNUM)
8289 || (from == FRAME_POINTER_REGNUM
8290 && to == STACK_POINTER_REGNUM));
8292 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8295 /* Return the offset between two registers, one to be eliminated, and the other
8296 its replacement, at the start of a routine. */
8299 ix86_initial_elimination_offset (int from, int to)
8301 struct ix86_frame frame;
8302 ix86_compute_frame_layout (&frame);
8304 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8305 return frame.hard_frame_pointer_offset;
8306 else if (from == FRAME_POINTER_REGNUM
8307 && to == HARD_FRAME_POINTER_REGNUM)
8308 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8311 gcc_assert (to == STACK_POINTER_REGNUM);
8313 if (from == ARG_POINTER_REGNUM)
8314 return frame.stack_pointer_offset;
8316 gcc_assert (from == FRAME_POINTER_REGNUM);
8317 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8321 /* In a dynamically-aligned function, we can't know the offset from
8322 stack pointer to frame pointer, so we must ensure that setjmp
8323 eliminates fp against the hard fp (%ebp) rather than trying to
8324 index from %esp up to the top of the frame across a gap that is
8325 of unknown (at compile-time) size. */
8327 ix86_builtin_setjmp_frame_value (void)
8329 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
/* Fill the structure ix86_frame with the frame layout of the current
   function. */
8335 ix86_compute_frame_layout (struct ix86_frame *frame)
8337 unsigned int stack_alignment_needed;
8338 HOST_WIDE_INT offset;
8339 unsigned int preferred_alignment;
8340 HOST_WIDE_INT size = get_frame_size ();
8341 HOST_WIDE_INT to_allocate;
8343 frame->nregs = ix86_nsaved_regs ();
8344 frame->nsseregs = ix86_nsaved_sseregs ();
8346 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8347 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
/* The MS ABI seems to require stack alignment to be always 16 except for
   function prologues and leaf functions. */
8351 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8352 && (!current_function_is_leaf || cfun->calls_alloca != 0
8353 || ix86_current_function_calls_tls_descriptor))
8355 preferred_alignment = 16;
8356 stack_alignment_needed = 16;
8357 crtl->preferred_stack_boundary = 128;
8358 crtl->stack_alignment_needed = 128;
8361 gcc_assert (!size || stack_alignment_needed);
8362 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8363 gcc_assert (preferred_alignment <= stack_alignment_needed);
/* During reload iteration the number of registers saved can change.
   Recompute the value as needed.  Do not recompute when the number of
   registers didn't change, as reload does multiple calls to the function
   and does not expect the decision to change within a single iteration. */
8369 if (!optimize_function_for_size_p (cfun)
8370 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8372 int count = frame->nregs;
8373 struct cgraph_node *node = cgraph_node (current_function_decl);
8375 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8376 /* The fast prologue uses move instead of push to save registers. This
8377 is significantly longer, but also executes faster as modern hardware
8378 can execute the moves in parallel, but can't do that for push/pop.
   Be careful about choosing what prologue to emit: when the function takes
   many instructions to execute, we may use the slow version, as well as
   when the function is known to be outside a hot spot (this is known with
   feedback only).  Weight the size of the function by the number of
   registers to save, as it is cheap to use one or two push instructions
   but very slow to use many of them. */
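/* A worked example (figures illustrative only): saving 3 registers
   weights the prologue as (3 - 1) * FAST_PROLOGUE_INSN_COUNT insns.  If
   that product were 40, expensive_function_p (40) estimates whether the
   body executes more than 40 insns; if it does, the speed gain of the
   mov-based prologue is in the noise and the shorter push sequence is
   kept instead. */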
8387 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8388 if (node->frequency < NODE_FREQUENCY_NORMAL
8389 || (flag_branch_probabilities
8390 && node->frequency < NODE_FREQUENCY_HOT))
8391 cfun->machine->use_fast_prologue_epilogue = false;
8393 cfun->machine->use_fast_prologue_epilogue
8394 = !expensive_function_p (count);
8396 if (TARGET_PROLOGUE_USING_MOVE
8397 && cfun->machine->use_fast_prologue_epilogue)
8398 frame->save_regs_using_mov = true;
8400 frame->save_regs_using_mov = false;
8402 /* If static stack checking is enabled and done with probes, the registers
8403 need to be saved before allocating the frame. */
8404 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8405 frame->save_regs_using_mov = false;
8407 /* Skip return address. */
8408 offset = UNITS_PER_WORD;
8410 /* Skip pushed static chain. */
8411 if (ix86_static_chain_on_stack)
8412 offset += UNITS_PER_WORD;
8414 /* Skip saved base pointer. */
8415 if (frame_pointer_needed)
8416 offset += UNITS_PER_WORD;
8418 frame->hard_frame_pointer_offset = offset;
8420 /* Register save area */
8421 offset += frame->nregs * UNITS_PER_WORD;
8422 frame->reg_save_offset = offset;
8424 /* Align and set SSE register save area. */
8425 if (frame->nsseregs)
8427 /* The only ABI that has saved SSE registers (Win64) also has a
8428 16-byte aligned default stack, and thus we don't need to be
8429 within the re-aligned local stack frame to save them. */
8430 gcc_assert (INCOMING_STACK_BOUNDARY >= 128);
8431 offset = (offset + 16 - 1) & -16;
8432 offset += frame->nsseregs * 16;
8434 frame->sse_reg_save_offset = offset;
8436 /* The re-aligned stack starts here. Values before this point are not
8437 directly comparable with values below this point. In order to make
8438 sure that no value happens to be the same before and after, force
8439 the alignment computation below to add a non-zero value. */
8440 if (stack_realign_fp)
8441 offset = (offset + stack_alignment_needed) & -stack_alignment_needed;
8444 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8445 offset += frame->va_arg_size;
8447 /* Align start of frame for local function. */
8448 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8450 /* Frame pointer points here. */
8451 frame->frame_pointer_offset = offset;
/* Add the outgoing arguments area.  It can be skipped if we eliminated
   all the function calls as dead code.
   Skipping is however impossible when the function calls alloca, as the
   alloca expander assumes that the last crtl->outgoing_args_size bytes
   of the stack frame are unused. */
8460 if (ACCUMULATE_OUTGOING_ARGS
8461 && (!current_function_is_leaf || cfun->calls_alloca
8462 || ix86_current_function_calls_tls_descriptor))
8464 offset += crtl->outgoing_args_size;
8465 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8468 frame->outgoing_arguments_size = 0;
/* Align the stack boundary.  Only needed if we're calling another function
   or using alloca. */
8472 if (!current_function_is_leaf || cfun->calls_alloca
8473 || ix86_current_function_calls_tls_descriptor)
8474 offset = (offset + preferred_alignment - 1) & -preferred_alignment;
/* We've reached the end of the stack frame. */
8477 frame->stack_pointer_offset = offset;
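/* A worked example of the layout above (illustrative: 32-bit, frame
   pointer used, two call-saved registers, 12 bytes of locals, non-leaf,
   no varargs, no realignment, 16-byte preferred alignment): the return
   address gives offset 4, the saved %ebp makes hard_frame_pointer_offset
   8, the register save area ends at reg_save_offset 16 (which also equals
   sse_reg_save_offset with no SSE saves), the locals bring the running
   offset to 28, and rounding to the preferred boundary yields
   stack_pointer_offset 32; the prologue then allocates 32 - 16 = 16
   bytes. */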
/* The size the prologue needs to allocate. */
8480 to_allocate = offset - frame->sse_reg_save_offset;
8482 if ((!to_allocate && frame->nregs <= 1)
8483 || (TARGET_64BIT && to_allocate >= (HOST_WIDE_INT) 0x80000000))
8484 frame->save_regs_using_mov = false;
8486 if (ix86_using_red_zone ()
8487 && current_function_sp_is_unchanging
8488 && current_function_is_leaf
8489 && !ix86_current_function_calls_tls_descriptor)
8491 frame->red_zone_size = to_allocate;
8492 if (frame->save_regs_using_mov)
8493 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8494 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8495 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8498 frame->red_zone_size = 0;
8499 frame->stack_pointer_offset -= frame->red_zone_size;
8502 /* This is semi-inlined memory_address_length, but simplified
8503 since we know that we're always dealing with reg+offset, and
8504 to avoid having to create and discard all that rtl. */
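/* For example (standard x86 encoding sizes assumed): a base of %eax with
   offset 0 costs 0 extra bytes, %ebp with offset 0 still needs a disp8
   (1 byte), any base with an offset in [-128, 127] costs 1 byte, a
   larger offset costs 4, and %esp additionally requires a SIB byte. */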
8507 choose_baseaddr_len (unsigned int regno, HOST_WIDE_INT offset)
8513 /* EBP and R13 cannot be encoded without an offset. */
8514 len = (regno == BP_REG || regno == R13_REG);
8516 else if (IN_RANGE (offset, -128, 127))
8519 /* ESP and R12 must be encoded with a SIB byte. */
8520 if (regno == SP_REG || regno == R12_REG)
8526 /* Return an RTX that points to CFA_OFFSET within the stack frame.
8527 The valid base registers are taken from CFUN->MACHINE->FS. */
8530 choose_baseaddr (HOST_WIDE_INT cfa_offset)
8532 const struct machine_function *m = cfun->machine;
8533 rtx base_reg = NULL;
8534 HOST_WIDE_INT base_offset = 0;
8536 if (m->use_fast_prologue_epilogue)
8538 /* Choose the base register most likely to allow the most scheduling
opportunities.  Generally FP is valid throughout the function,
8540 while DRAP must be reloaded within the epilogue. But choose either
8541 over the SP due to increased encoding size. */
8545 base_reg = hard_frame_pointer_rtx;
8546 base_offset = m->fs.fp_offset - cfa_offset;
8548 else if (m->fs.drap_valid)
8550 base_reg = crtl->drap_reg;
8551 base_offset = 0 - cfa_offset;
8553 else if (m->fs.sp_valid)
8555 base_reg = stack_pointer_rtx;
8556 base_offset = m->fs.sp_offset - cfa_offset;
8561 HOST_WIDE_INT toffset;
8564 /* Choose the base register with the smallest address encoding.
8565 With a tie, choose FP > DRAP > SP. */
8568 base_reg = stack_pointer_rtx;
8569 base_offset = m->fs.sp_offset - cfa_offset;
8570 len = choose_baseaddr_len (STACK_POINTER_REGNUM, base_offset);
8572 if (m->fs.drap_valid)
8574 toffset = 0 - cfa_offset;
8575 tlen = choose_baseaddr_len (REGNO (crtl->drap_reg), toffset);
8578 base_reg = crtl->drap_reg;
8579 base_offset = toffset;
8585 toffset = m->fs.fp_offset - cfa_offset;
8586 tlen = choose_baseaddr_len (HARD_FRAME_POINTER_REGNUM, toffset);
8589 base_reg = hard_frame_pointer_rtx;
8590 base_offset = toffset;
8595 gcc_assert (base_reg != NULL);
8597 return plus_constant (base_reg, base_offset);
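/* Illustration (hypothetical state): with fp_offset == 8 and a request
   for cfa_offset == 24, the result is %ebp - 16, a 1-byte disp8; the same
   slot addressed from a far-away %esp could need a 4-byte disp32, which
   is why the tie-breaking above prefers FP over DRAP over SP. */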
8600 /* Emit code to save registers in the prologue. */
8603 ix86_emit_save_regs (void)
8608 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8609 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8611 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8612 RTX_FRAME_RELATED_P (insn) = 1;
8616 /* Emit a single register save at CFA - CFA_OFFSET. */
8619 ix86_emit_save_reg_using_mov (enum machine_mode mode, unsigned int regno,
8620 HOST_WIDE_INT cfa_offset)
8622 struct machine_function *m = cfun->machine;
8623 rtx reg = gen_rtx_REG (mode, regno);
8624 rtx mem, addr, base, insn;
8626 addr = choose_baseaddr (cfa_offset);
8627 mem = gen_frame_mem (mode, addr);
8629 /* For SSE saves, we need to indicate the 128-bit alignment. */
8630 set_mem_align (mem, GET_MODE_ALIGNMENT (mode));
8632 insn = emit_move_insn (mem, reg);
8633 RTX_FRAME_RELATED_P (insn) = 1;
8636 if (GET_CODE (base) == PLUS)
8637 base = XEXP (base, 0);
8638 gcc_checking_assert (REG_P (base));
8640 /* When saving registers into a re-aligned local stack frame, avoid
8641 any tricky guessing by dwarf2out. */
8642 if (m->fs.realigned)
8644 gcc_checking_assert (stack_realign_drap);
8646 if (regno == REGNO (crtl->drap_reg))
8648 /* A bit of a hack. We force the DRAP register to be saved in
8649 the re-aligned stack frame, which provides us with a copy
8650 of the CFA that will last past the prologue. Install it. */
8651 gcc_checking_assert (cfun->machine->fs.fp_valid);
8652 addr = plus_constant (hard_frame_pointer_rtx,
8653 cfun->machine->fs.fp_offset - cfa_offset);
8654 mem = gen_rtx_MEM (mode, addr);
8655 add_reg_note (insn, REG_CFA_DEF_CFA, mem);
8659 /* The frame pointer is a stable reference within the
8660 aligned frame. Use it. */
8661 gcc_checking_assert (cfun->machine->fs.fp_valid);
8662 addr = plus_constant (hard_frame_pointer_rtx,
8663 cfun->machine->fs.fp_offset - cfa_offset);
8664 mem = gen_rtx_MEM (mode, addr);
8665 add_reg_note (insn, REG_CFA_EXPRESSION,
8666 gen_rtx_SET (VOIDmode, mem, reg));
8670 /* The memory may not be relative to the current CFA register,
8671 which means that we may need to generate a new pattern for
8672 use by the unwind info. */
8673 else if (base != m->fs.cfa_reg)
8675 addr = plus_constant (m->fs.cfa_reg, m->fs.cfa_offset - cfa_offset);
8676 mem = gen_rtx_MEM (mode, addr);
8677 add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (VOIDmode, mem, reg));
8681 /* Emit code to save registers using MOV insns.
8682 First register is stored at CFA - CFA_OFFSET. */
8684 ix86_emit_save_regs_using_mov (HOST_WIDE_INT cfa_offset)
8688 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8689 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8691 ix86_emit_save_reg_using_mov (Pmode, regno, cfa_offset);
8692 cfa_offset -= UNITS_PER_WORD;
8696 /* Emit code to save SSE registers using MOV insns.
8697 First register is stored at CFA - CFA_OFFSET. */
8699 ix86_emit_save_sse_regs_using_mov (HOST_WIDE_INT cfa_offset)
8703 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8704 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8706 ix86_emit_save_reg_using_mov (V4SFmode, regno, cfa_offset);
8711 static GTY(()) rtx queued_cfa_restores;
8713 /* Add a REG_CFA_RESTORE REG note to INSN or queue them until next stack
8714 manipulation insn. The value is on the stack at CFA - CFA_OFFSET.
   Don't add the note if the previously saved value will be left untouched
   within the stack red zone until return, as unwinders can find the same
   value in the register and on the stack. */
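/* For example (illustrative): when restoring a register from CFA - 8
   while red_zone_offset is RED_ZONE_SIZE + UNITS_PER_WORD, the slot stays
   within the red zone until return, so no REG_CFA_RESTORE note is queued
   and the unwinder may keep using the stack copy. */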
8720 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT cfa_offset)
8722 if (cfa_offset <= cfun->machine->fs.red_zone_offset)
8727 add_reg_note (insn, REG_CFA_RESTORE, reg);
8728 RTX_FRAME_RELATED_P (insn) = 1;
8732 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8735 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8738 ix86_add_queued_cfa_restore_notes (rtx insn)
8741 if (!queued_cfa_restores)
8743 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8745 XEXP (last, 1) = REG_NOTES (insn);
8746 REG_NOTES (insn) = queued_cfa_restores;
8747 queued_cfa_restores = NULL_RTX;
8748 RTX_FRAME_RELATED_P (insn) = 1;
/* Expand prologue or epilogue stack adjustment.
   The pattern exists to put a dependency on all ebp-based memory accesses.
   STYLE should be negative if instructions should be marked as frame
   related, zero if the %r11 register is live and cannot be freely used,
   and positive otherwise. */
8758 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8759 int style, bool set_cfa)
8761 struct machine_function *m = cfun->machine;
8765 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8766 else if (x86_64_immediate_operand (offset, DImode))
8767 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8771 /* r11 is used by indirect sibcall return as well, set before the
8772 epilogue and used after the epilogue. */
8774 tmp = gen_rtx_REG (DImode, R11_REG);
8777 gcc_assert (src != hard_frame_pointer_rtx
8778 && dest != hard_frame_pointer_rtx);
8779 tmp = hard_frame_pointer_rtx;
8781 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8783 RTX_FRAME_RELATED_P (insn) = 1;
8784 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8789 ix86_add_queued_cfa_restore_notes (insn);
8795 gcc_assert (m->fs.cfa_reg == src);
8796 m->fs.cfa_offset += INTVAL (offset);
8797 m->fs.cfa_reg = dest;
8799 r = gen_rtx_PLUS (Pmode, src, offset);
8800 r = gen_rtx_SET (VOIDmode, dest, r);
8801 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8802 RTX_FRAME_RELATED_P (insn) = 1;
8805 RTX_FRAME_RELATED_P (insn) = 1;
8807 if (dest == stack_pointer_rtx)
8809 HOST_WIDE_INT ooffset = m->fs.sp_offset;
8810 bool valid = m->fs.sp_valid;
8812 if (src == hard_frame_pointer_rtx)
8814 valid = m->fs.fp_valid;
8815 ooffset = m->fs.fp_offset;
8817 else if (src == crtl->drap_reg)
8819 valid = m->fs.drap_valid;
/* Else there are two possibilities: SP itself, which we set
   up as the default above, or EH_RETURN_STACKADJ_RTX, which is
   taken care of by hand along the eh_return path. */
8827 gcc_checking_assert (src == stack_pointer_rtx
8828 || offset == const0_rtx);
8831 m->fs.sp_offset = ooffset - INTVAL (offset);
8832 m->fs.sp_valid = valid;
/* Find an available register to be used as the dynamic realign argument
   pointer register.  Such a register will be written in the prologue and
   used at the beginning of the body, so it must not be
	1. a parameter passing register;
	2. the GOT pointer.
   We reuse the static-chain register if it is available.  Otherwise, we
   use DI for i386 and R13 for x86-64.  We chose R13 since it has a
   longer encoding.

   Return: the regno of the chosen register. */
8848 find_drap_reg (void)
8850 tree decl = cfun->decl;
/* Use R13 for a nested function or a function that needs a static
   chain.  Since a function with a tail call may use any caller-saved
   register in the epilogue, DRAP must not use a caller-saved
   register in such a case. */
8858 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* Use DI for a nested function or a function that needs a static
   chain.  Since a function with a tail call may use any caller-saved
   register in the epilogue, DRAP must not use a caller-saved
   register in such a case. */
8869 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
/* Reuse the static chain register if it isn't used for parameter
   passing. */
8874 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8875 && !lookup_attribute ("fastcall",
8876 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8877 && !lookup_attribute ("thiscall",
8878 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8885 /* Return minimum incoming stack alignment. */
8888 ix86_minimum_incoming_stack_boundary (bool sibcall)
8890 unsigned int incoming_stack_boundary;
8892 /* Prefer the one specified at command line. */
8893 if (ix86_user_incoming_stack_boundary)
8894 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
/* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
   when -mstackrealign is used, this isn't a sibcall check, and the
   estimated stack alignment is 128 bits. */
8900 && ix86_force_align_arg_pointer
8901 && crtl->stack_alignment_estimated == 128)
8902 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8904 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8906 /* Incoming stack alignment can be changed on individual functions
8907 via force_align_arg_pointer attribute. We use the smallest
8908 incoming stack boundary. */
8909 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8910 && lookup_attribute (ix86_force_align_arg_pointer_string,
8911 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8912 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8914 /* The incoming stack frame has to be aligned at least at
8915 parm_stack_boundary. */
8916 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8917 incoming_stack_boundary = crtl->parm_stack_boundary;
/* The stack at the entrance of main is aligned by the runtime.  We use
   the smallest incoming stack boundary. */
8921 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8922 && DECL_NAME (current_function_decl)
8923 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8924 && DECL_FILE_SCOPE_P (current_function_decl))
8925 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8927 return incoming_stack_boundary;
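/* A worked example of the above: starting from the boundary chosen
   first, a force_align_arg_pointer attribute can only lower the result to
   MIN_STACK_BOUNDARY, parm_stack_boundary can only raise it, and for main
   itself anything above MAIN_STACK_BOUNDARY is clipped down, since the
   runtime already guarantees that much alignment at main's entry. */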
8930 /* Update incoming stack boundary and estimated stack alignment. */
8933 ix86_update_stack_boundary (void)
8935 ix86_incoming_stack_boundary
8936 = ix86_minimum_incoming_stack_boundary (false);
/* x86_64 varargs needs 16-byte stack alignment for the register save
   area. */
8942 && crtl->stack_alignment_estimated < 128)
8943 crtl->stack_alignment_estimated = 128;
8946 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8947 needed or an rtx for DRAP otherwise. */
8950 ix86_get_drap_rtx (void)
8952 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8953 crtl->need_drap = true;
8955 if (stack_realign_drap)
/* Assign DRAP to vDRAP and return vDRAP. */
8958 unsigned int regno = find_drap_reg ();
8963 arg_ptr = gen_rtx_REG (Pmode, regno);
8964 crtl->drap_reg = arg_ptr;
8967 drap_vreg = copy_to_reg (arg_ptr);
8971 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8974 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8975 RTX_FRAME_RELATED_P (insn) = 1;
8983 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8986 ix86_internal_arg_pointer (void)
8988 return virtual_incoming_args_rtx;
8991 struct scratch_reg {
8996 /* Return a short-lived scratch register for use on function entry.
8997 In 32-bit mode, it is valid only after the registers are saved
8998 in the prologue. This register must be released by means of
8999 release_scratch_register_on_entry once it is dead. */
9002 get_scratch_register_on_entry (struct scratch_reg *sr)
9010 /* We always use R11 in 64-bit mode. */
9015 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
9017 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
9018 bool static_chain_p = DECL_STATIC_CHAIN (decl);
9019 int regparm = ix86_function_regparm (fntype, decl);
9021 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
9023 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
9024 for the static chain register. */
9025 if ((regparm < 1 || (fastcall_p && !static_chain_p))
9026 && drap_regno != AX_REG)
9028 else if (regparm < 2 && drap_regno != DX_REG)
9030 /* ecx is the static chain register. */
9031 else if (regparm < 3 && !fastcall_p && !static_chain_p
9032 && drap_regno != CX_REG)
9034 else if (ix86_save_reg (BX_REG, true))
9036 /* esi is the static chain register. */
9037 else if (!(regparm == 3 && static_chain_p)
9038 && ix86_save_reg (SI_REG, true))
9040 else if (ix86_save_reg (DI_REG, true))
9044 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
9049 sr->reg = gen_rtx_REG (Pmode, regno);
9052 rtx insn = emit_insn (gen_push (sr->reg));
9053 RTX_FRAME_RELATED_P (insn) = 1;
9057 /* Release a scratch register obtained from the preceding function. */
9060 release_scratch_register_on_entry (struct scratch_reg *sr)
9064 rtx x, insn = emit_insn (gen_pop (sr->reg));
9066 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
9067 RTX_FRAME_RELATED_P (insn) = 1;
9068 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
9069 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9070 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
9074 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
9076 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
9079 ix86_adjust_stack_and_probe (const HOST_WIDE_INT size)
/* We skip the probe for the first interval + a small dope of 4 words and
   probe that many bytes past the specified size to maintain a protection
   area at the bottom of the stack. */
9084 const int dope = 4 * UNITS_PER_WORD;
9085 rtx size_rtx = GEN_INT (size);
/* See if we have a constant small number of probes to generate.  If so,
   that's the easy case.  The run-time loop is made up of 11 insns in the
   generic case while the compile-time loop is made up of 3+2*(n-1) insns
   for n intervals. */
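/* For instance, assuming PROBE_INTERVAL is 4096: a 12 KiB allocation
   spans n = 3 intervals, so the unrolled form costs 3 + 2*(3-1) = 7 insns
   versus 11 for the loop; at n = 5 the two tie (3 + 2*4 = 11), hence the
   5 * PROBE_INTERVAL cutoff below. */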
9091 if (size <= 5 * PROBE_INTERVAL)
9093 HOST_WIDE_INT i, adjust;
9094 bool first_probe = true;
9096 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
9097 values of N from 1 until it exceeds SIZE. If only one probe is
9098 needed, this will not generate any code. Then adjust and probe
9099 to PROBE_INTERVAL + SIZE. */
9100 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9104 adjust = 2 * PROBE_INTERVAL + dope;
9105 first_probe = false;
9108 adjust = PROBE_INTERVAL;
9110 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9111 plus_constant (stack_pointer_rtx, -adjust)));
9112 emit_stack_probe (stack_pointer_rtx);
9116 adjust = size + PROBE_INTERVAL + dope;
9118 adjust = size + PROBE_INTERVAL - i;
9120 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9121 plus_constant (stack_pointer_rtx, -adjust)));
9122 emit_stack_probe (stack_pointer_rtx);
9124 /* Adjust back to account for the additional first interval. */
9125 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9126 plus_constant (stack_pointer_rtx,
9127 PROBE_INTERVAL + dope)));
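/* Checking the arithmetic on an example (PROBE_INTERVAL assumed 4096,
   32-bit dope of 16 bytes): for size == 8192 the code above emits
   sub $8208 / probe, then sub $4096 / probe, then add $4112, for a net
   adjustment of exactly 8192 bytes. */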
9130 /* Otherwise, do the same as above, but in a loop. Note that we must be
9131 extra careful with variables wrapping around because we might be at
9132 the very top (or the very bottom) of the address space and we have
9133 to be able to handle this case properly; in particular, we use an
9134 equality test for the loop condition. */
9137 HOST_WIDE_INT rounded_size;
9138 struct scratch_reg sr;
9140 get_scratch_register_on_entry (&sr);
9143 /* Step 1: round SIZE to the previous multiple of the interval. */
9145 rounded_size = size & -PROBE_INTERVAL;
9148 /* Step 2: compute initial and final value of the loop counter. */
9150 /* SP = SP_0 + PROBE_INTERVAL. */
9151 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9152 plus_constant (stack_pointer_rtx,
9153 - (PROBE_INTERVAL + dope))));
9155 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
9156 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
9157 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
9158 gen_rtx_PLUS (Pmode, sr.reg,
9159 stack_pointer_rtx)));
9164 while (SP != LAST_ADDR)
9166 SP = SP + PROBE_INTERVAL
9170 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
9171 values of N from 1 until it is equal to ROUNDED_SIZE. */
9173 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
9176 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
9177 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
9179 if (size != rounded_size)
9181 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9182 plus_constant (stack_pointer_rtx,
9183 rounded_size - size)));
9184 emit_stack_probe (stack_pointer_rtx);
9187 /* Adjust back to account for the additional first interval. */
9188 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
9189 plus_constant (stack_pointer_rtx,
9190 PROBE_INTERVAL + dope)));
9192 release_scratch_register_on_entry (&sr);
9195 gcc_assert (cfun->machine->fs.cfa_reg != stack_pointer_rtx);
9196 cfun->machine->fs.sp_offset += size;
9198 /* Make sure nothing is scheduled before we are done. */
9199 emit_insn (gen_blockage ());
9202 /* Adjust the stack pointer up to REG while probing it. */
9205 output_adjust_stack_and_probe (rtx reg)
9207 static int labelno = 0;
9208 char loop_lab[32], end_lab[32];
9211 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9212 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9214 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9216 /* Jump to END_LAB if SP == LAST_ADDR. */
9217 xops[0] = stack_pointer_rtx;
9219 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9220 fputs ("\tje\t", asm_out_file);
9221 assemble_name_raw (asm_out_file, end_lab);
9222 fputc ('\n', asm_out_file);
9224 /* SP = SP + PROBE_INTERVAL. */
9225 xops[1] = GEN_INT (PROBE_INTERVAL);
9226 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9229 xops[1] = const0_rtx;
9230 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9232 fprintf (asm_out_file, "\tjmp\t");
9233 assemble_name_raw (asm_out_file, loop_lab);
9234 fputc ('\n', asm_out_file);
9236 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
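/* The emitted sequence thus looks roughly like this (AT&T syntax,
   PROBE_INTERVAL assumed 4096, %eax as the scratch holding LAST_ADDR):
	LPSRL0: cmpl %eax, %esp
		je   LPSRE0
		subl $4096, %esp
		orl  $0, (%esp)
		jmp  LPSRL0
	LPSRE0:  */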
9241 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9242 inclusive. These are offsets from the current stack pointer. */
9245 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
/* See if we have a constant small number of probes to generate.  If so,
   that's the easy case.  The run-time loop is made up of 7 insns in the
   generic case while the compile-time loop is made up of n insns for n
   intervals. */
9251 if (size <= 7 * PROBE_INTERVAL)
9255 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9256 it exceeds SIZE. If only one probe is needed, this will not
9257 generate any code. Then probe at FIRST + SIZE. */
9258 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9259 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9261 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9264 /* Otherwise, do the same as above, but in a loop. Note that we must be
9265 extra careful with variables wrapping around because we might be at
9266 the very top (or the very bottom) of the address space and we have
9267 to be able to handle this case properly; in particular, we use an
9268 equality test for the loop condition. */
9271 HOST_WIDE_INT rounded_size, last;
9272 struct scratch_reg sr;
9274 get_scratch_register_on_entry (&sr);
9277 /* Step 1: round SIZE to the previous multiple of the interval. */
9279 rounded_size = size & -PROBE_INTERVAL;
9282 /* Step 2: compute initial and final value of the loop counter. */
9284 /* TEST_OFFSET = FIRST. */
9285 emit_move_insn (sr.reg, GEN_INT (-first));
9287 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9288 last = first + rounded_size;
9293 while (TEST_ADDR != LAST_ADDR)
9295 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9299 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9300 until it is equal to ROUNDED_SIZE. */
9302 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9305 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9306 that SIZE is equal to ROUNDED_SIZE. */
9308 if (size != rounded_size)
9309 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9312 rounded_size - size));
9314 release_scratch_register_on_entry (&sr);
9317 /* Make sure nothing is scheduled before we are done. */
9318 emit_insn (gen_blockage ());
9321 /* Probe a range of stack addresses from REG to END, inclusive. These are
9322 offsets from the current stack pointer. */
9325 output_probe_stack_range (rtx reg, rtx end)
9327 static int labelno = 0;
9328 char loop_lab[32], end_lab[32];
9331 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9332 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9334 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9336 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9339 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9340 fputs ("\tje\t", asm_out_file);
9341 assemble_name_raw (asm_out_file, end_lab);
9342 fputc ('\n', asm_out_file);
9344 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9345 xops[1] = GEN_INT (PROBE_INTERVAL);
9346 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9348 /* Probe at TEST_ADDR. */
9349 xops[0] = stack_pointer_rtx;
9351 xops[2] = const0_rtx;
9352 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9354 fprintf (asm_out_file, "\tjmp\t");
9355 assemble_name_raw (asm_out_file, loop_lab);
9356 fputc ('\n', asm_out_file);
9358 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
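/* Roughly, the emitted loop is (AT&T syntax, PROBE_INTERVAL assumed 4096,
   %eax as the probe offset register counting toward LAST_ADDR in %edx):
	LPSRL0: cmpl %edx, %eax
		je   LPSRE0
		subl $4096, %eax
		orl  $0, (%esp,%eax)
		jmp  LPSRL0
	LPSRE0:  */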
9363 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
9364 to be generated in correct form. */
9366 ix86_finalize_stack_realign_flags (void)
/* Check if stack realignment is really needed after reload, and
   store the result in cfun. */
9370 unsigned int incoming_stack_boundary
9371 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9372 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9373 unsigned int stack_realign = (incoming_stack_boundary
9374 < (current_function_is_leaf
9375 ? crtl->max_used_stack_slot_alignment
9376 : crtl->stack_alignment_needed));
9378 if (crtl->stack_realign_finalized)
/* After stack_realign_needed is finalized, we can no longer
   update it. */
9382 gcc_assert (crtl->stack_realign_needed == stack_realign);
9386 crtl->stack_realign_needed = stack_realign;
9387 crtl->stack_realign_finalized = true;
9391 /* Expand the prologue into a bunch of separate insns. */
9394 ix86_expand_prologue (void)
9396 struct machine_function *m = cfun->machine;
9399 struct ix86_frame frame;
9400 HOST_WIDE_INT allocate;
9401 bool int_registers_saved;
9403 ix86_finalize_stack_realign_flags ();
/* DRAP should not coexist with stack_realign_fp. */
9406 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9408 memset (&m->fs, 0, sizeof (m->fs));
9410 /* Initialize CFA state for before the prologue. */
9411 m->fs.cfa_reg = stack_pointer_rtx;
9412 m->fs.cfa_offset = INCOMING_FRAME_SP_OFFSET;
/* Track the SP offset to the CFA.  We continue tracking this after we've
   swapped the CFA register away from SP.  In the case of re-alignment
   this is fudged; we're interested in offsets within the local frame. */
9417 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9418 m->fs.sp_valid = true;
9420 ix86_compute_frame_layout (&frame);
9422 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9424 /* We should have already generated an error for any use of
9425 ms_hook on a nested function. */
9426 gcc_checking_assert (!ix86_static_chain_on_stack);
/* Check if profiling is active and we shall use the profiling-before-
   prologue variant.  If so, sorry. */
9430 if (crtl->profile && flag_fentry != 0)
9431 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9433 /* In ix86_asm_output_function_label we emitted:
8b ff	movl.s %edi,%edi
55	push   %ebp
8b ec	movl.s %esp,%ebp
9438 This matches the hookable function prologue in Win32 API
9439 functions in Microsoft Windows XP Service Pack 2 and newer.
9440 Wine uses this to enable Windows apps to hook the Win32 API
9441 functions provided by Wine.
9443 What that means is that we've already set up the frame pointer. */
9445 if (frame_pointer_needed
9446 && !(crtl->drap_reg && crtl->stack_realign_needed))
9450 /* We've decided to use the frame pointer already set up.
9451 Describe this to the unwinder by pretending that both
9452 push and mov insns happen right here.
9454 Putting the unwind info here at the end of the ms_hook
9455 is done so that we can make absolutely certain we get
9456 the required byte sequence at the start of the function,
9457 rather than relying on an assembler that can produce
9458 the exact encoding required.
However, it does mean (in the unpatched case) that we have
a 1 insn window where the asynchronous unwind info is
incorrect.  But if we placed the unwind info at its correct
location we would have incorrect unwind info in the patched
case.  This is probably all moot since I don't expect Wine
generates dwarf2 unwind info for the system libraries that
use this feature. */
9468 insn = emit_insn (gen_blockage ());
9470 push = gen_push (hard_frame_pointer_rtx);
9471 mov = gen_rtx_SET (VOIDmode, hard_frame_pointer_rtx,
9473 RTX_FRAME_RELATED_P (push) = 1;
9474 RTX_FRAME_RELATED_P (mov) = 1;
9476 RTX_FRAME_RELATED_P (insn) = 1;
9477 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
9478 gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, push, mov)));
9480 /* Note that gen_push incremented m->fs.cfa_offset, even
9481 though we didn't emit the push insn here. */
9482 m->fs.cfa_reg = hard_frame_pointer_rtx;
9483 m->fs.fp_offset = m->fs.cfa_offset;
9484 m->fs.fp_valid = true;
9488 /* The frame pointer is not needed so pop %ebp again.
9489 This leaves us with a pristine state. */
9490 emit_insn (gen_pop (hard_frame_pointer_rtx));
9494 /* The first insn of a function that accepts its static chain on the
9495 stack is to push the register that would be filled in by a direct
9496 call. This insn will be skipped by the trampoline. */
9497 else if (ix86_static_chain_on_stack)
9499 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9500 emit_insn (gen_blockage ());
9502 /* We don't want to interpret this push insn as a register save,
9503 only as a stack adjustment. The real copy of the register as
9504 a save will be done later, if needed. */
9505 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9506 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9507 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9508 RTX_FRAME_RELATED_P (insn) = 1;
/* Emit prologue code to adjust stack alignment and set up DRAP, in case
   DRAP is needed and stack realignment is really needed after reload. */
9513 if (stack_realign_drap)
9515 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9517 /* Only need to push parameter pointer reg if it is caller saved. */
9518 if (!call_used_regs[REGNO (crtl->drap_reg)])
/* Push the arg pointer reg. */
9521 insn = emit_insn (gen_push (crtl->drap_reg));
9522 RTX_FRAME_RELATED_P (insn) = 1;
9525 /* Grab the argument pointer. */
9526 t = plus_constant (stack_pointer_rtx, m->fs.sp_offset);
9527 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
9528 RTX_FRAME_RELATED_P (insn) = 1;
9529 m->fs.cfa_reg = crtl->drap_reg;
9530 m->fs.cfa_offset = 0;
9532 /* Align the stack. */
9533 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9535 GEN_INT (-align_bytes)));
9536 RTX_FRAME_RELATED_P (insn) = 1;
/* Replicate the return address on the stack so that the return
   address can be reached via the (argp - 1) slot.  This is needed
   to implement the macro RETURN_ADDR_RTX and the intrinsic function
   expand_builtin_return_addr, etc. */
9542 t = plus_constant (crtl->drap_reg, -UNITS_PER_WORD);
9543 t = gen_frame_mem (Pmode, t);
9544 insn = emit_insn (gen_push (t));
9545 RTX_FRAME_RELATED_P (insn) = 1;
9547 /* For the purposes of frame and register save area addressing,
9548 we've started over with a new frame. */
9549 m->fs.sp_offset = INCOMING_FRAME_SP_OFFSET;
9550 m->fs.realigned = true;
9553 if (frame_pointer_needed && !m->fs.fp_valid)
9555 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9556 slower on all targets. Also sdb doesn't like it. */
9557 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9558 RTX_FRAME_RELATED_P (insn) = 1;
9560 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9561 RTX_FRAME_RELATED_P (insn) = 1;
9563 if (m->fs.cfa_reg == stack_pointer_rtx)
9564 m->fs.cfa_reg = hard_frame_pointer_rtx;
9565 gcc_assert (m->fs.sp_offset == frame.hard_frame_pointer_offset);
9566 m->fs.fp_offset = m->fs.sp_offset;
9567 m->fs.fp_valid = true;
9570 int_registers_saved = (frame.nregs == 0);
9572 if (!int_registers_saved)
9574 /* If saving registers via PUSH, do so now. */
9575 if (!frame.save_regs_using_mov)
9577 ix86_emit_save_regs ();
9578 int_registers_saved = true;
9579 gcc_assert (m->fs.sp_offset == frame.reg_save_offset);
9582 /* When using red zone we may start register saving before allocating
9583 the stack frame saving one cycle of the prologue. However, avoid
9584 doing this if we have to probe the stack; at least on x86_64 the
9585 stack probe can turn into a call that clobbers a red zone location. */
9586 else if (ix86_using_red_zone ()
9587 && (! TARGET_STACK_PROBE
9588 || frame.stack_pointer_offset < CHECK_STACK_LIMIT))
9590 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9591 int_registers_saved = true;
9595 if (stack_realign_fp)
9597 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9598 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9600 /* The computation of the size of the re-aligned stack frame means
9601 that we must allocate the size of the register save area before
9602 performing the actual alignment. Otherwise we cannot guarantee
9603 that there's enough storage above the realignment point. */
9604 if (m->fs.sp_offset != frame.sse_reg_save_offset)
9605 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9606 GEN_INT (m->fs.sp_offset
9607 - frame.sse_reg_save_offset),
9610 /* Align the stack. */
9611 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9613 GEN_INT (-align_bytes)));
9615 /* For the purposes of register save area addressing, the stack
9616 pointer is no longer valid. As for the value of sp_offset,
9617 see ix86_compute_frame_layout, which we need to match in order
9618 to pass verification of stack_pointer_offset at the end. */
9619 m->fs.sp_offset = (m->fs.sp_offset + align_bytes) & -align_bytes;
9620 m->fs.sp_valid = false;
9623 allocate = frame.stack_pointer_offset - m->fs.sp_offset;
9625 if (flag_stack_usage)
/* We start counting from ARG_POINTER. */
9628 HOST_WIDE_INT stack_size = frame.stack_pointer_offset;
9630 /* If it was realigned, take into account the fake frame. */
9631 if (stack_realign_drap)
9633 if (ix86_static_chain_on_stack)
9634 stack_size += UNITS_PER_WORD;
9636 if (!call_used_regs[REGNO (crtl->drap_reg)])
9637 stack_size += UNITS_PER_WORD;
9639 /* This over-estimates by 1 minimal-stack-alignment-unit but
9640 mitigates that by counting in the new return address slot. */
9641 current_function_dynamic_stack_size
9642 += crtl->stack_alignment_needed / BITS_PER_UNIT;
9645 current_function_static_stack_size = stack_size;
9648 /* The stack has already been decremented by the instruction calling us
9649 so we need to probe unconditionally to preserve the protection area. */
9650 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9652 /* We expect the registers to be saved when probes are used. */
9653 gcc_assert (int_registers_saved);
9655 if (STACK_CHECK_MOVING_SP)
9657 ix86_adjust_stack_and_probe (allocate);
9662 HOST_WIDE_INT size = allocate;
9664 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9665 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9667 if (TARGET_STACK_PROBE)
9668 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9670 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9676 else if (!ix86_target_stack_probe ()
9677 || frame.stack_pointer_offset < CHECK_STACK_LIMIT)
9679 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9680 GEN_INT (-allocate), -1,
9681 m->fs.cfa_reg == stack_pointer_rtx);
9685 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9688 if (cfun->machine->call_abi == MS_ABI)
9691 eax_live = ix86_eax_live_at_start_p ();
9695 emit_insn (gen_push (eax));
9696 allocate -= UNITS_PER_WORD;
9699 emit_move_insn (eax, GEN_INT (allocate));
9701 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9703 if (m->fs.cfa_reg == stack_pointer_rtx)
9705 m->fs.cfa_offset += allocate;
9706 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9707 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9708 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9709 RTX_FRAME_RELATED_P (insn) = 1;
9711 m->fs.sp_offset += allocate;
9715 t = choose_baseaddr (m->fs.sp_offset - allocate);
9716 emit_move_insn (eax, gen_frame_mem (Pmode, t));
9719 gcc_assert (m->fs.sp_offset == frame.stack_pointer_offset);
9721 if (!int_registers_saved)
9722 ix86_emit_save_regs_using_mov (frame.reg_save_offset);
9724 ix86_emit_save_sse_regs_using_mov (frame.sse_reg_save_offset);
9726 pic_reg_used = false;
9727 if (pic_offset_table_rtx
9728 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9731 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9733 if (alt_pic_reg_used != INVALID_REGNUM)
9734 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9736 pic_reg_used = true;
9743 if (ix86_cmodel == CM_LARGE_PIC)
9745 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9746 rtx label = gen_label_rtx ();
9748 LABEL_PRESERVE_P (label) = 1;
9749 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9750 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9751 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9752 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9753 pic_offset_table_rtx, tmp_reg));
9756 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9759 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
/* In the pic_reg_used case, make sure that the got load isn't deleted
   when mcount needs it.  The blockage to avoid call movement across the
   mcount call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
   note. */
9766 if (crtl->profile && !flag_fentry && pic_reg_used)
9767 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9769 if (crtl->drap_reg && !crtl->stack_realign_needed)
/* vDRAP is set up, but after reload it turns out stack realignment
   isn't necessary; here we emit the prologue to set up DRAP
   without the stack realignment adjustment. */
9774 t = choose_baseaddr (0);
9775 emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, t));
/* Prevent instructions from being scheduled into the register save push
   sequence when access to the redzone area is done through the frame
   pointer.  The offset between the frame pointer and the stack pointer
   is calculated relative to the value of the stack pointer at the end
   of the function prologue, and moving instructions that access the
   redzone area via the frame pointer inside the push sequence violates
   this assumption. */
9784 if (frame_pointer_needed && frame.red_zone_size)
9785 emit_insn (gen_memory_blockage ());
9787 /* Emit cld instruction if stringops are used in the function. */
9788 if (TARGET_CLD && ix86_current_function_needs_cld)
9789 emit_insn (gen_cld ());
9792 /* Emit code to restore REG using a POP insn. */
9795 ix86_emit_restore_reg_using_pop (rtx reg)
9797 struct machine_function *m = cfun->machine;
9798 rtx insn = emit_insn (gen_pop (reg));
9800 ix86_add_cfa_restore_note (insn, reg, m->fs.sp_offset);
9801 m->fs.sp_offset -= UNITS_PER_WORD;
9803 if (m->fs.cfa_reg == crtl->drap_reg
9804 && REGNO (reg) == REGNO (crtl->drap_reg))
9806 /* Previously we'd represented the CFA as an expression
9807 like *(%ebp - 8). We've just popped that value from
9808 the stack, which means we need to reset the CFA to
9809 the drap register. This will remain until we restore
9810 the stack pointer. */
9811 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9812 RTX_FRAME_RELATED_P (insn) = 1;
9814 /* This means that the DRAP register is valid for addressing too. */
9815 m->fs.drap_valid = true;
9819 if (m->fs.cfa_reg == stack_pointer_rtx)
9821 rtx x = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
9822 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
9823 add_reg_note (insn, REG_CFA_ADJUST_CFA, x);
9824 RTX_FRAME_RELATED_P (insn) = 1;
9826 m->fs.cfa_offset -= UNITS_PER_WORD;
9829 /* When the frame pointer is the CFA, and we pop it, we are
9830 swapping back to the stack pointer as the CFA. This happens
9831 for stack frames that don't allocate other data, so we assume
9832 the stack pointer is now pointing at the return address, i.e.
9833 the function entry state, which makes the offset be 1 word. */
9834 if (reg == hard_frame_pointer_rtx)
9836 m->fs.fp_valid = false;
9837 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9839 m->fs.cfa_reg = stack_pointer_rtx;
9840 m->fs.cfa_offset -= UNITS_PER_WORD;
9842 add_reg_note (insn, REG_CFA_DEF_CFA,
9843 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9844 GEN_INT (m->fs.cfa_offset)));
9845 RTX_FRAME_RELATED_P (insn) = 1;
9850 /* Emit code to restore saved registers using POP insns. */
9853 ix86_emit_restore_regs_using_pop (void)
9857 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9858 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9859 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno));
9862 /* Emit code and notes for the LEAVE instruction. */
9865 ix86_emit_leave (void)
9867 struct machine_function *m = cfun->machine;
9868 rtx insn = emit_insn (ix86_gen_leave ());
9870 ix86_add_queued_cfa_restore_notes (insn);
9872 gcc_assert (m->fs.fp_valid);
9873 m->fs.sp_valid = true;
9874 m->fs.sp_offset = m->fs.fp_offset - UNITS_PER_WORD;
9875 m->fs.fp_valid = false;
9877 if (m->fs.cfa_reg == hard_frame_pointer_rtx)
9879 m->fs.cfa_reg = stack_pointer_rtx;
9880 m->fs.cfa_offset = m->fs.sp_offset;
9882 add_reg_note (insn, REG_CFA_DEF_CFA,
9883 plus_constant (stack_pointer_rtx, m->fs.sp_offset));
9884 RTX_FRAME_RELATED_P (insn) = 1;
9885 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx,
9890 /* Emit code to restore saved registers using MOV insns.
9891 First register is restored from CFA - CFA_OFFSET. */
9893 ix86_emit_restore_regs_using_mov (HOST_WIDE_INT cfa_offset,
9894 int maybe_eh_return)
9896 struct machine_function *m = cfun->machine;
9899 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9900 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9902 rtx reg = gen_rtx_REG (Pmode, regno);
9905 mem = choose_baseaddr (cfa_offset);
9906 mem = gen_frame_mem (Pmode, mem);
9907 insn = emit_move_insn (reg, mem);
9909 if (m->fs.cfa_reg == crtl->drap_reg && regno == REGNO (crtl->drap_reg))
/* Previously we'd represented the CFA as an expression
   like *(%ebp - 8).  We've just reloaded that value from
   the stack, which means we need to reset the CFA to
   the drap register.  This will remain until we restore
   the stack pointer. */
9916 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9917 RTX_FRAME_RELATED_P (insn) = 1;
9919 /* This means that the DRAP register is valid for addressing. */
9920 m->fs.drap_valid = true;
9923 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9925 cfa_offset -= UNITS_PER_WORD;
/* Emit code to restore saved SSE registers using MOV insns.
   The first register is restored from CFA - CFA_OFFSET. */
9932 ix86_emit_restore_sse_regs_using_mov (HOST_WIDE_INT cfa_offset,
9933 int maybe_eh_return)
9937 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9938 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9940 rtx reg = gen_rtx_REG (V4SFmode, regno);
9943 mem = choose_baseaddr (cfa_offset);
9944 mem = gen_rtx_MEM (V4SFmode, mem);
9945 set_mem_align (mem, 128);
9946 emit_move_insn (reg, mem);
9948 ix86_add_cfa_restore_note (NULL_RTX, reg, cfa_offset);
9954 /* Restore function stack, frame, and registers. */
9957 ix86_expand_epilogue (int style)
9959 struct machine_function *m = cfun->machine;
9960 struct machine_frame_state frame_state_save = m->fs;
9961 struct ix86_frame frame;
9962 bool restore_regs_via_mov;
9965 ix86_finalize_stack_realign_flags ();
9966 ix86_compute_frame_layout (&frame);
9968 m->fs.sp_valid = (!frame_pointer_needed
9969 || (current_function_sp_is_unchanging
9970 && !stack_realign_fp));
9971 gcc_assert (!m->fs.sp_valid
9972 || m->fs.sp_offset == frame.stack_pointer_offset);
9974 /* The FP must be valid if the frame pointer is present. */
9975 gcc_assert (frame_pointer_needed == m->fs.fp_valid);
9976 gcc_assert (!m->fs.fp_valid
9977 || m->fs.fp_offset == frame.hard_frame_pointer_offset);
9979 /* We must have *some* valid pointer to the stack frame. */
9980 gcc_assert (m->fs.sp_valid || m->fs.fp_valid);
9982 /* The DRAP is never valid at this point. */
9983 gcc_assert (!m->fs.drap_valid);
9985 /* See the comment about red zone and frame
9986 pointer usage in ix86_expand_prologue. */
9987 if (frame_pointer_needed && frame.red_zone_size)
9988 emit_insn (gen_memory_blockage ());
9990 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9991 gcc_assert (!using_drap || m->fs.cfa_reg == crtl->drap_reg);
9993 /* Determine the CFA offset of the end of the red-zone. */
9994 m->fs.red_zone_offset = 0;
9995 if (ix86_using_red_zone () && crtl->args.pops_args < 65536)
9997 /* The red-zone begins below the return address. */
9998 m->fs.red_zone_offset = RED_ZONE_SIZE + UNITS_PER_WORD;
10000 /* When the register save area is in the aligned portion of
10001 the stack, determine the maximum runtime displacement that
10002 matches up with the aligned frame. */
10003 if (stack_realign_drap)
10004 m->fs.red_zone_offset -= (crtl->stack_alignment_needed / BITS_PER_UNIT
10008 /* Special care must be taken for the normal return case of a function
10009 using eh_return: the eax and edx registers are marked as saved, but
10010 not restored along this path. Adjust the save location to match. */
10011 if (crtl->calls_eh_return && style != 2)
10012 frame.reg_save_offset -= 2 * UNITS_PER_WORD;
/* If we're only restoring one register and sp is not valid, then
   use a move instruction to restore the register, since it's
   less work than reloading sp and popping the register. */
10017 if (!m->fs.sp_valid && frame.nregs <= 1)
10018 restore_regs_via_mov = true;
10019 /* EH_RETURN requires the use of moves to function properly. */
10020 else if (crtl->calls_eh_return)
10021 restore_regs_via_mov = true;
10022 else if (TARGET_EPILOGUE_USING_MOVE
10023 && cfun->machine->use_fast_prologue_epilogue
10024 && (frame.nregs > 1
10025 || m->fs.sp_offset != frame.reg_save_offset))
10026 restore_regs_via_mov = true;
10027 else if (frame_pointer_needed
10029 && m->fs.sp_offset != frame.reg_save_offset)
10030 restore_regs_via_mov = true;
10031 else if (frame_pointer_needed
10032 && TARGET_USE_LEAVE
10033 && cfun->machine->use_fast_prologue_epilogue
10034 && frame.nregs == 1)
10035 restore_regs_via_mov = true;
10037 restore_regs_via_mov = false;
10039 if (restore_regs_via_mov || frame.nsseregs)
10041 /* Ensure that the entire register save area is addressable via
10042 the stack pointer, if we will restore via sp. */
10044 && m->fs.sp_offset > 0x7fffffff
10045 && !(m->fs.fp_valid || m->fs.drap_valid)
10046 && (frame.nsseregs + frame.nregs) != 0)
10048 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10049 GEN_INT (m->fs.sp_offset
10050 - frame.sse_reg_save_offset),
10052 m->fs.cfa_reg == stack_pointer_rtx);
10056 /* If there are any SSE registers to restore, then we have to do it
10057 via moves, since there's obviously no pop for SSE regs. */
10058 if (frame.nsseregs)
10059 ix86_emit_restore_sse_regs_using_mov (frame.sse_reg_save_offset,
10062 if (restore_regs_via_mov)
10067 ix86_emit_restore_regs_using_mov (frame.reg_save_offset, style == 2);
10069 /* eh_return epilogues need %ecx added to the stack pointer. */
10072 rtx insn, sa = EH_RETURN_STACKADJ_RTX;
10074 /* Stack align doesn't work with eh_return. */
10075 gcc_assert (!stack_realign_drap);
10076 /* Neither do regparm nested functions. */
10077 gcc_assert (!ix86_static_chain_on_stack);
10079 if (frame_pointer_needed)
10081 t = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
10082 t = plus_constant (t, m->fs.fp_offset - UNITS_PER_WORD);
10083 emit_insn (gen_rtx_SET (VOIDmode, sa, t));
10085 t = gen_frame_mem (Pmode, hard_frame_pointer_rtx);
10086 insn = emit_move_insn (hard_frame_pointer_rtx, t);
10088 /* Note that we use SA as a temporary CFA, as the return
10089 address is at the proper place relative to it. We
10090 pretend this happens at the FP restore insn because
10091 prior to this insn the FP would be stored at the wrong
10092 offset relative to SA, and after this insn we have no
10093 other reasonable register to use for the CFA. We don't
10094 bother resetting the CFA to the SP for the duration of
10095 the return insn. */
10096 add_reg_note (insn, REG_CFA_DEF_CFA,
10097 plus_constant (sa, UNITS_PER_WORD));
10098 ix86_add_queued_cfa_restore_notes (insn);
10099 add_reg_note (insn, REG_CFA_RESTORE, hard_frame_pointer_rtx);
10100 RTX_FRAME_RELATED_P (insn) = 1;
10102 m->fs.cfa_reg = sa;
10103 m->fs.cfa_offset = UNITS_PER_WORD;
10104 m->fs.fp_valid = false;
10106 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
10107 const0_rtx, style, false);
10111 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
10112 t = plus_constant (t, m->fs.sp_offset - UNITS_PER_WORD);
10113 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, t));
10114 ix86_add_queued_cfa_restore_notes (insn);
10116 gcc_assert (m->fs.cfa_reg == stack_pointer_rtx);
10117 if (m->fs.cfa_offset != UNITS_PER_WORD)
10119 m->fs.cfa_offset = UNITS_PER_WORD;
10120 add_reg_note (insn, REG_CFA_DEF_CFA,
10121 plus_constant (stack_pointer_rtx,
10123 RTX_FRAME_RELATED_P (insn) = 1;
10126 m->fs.sp_offset = UNITS_PER_WORD;
10131 /* First step is to deallocate the stack frame so that we can
10132 pop the registers. */
10133 if (!m->fs.sp_valid)
10135 pro_epilogue_adjust_stack (stack_pointer_rtx, hard_frame_pointer_rtx,
10136 GEN_INT (m->fs.fp_offset
10137 - frame.reg_save_offset),
10140 else if (m->fs.sp_offset != frame.reg_save_offset)
10142 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10143 GEN_INT (m->fs.sp_offset
10144 - frame.reg_save_offset),
10146 m->fs.cfa_reg == stack_pointer_rtx);
10149 ix86_emit_restore_regs_using_pop ();
10152 /* If we used a frame pointer and haven't already got rid of it,
10153 then do so now. */
10154 if (m->fs.fp_valid)
10156 /* If the stack pointer is valid and pointing at the frame
10157 pointer store address, then we only need a pop. */
10158 if (m->fs.sp_valid && m->fs.sp_offset == frame.hard_frame_pointer_offset)
10159 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10160 /* "leave" results in shorter dependency chains on CPUs that are
10161 able to grok it fast. */
10162 else if (TARGET_USE_LEAVE
10163 || optimize_function_for_size_p (cfun)
10164 || !cfun->machine->use_fast_prologue_epilogue)
10165 ix86_emit_leave ();
10168 pro_epilogue_adjust_stack (stack_pointer_rtx,
10169 hard_frame_pointer_rtx,
10170 const0_rtx, style, !using_drap);
10171 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx);
10177 int param_ptr_offset = UNITS_PER_WORD;
10180 gcc_assert (stack_realign_drap);
10182 if (ix86_static_chain_on_stack)
10183 param_ptr_offset += UNITS_PER_WORD;
10184 if (!call_used_regs[REGNO (crtl->drap_reg)])
10185 param_ptr_offset += UNITS_PER_WORD;
10187 insn = emit_insn (gen_rtx_SET
10188 (VOIDmode, stack_pointer_rtx,
10189 gen_rtx_PLUS (Pmode,
10191 GEN_INT (-param_ptr_offset))));
10192 m->fs.cfa_reg = stack_pointer_rtx;
10193 m->fs.cfa_offset = param_ptr_offset;
10194 m->fs.sp_offset = param_ptr_offset;
10195 m->fs.realigned = false;
10197 add_reg_note (insn, REG_CFA_DEF_CFA,
10198 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10199 GEN_INT (param_ptr_offset)));
10200 RTX_FRAME_RELATED_P (insn) = 1;
10202 if (!call_used_regs[REGNO (crtl->drap_reg)])
10203 ix86_emit_restore_reg_using_pop (crtl->drap_reg);
10206 /* At this point the stack pointer must be valid, and we must have
10207 restored all of the registers. We may not have deallocated the
10208 entire stack frame. We've delayed this until now because it may
10209 be possible to merge the local stack deallocation with the
10210 deallocation forced by ix86_static_chain_on_stack. */
10211 gcc_assert (m->fs.sp_valid);
10212 gcc_assert (!m->fs.fp_valid);
10213 gcc_assert (!m->fs.realigned);
10214 if (m->fs.sp_offset != UNITS_PER_WORD)
10216 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10217 GEN_INT (m->fs.sp_offset - UNITS_PER_WORD),
10221 /* Sibcall epilogues don't want a return instruction. */
10224 m->fs = frame_state_save;
10228 if (crtl->args.pops_args && crtl->args.size)
10230 rtx popc = GEN_INT (crtl->args.pops_args);
10232 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10233 address, do an explicit add, and jump indirectly to the caller. */
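/* The 64K limit exists because "ret $N" encodes N as an unsigned
   16-bit immediate.  */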
10235 if (crtl->args.pops_args >= 65536)
10237 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10240 /* There is no "pascal" calling convention in any 64bit ABI. */
10241 gcc_assert (!TARGET_64BIT);
10243 insn = emit_insn (gen_pop (ecx));
10244 m->fs.cfa_offset -= UNITS_PER_WORD;
10245 m->fs.sp_offset -= UNITS_PER_WORD;
10247 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10248 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10249 add_reg_note (insn, REG_CFA_REGISTER,
10250 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10251 RTX_FRAME_RELATED_P (insn) = 1;
10253 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10255 emit_jump_insn (gen_return_indirect_internal (ecx));
10258 emit_jump_insn (gen_return_pop_internal (popc));
10261 emit_jump_insn (gen_return_internal ());
10263 /* Restore the state back to the state from the prologue,
10264 so that it's correct for the next epilogue. */
10265 m->fs = frame_state_save;
10268 /* Reset from the function's potential modifications. */
10271 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10272 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10274 if (pic_offset_table_rtx)
10275 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10277 /* Mach-O doesn't support labels at the end of objects, so if
10278 it looks like we might want one, insert a NOP. */
10280 rtx insn = get_last_insn ();
10283 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10284 insn = PREV_INSN (insn);
10288 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10289 fputs ("\tnop\n", file);
10295 /* Extract the parts of an RTL expression that is a valid memory address
10296 for an instruction. Return 0 if the structure of the address is
10297 grossly off. Return -1 if the address contains ASHIFT, so it is not
10298 strictly valid, but is still used for computing the length of a lea instruction. */
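/* For illustration: the AT&T-syntax address 12(%ebx,%ecx,4) arrives here
   as (plus (reg %ebx) (plus (mult (reg %ecx) (const_int 4)) (const_int 12)))
   and decomposes into base %ebx, index %ecx, scale 4, displacement 12.  */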
10301 ix86_decompose_address (rtx addr, struct ix86_address *out)
10303 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10304 rtx base_reg, index_reg;
10305 HOST_WIDE_INT scale = 1;
10306 rtx scale_rtx = NULL_RTX;
10309 enum ix86_address_seg seg = SEG_DEFAULT;
10311 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10313 else if (GET_CODE (addr) == PLUS)
10315 rtx addends[4], op;
10323 addends[n++] = XEXP (op, 1);
10326 while (GET_CODE (op) == PLUS);
10331 for (i = n; i >= 0; --i)
10334 switch (GET_CODE (op))
10339 index = XEXP (op, 0);
10340 scale_rtx = XEXP (op, 1);
10346 index = XEXP (op, 0);
10347 tmp = XEXP (op, 1);
10348 if (!CONST_INT_P (tmp))
10350 scale = INTVAL (tmp);
10351 if ((unsigned HOST_WIDE_INT) scale > 3)
10353 scale = 1 << scale;
10357 if (XINT (op, 1) == UNSPEC_TP
10358 && TARGET_TLS_DIRECT_SEG_REFS
10359 && seg == SEG_DEFAULT)
10360 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10389 else if (GET_CODE (addr) == MULT)
10391 index = XEXP (addr, 0); /* index*scale */
10392 scale_rtx = XEXP (addr, 1);
10394 else if (GET_CODE (addr) == ASHIFT)
10396 /* We're called for lea too, which implements ashift on occasion. */
10397 index = XEXP (addr, 0);
10398 tmp = XEXP (addr, 1);
10399 if (!CONST_INT_P (tmp))
10401 scale = INTVAL (tmp);
10402 if ((unsigned HOST_WIDE_INT) scale > 3)
10404 scale = 1 << scale;
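/* A shift count of 0..3 corresponds to a scale of 1, 2, 4 or 8, the only
   multipliers the SIB byte can encode.  */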
10408 disp = addr; /* displacement */
10410 /* Extract the integral value of scale. */
10413 if (!CONST_INT_P (scale_rtx))
10415 scale = INTVAL (scale_rtx);
10418 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10419 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10421 /* Avoid useless 0 displacement. */
10422 if (disp == const0_rtx && (base || index))
10425 /* Allow arg pointer and stack pointer as index if there is no scaling. */
10426 if (base_reg && index_reg && scale == 1
10427 && (index_reg == arg_pointer_rtx
10428 || index_reg == frame_pointer_rtx
10429 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10432 tmp = base, base = index, index = tmp;
10433 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10436 /* Special case: %ebp cannot be encoded as a base without a displacement.
10437 Similarly %r13. */
10438 if (!disp
10439 && base_reg
10440 && (base_reg == hard_frame_pointer_rtx
10441 || base_reg == frame_pointer_rtx
10442 || base_reg == arg_pointer_rtx
10443 || (REG_P (base_reg)
10444 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10445 || REGNO (base_reg) == R13_REG))))
10446 disp = const0_rtx;
10448 /* Special case: on K6, [%esi] causes the instruction to be vector
10449 decoded. Avoid this by transforming to [%esi+0].
10450 Reload calls address legitimization without cfun defined, so we need
10451 to test cfun for being non-NULL. */
10452 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10453 && base_reg && !index_reg && !disp
10454 && REG_P (base_reg) && REGNO (base_reg) == SI_REG)
10455 disp = const0_rtx;
10457 /* Special case: encode reg+reg instead of reg*2. */
10458 if (!base && index && scale == 2)
10459 base = index, base_reg = index_reg, scale = 1;
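/* "(%eax,%eax)" also encodes shorter than "(,%eax,2)": a scaled index
   with no base forces a mandatory 32-bit displacement into the insn.  */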
10461 /* Special case: scaling cannot be encoded without base or displacement. */
10462 if (!base && !disp && index && scale != 1)
10463 disp = const0_rtx;
10466 out->index = index;
10468 out->scale = scale;
10474 /* Return cost of the memory address x.
10475 For i386, it is better to use a complex address than let gcc copy
10476 the address into a reg and make a new pseudo. But not if the address
10477 requires two regs - that would mean more pseudos with longer
10478 lifetimes. */
10480 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10482 struct ix86_address parts;
10484 int ok = ix86_decompose_address (x, &parts);
10488 if (parts.base && GET_CODE (parts.base) == SUBREG)
10489 parts.base = SUBREG_REG (parts.base);
10490 if (parts.index && GET_CODE (parts.index) == SUBREG)
10491 parts.index = SUBREG_REG (parts.index);
10493 /* Attempt to minimize the number of registers in the address. */
10495 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10497 && (!REG_P (parts.index)
10498 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10502 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10504 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10505 && parts.base != parts.index)
10508 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10509 since its predecode logic can't detect the length of instructions
10510 and it degenerates to vector decoding. Increase the cost of such
10511 addresses here. The penalty is at least 2 cycles. It may be worthwhile
10512 to split such addresses or even refuse such addresses at all.
10514 The following addressing modes are affected:
10515 [base+scale*index]
10516 [scale*index+disp]
10517 [base+index]
10519 The first and last case may be avoidable by explicitly coding the zero
10520 into the memory address, but I don't have an AMD K6 machine handy to
10521 check this theory. */
10524 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10525 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10526 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10532 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10533 this is used to form addresses to local data when -fPIC is in
10534 use. */
10537 darwin_local_data_pic (rtx disp)
10539 return (GET_CODE (disp) == UNSPEC
10540 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10543 /* Determine if a given RTX is a valid constant. We already know this
10544 satisfies CONSTANT_P. */
10547 legitimate_constant_p (rtx x)
10549 switch (GET_CODE (x))
10554 if (GET_CODE (x) == PLUS)
10556 if (!CONST_INT_P (XEXP (x, 1)))
10561 if (TARGET_MACHO && darwin_local_data_pic (x))
10564 /* Only some unspecs are valid as "constants". */
10565 if (GET_CODE (x) == UNSPEC)
10566 switch (XINT (x, 1))
10569 case UNSPEC_GOTOFF:
10570 case UNSPEC_PLTOFF:
10571 return TARGET_64BIT;
10573 case UNSPEC_NTPOFF:
10574 x = XVECEXP (x, 0, 0);
10575 return (GET_CODE (x) == SYMBOL_REF
10576 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10577 case UNSPEC_DTPOFF:
10578 x = XVECEXP (x, 0, 0);
10579 return (GET_CODE (x) == SYMBOL_REF
10580 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10585 /* We must have drilled down to a symbol. */
10586 if (GET_CODE (x) == LABEL_REF)
10588 if (GET_CODE (x) != SYMBOL_REF)
10593 /* TLS symbols are never valid. */
10594 if (SYMBOL_REF_TLS_MODEL (x))
10597 /* DLLIMPORT symbols are never valid. */
10598 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10599 && SYMBOL_REF_DLLIMPORT_P (x))
10604 if (GET_MODE (x) == TImode
10605 && x != CONST0_RTX (TImode)
10611 if (!standard_sse_constant_p (x))
10618 /* Otherwise we handle everything in the move patterns. */
10622 /* Determine if it's legal to put X into the constant pool. This
10623 is not possible for the address of thread-local symbols, which
10624 is checked above. */
10627 ix86_cannot_force_const_mem (rtx x)
10629 /* We can always put integral constants and vectors in memory. */
10630 switch (GET_CODE (x))
10640 return !legitimate_constant_p (x);
10644 /* Nonzero if the constant value X is a legitimate general operand
10645 when generating PIC code. It is given that flag_pic is on and
10646 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10649 legitimate_pic_operand_p (rtx x)
10653 switch (GET_CODE (x))
10656 inner = XEXP (x, 0);
10657 if (GET_CODE (inner) == PLUS
10658 && CONST_INT_P (XEXP (inner, 1)))
10659 inner = XEXP (inner, 0);
10661 /* Only some unspecs are valid as "constants". */
10662 if (GET_CODE (inner) == UNSPEC)
10663 switch (XINT (inner, 1))
10666 case UNSPEC_GOTOFF:
10667 case UNSPEC_PLTOFF:
10668 return TARGET_64BIT;
11669 case UNSPEC_NTPOFF:
11670 x = XVECEXP (inner, 0, 0);
10671 return (GET_CODE (x) == SYMBOL_REF
10672 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10673 case UNSPEC_MACHOPIC_OFFSET:
10674 return legitimate_pic_address_disp_p (x);
10682 return legitimate_pic_address_disp_p (x);
10689 /* Determine if a given CONST RTX is a valid memory displacement
10693 legitimate_pic_address_disp_p (rtx disp)
10697 /* In 64bit mode we can allow direct addresses of symbols and labels
10698 when they are not dynamic symbols. */
10701 rtx op0 = disp, op1;
10703 switch (GET_CODE (disp))
10709 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10711 op0 = XEXP (XEXP (disp, 0), 0);
10712 op1 = XEXP (XEXP (disp, 0), 1);
10713 if (!CONST_INT_P (op1)
10714 || INTVAL (op1) >= 16*1024*1024
10715 || INTVAL (op1) < -16*1024*1024)
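/* The +-16MB bound is conservative, presumably so that the symbol's own
   address plus this offset still fits comfortably in the signed 32-bit
   displacement used by the small code models.  */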
10717 if (GET_CODE (op0) == LABEL_REF)
10719 if (GET_CODE (op0) != SYMBOL_REF)
10724 /* TLS references should always be enclosed in UNSPEC. */
10725 if (SYMBOL_REF_TLS_MODEL (op0))
10727 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10728 && ix86_cmodel != CM_LARGE_PIC)
10736 if (GET_CODE (disp) != CONST)
10738 disp = XEXP (disp, 0);
10742 /* It is unsafe to allow PLUS expressions; this limits the allowed
10743 distance of GOT references. We should not need these anyway. */
10744 if (GET_CODE (disp) != UNSPEC
10745 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10746 && XINT (disp, 1) != UNSPEC_GOTOFF
10747 && XINT (disp, 1) != UNSPEC_PLTOFF))
10750 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10751 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10757 if (GET_CODE (disp) == PLUS)
10759 if (!CONST_INT_P (XEXP (disp, 1)))
10761 disp = XEXP (disp, 0);
10765 if (TARGET_MACHO && darwin_local_data_pic (disp))
10768 if (GET_CODE (disp) != UNSPEC)
10771 switch (XINT (disp, 1))
10776 /* We need to check for both symbols and labels because VxWorks loads
10777 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10778 details. */
10779 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10780 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
10781 case UNSPEC_GOTOFF:
10782 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10783 While the ABI also specifies a 32bit relocation, we don't produce
10784 it in the small PIC model at all. */
10785 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10786 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10788 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10790 case UNSPEC_GOTTPOFF:
10791 case UNSPEC_GOTNTPOFF:
10792 case UNSPEC_INDNTPOFF:
10795 disp = XVECEXP (disp, 0, 0);
10796 return (GET_CODE (disp) == SYMBOL_REF
10797 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10798 case UNSPEC_NTPOFF:
10799 disp = XVECEXP (disp, 0, 0);
10800 return (GET_CODE (disp) == SYMBOL_REF
10801 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10802 case UNSPEC_DTPOFF:
10803 disp = XVECEXP (disp, 0, 0);
10804 return (GET_CODE (disp) == SYMBOL_REF
10805 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
10811 /* Recognizes RTL expressions that are valid memory addresses for an
10812 instruction. The MODE argument is the machine mode for the MEM
10813 expression that wants to use this address.
10815 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10816 convert common non-canonical forms to canonical form so that they will
10817 be recognized. */
10820 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10821 rtx addr, bool strict)
10823 struct ix86_address parts;
10824 rtx base, index, disp;
10825 HOST_WIDE_INT scale;
10827 if (ix86_decompose_address (addr, &parts) <= 0)
10828 /* Decomposition failed. */
10829 return false;
10832 index = parts.index;
10834 scale = parts.scale;
10836 /* Validate base register.
10838 Don't allow SUBREG's that span more than a word here. It can lead to spill
10839 failures when the base is one word out of a two word structure, which is
10840 represented internally as a DImode int. */
10848 else if (GET_CODE (base) == SUBREG
10849 && REG_P (SUBREG_REG (base))
10850 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10852 reg = SUBREG_REG (base);
10853 else
10854 /* Base is not a register. */
10855 return false;
10857 if (GET_MODE (base) != Pmode)
10858 /* Base is not in Pmode. */
10859 return false;
10861 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10862 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10863 /* Base is not valid. */
10864 return false;
10867 /* Validate index register.
10869 Don't allow SUBREG's that span more than a word here -- same as above. */
10877 else if (GET_CODE (index) == SUBREG
10878 && REG_P (SUBREG_REG (index))
10879 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10881 reg = SUBREG_REG (index);
10882 else
10883 /* Index is not a register. */
10884 return false;
10886 if (GET_MODE (index) != Pmode)
10887 /* Index is not in Pmode. */
10888 return false;
10890 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10891 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10892 /* Index is not valid. */
10893 return false;
10896 /* Validate scale factor. */
10897 if (scale != 1)
10898 {
10899 if (!index)
10900 /* Scale without index. */
10901 return false;
10903 if (scale != 2 && scale != 4 && scale != 8)
10904 /* Scale is not a valid multiplier. */
10905 return false;
10906 }
10908 /* Validate displacement. */
10911 if (GET_CODE (disp) == CONST
10912 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10913 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10914 switch (XINT (XEXP (disp, 0), 1))
10916 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10917 used. While the ABI also specifies 32bit relocations, we don't
10918 produce them at all and use IP-relative addressing instead. */
10920 case UNSPEC_GOTOFF:
10921 gcc_assert (flag_pic);
10923 goto is_legitimate_pic;
10925 /* 64bit address unspec. */
10928 case UNSPEC_GOTPCREL:
10929 gcc_assert (flag_pic);
10930 goto is_legitimate_pic;
10932 case UNSPEC_GOTTPOFF:
10933 case UNSPEC_GOTNTPOFF:
10934 case UNSPEC_INDNTPOFF:
10935 case UNSPEC_NTPOFF:
10936 case UNSPEC_DTPOFF:
10940 /* Invalid address unspec. */
10944 else if (SYMBOLIC_CONST (disp)
10948 && MACHOPIC_INDIRECT
10949 && !machopic_operand_p (disp)
10955 if (TARGET_64BIT && (index || base))
10957 /* foo@dtpoff(%rX) is ok. */
10958 if (GET_CODE (disp) != CONST
10959 || GET_CODE (XEXP (disp, 0)) != PLUS
10960 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10961 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10962 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10963 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10964 /* Non-constant pic memory reference. */
10965 return false;
10967 else if (! legitimate_pic_address_disp_p (disp))
10968 /* Displacement is an invalid pic construct. */
10969 return false;
10971 /* This code used to verify that a symbolic pic displacement
10972 includes the pic_offset_table_rtx register.
10974 While this is a good idea, unfortunately these constructs may
10975 be created by the "adds using lea" optimization for incorrect
10976 code.
10984 Such code is nonsensical, but results in addressing the
10985 GOT table with a pic_offset_table_rtx base. We can't
10986 just refuse it easily, since it gets matched by the
10987 "addsi3" pattern, which later gets split to lea in the
10988 case the output register differs from the input. While this
10989 could be handled by a separate addsi pattern for this case
10990 that never results in lea, disabling the test seems to be the
10991 easier and correct fix for the crash. */
10993 else if (GET_CODE (disp) != LABEL_REF
10994 && !CONST_INT_P (disp)
10995 && (GET_CODE (disp) != CONST
10996 || !legitimate_constant_p (disp))
10997 && (GET_CODE (disp) != SYMBOL_REF
10998 || !legitimate_constant_p (disp)))
10999 /* Displacement is not constant. */
11000 return false;
11001 else if (TARGET_64BIT
11002 && !x86_64_immediate_operand (disp, VOIDmode))
11003 /* Displacement is out of range. */
11004 return false;
11007 /* Everything looks valid. */
11008 return true;
11011 /* Determine if a given RTX is a valid constant address. */
11014 constant_address_p (rtx x)
11016 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
11019 /* Return a unique alias set for the GOT. */
11021 static alias_set_type
11022 ix86_GOT_alias_set (void)
11024 static alias_set_type set = -1;
11025 if (set == -1)
11026 set = new_alias_set ();
11027 return set;
11030 /* Return a legitimate reference for ORIG (an address) using the
11031 register REG. If REG is 0, a new pseudo is generated.
11033 There are two types of references that must be handled:
11035 1. Global data references must load the address from the GOT, via
11036 the PIC reg. An insn is emitted to do this load, and the reg is
11039 2. Static data references, constant pool addresses, and code labels
11040 compute the address as an offset from the GOT, whose base is in
11041 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
11042 differentiate them from global data objects. The returned
11043 address is the PIC reg + an unspec constant.
11045 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
11046 reg also appears in the address. */
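/* For illustration: with -fpic on ia32, case 1 loads the address of a
   global symbol from the GOT, e.g.
	movl	foo@GOT(%ebx), %eax
   while case 2 computes a local symbol's address directly, e.g.
	leal	bar@GOTOFF(%ebx), %eax  */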
11049 legitimize_pic_address (rtx orig, rtx reg)
11052 rtx new_rtx = orig;
11056 if (TARGET_MACHO && !TARGET_64BIT)
11059 reg = gen_reg_rtx (Pmode);
11060 /* Use the generic Mach-O PIC machinery. */
11061 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
11065 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
11067 else if (TARGET_64BIT
11068 && ix86_cmodel != CM_SMALL_PIC
11069 && gotoff_operand (addr, Pmode))
11072 /* This symbol may be referenced via a displacement from the PIC
11073 base address (@GOTOFF). */
11075 if (reload_in_progress)
11076 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11077 if (GET_CODE (addr) == CONST)
11078 addr = XEXP (addr, 0);
11079 if (GET_CODE (addr) == PLUS)
11081 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11083 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11086 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11087 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11089 tmpreg = gen_reg_rtx (Pmode);
11092 emit_move_insn (tmpreg, new_rtx);
11096 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
11097 tmpreg, 1, OPTAB_DIRECT);
11099 else
11100 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
11102 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
11104 /* This symbol may be referenced via a displacement from the PIC
11105 base address (@GOTOFF). */
11107 if (reload_in_progress)
11108 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11109 if (GET_CODE (addr) == CONST)
11110 addr = XEXP (addr, 0);
11111 if (GET_CODE (addr) == PLUS)
11113 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
11115 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
11118 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
11119 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11120 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11124 emit_move_insn (reg, new_rtx);
11128 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
11129 /* We can't use @GOTOFF for text labels on VxWorks;
11130 see gotoff_operand. */
11131 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
11133 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11135 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
11136 return legitimize_dllimport_symbol (addr, true);
11137 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
11138 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
11139 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
11141 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
11142 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
11146 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
11148 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
11149 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11150 new_rtx = gen_const_mem (Pmode, new_rtx);
11151 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11154 reg = gen_reg_rtx (Pmode);
11155 /* Use gen_movsi directly, otherwise the address is loaded into a
11156 register for CSE. We don't want to CSE these addresses; instead
11157 we CSE addresses from the GOT table, so skip this. */
11158 emit_insn (gen_movsi (reg, new_rtx));
11163 /* This symbol must be referenced via a load from the
11164 Global Offset Table (@GOT). */
11166 if (reload_in_progress)
11167 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11168 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
11169 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11171 new_rtx = force_reg (Pmode, new_rtx);
11172 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11173 new_rtx = gen_const_mem (Pmode, new_rtx);
11174 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
11177 reg = gen_reg_rtx (Pmode);
11178 emit_move_insn (reg, new_rtx);
11184 if (CONST_INT_P (addr)
11185 && !x86_64_immediate_operand (addr, VOIDmode))
11189 emit_move_insn (reg, addr);
11193 new_rtx = force_reg (Pmode, addr);
11195 else if (GET_CODE (addr) == CONST)
11197 addr = XEXP (addr, 0);
11199 /* We must match stuff we generate before. Assume the only
11200 unspecs that can get here are ours. Not that we could do
11201 anything with them anyway.... */
11202 if (GET_CODE (addr) == UNSPEC
11203 || (GET_CODE (addr) == PLUS
11204 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
11206 gcc_assert (GET_CODE (addr) == PLUS);
11208 if (GET_CODE (addr) == PLUS)
11210 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11212 /* Check first to see if this is a constant offset from a @GOTOFF
11213 symbol reference. */
11214 if (gotoff_operand (op0, Pmode)
11215 && CONST_INT_P (op1))
11219 if (reload_in_progress)
11220 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11221 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11223 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11224 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11225 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11229 emit_move_insn (reg, new_rtx);
11235 if (INTVAL (op1) < -16*1024*1024
11236 || INTVAL (op1) >= 16*1024*1024)
11238 if (!x86_64_immediate_operand (op1, Pmode))
11239 op1 = force_reg (Pmode, op1);
11240 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11246 base = legitimize_pic_address (XEXP (addr, 0), reg);
11247 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11248 base == reg ? NULL_RTX : reg);
11250 if (CONST_INT_P (new_rtx))
11251 new_rtx = plus_constant (base, INTVAL (new_rtx));
11254 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11256 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11257 new_rtx = XEXP (new_rtx, 1);
11259 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
11267 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11270 get_thread_pointer (int to_reg)
11274 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11278 reg = gen_reg_rtx (Pmode);
11279 insn = gen_rtx_SET (VOIDmode, reg, tp);
11280 insn = emit_insn (insn);
11285 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11286 false if we expect this to be used for a memory address and true if
11287 we expect to load the address into a register. */
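/* For illustration: in the local-exec model the result is simply the
   thread pointer (%gs:0 on ia32, %fs:0 on x86-64) plus a link-time
   constant, e.g. "movl %gs:0, %eax; leal x@ntpoff(%eax), %eax", whereas
   the global-dynamic model must call __tls_get_addr at runtime.  */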
11290 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11292 rtx dest, base, off, pic, tp;
11297 case TLS_MODEL_GLOBAL_DYNAMIC:
11298 dest = gen_reg_rtx (Pmode);
11299 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11301 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11303 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11306 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11307 insns = get_insns ();
11310 RTL_CONST_CALL_P (insns) = 1;
11311 emit_libcall_block (insns, dest, rax, x);
11313 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11314 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11316 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11318 if (TARGET_GNU2_TLS)
11320 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11322 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11326 case TLS_MODEL_LOCAL_DYNAMIC:
11327 base = gen_reg_rtx (Pmode);
11328 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11330 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11332 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11335 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11336 insns = get_insns ();
11339 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11340 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11341 RTL_CONST_CALL_P (insns) = 1;
11342 emit_libcall_block (insns, base, rax, note);
11344 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11345 emit_insn (gen_tls_local_dynamic_base_64 (base));
11347 emit_insn (gen_tls_local_dynamic_base_32 (base));
11349 if (TARGET_GNU2_TLS)
11351 rtx x = ix86_tls_module_base ();
11353 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11354 gen_rtx_MINUS (Pmode, x, tp));
11357 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11358 off = gen_rtx_CONST (Pmode, off);
11360 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11362 if (TARGET_GNU2_TLS)
11364 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11366 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11371 case TLS_MODEL_INITIAL_EXEC:
11375 type = UNSPEC_GOTNTPOFF;
11379 if (reload_in_progress)
11380 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11381 pic = pic_offset_table_rtx;
11382 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11384 else if (!TARGET_ANY_GNU_TLS)
11386 pic = gen_reg_rtx (Pmode);
11387 emit_insn (gen_set_got (pic));
11388 type = UNSPEC_GOTTPOFF;
11393 type = UNSPEC_INDNTPOFF;
11396 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11397 off = gen_rtx_CONST (Pmode, off);
11399 off = gen_rtx_PLUS (Pmode, pic, off);
11400 off = gen_const_mem (Pmode, off);
11401 set_mem_alias_set (off, ix86_GOT_alias_set ());
11403 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11405 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11406 off = force_reg (Pmode, off);
11407 return gen_rtx_PLUS (Pmode, base, off);
11411 base = get_thread_pointer (true);
11412 dest = gen_reg_rtx (Pmode);
11413 emit_insn (gen_subsi3 (dest, base, off));
11417 case TLS_MODEL_LOCAL_EXEC:
11418 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11419 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11420 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11421 off = gen_rtx_CONST (Pmode, off);
11423 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11425 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11426 return gen_rtx_PLUS (Pmode, base, off);
11430 base = get_thread_pointer (true);
11431 dest = gen_reg_rtx (Pmode);
11432 emit_insn (gen_subsi3 (dest, base, off));
11437 gcc_unreachable ();
11443 /* Create or return the unique __imp_DECL dllimport symbol corresponding
11444 to symbol DECL. */
11446 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11447 htab_t dllimport_map;
11450 get_dllimport_decl (tree decl)
11452 struct tree_map *h, in;
11455 const char *prefix;
11456 size_t namelen, prefixlen;
11461 if (!dllimport_map)
11462 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11464 in.hash = htab_hash_pointer (decl);
11465 in.base.from = decl;
11466 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11467 h = (struct tree_map *) *loc;
11471 *loc = h = ggc_alloc_tree_map ();
11473 h->base.from = decl;
11474 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11475 VAR_DECL, NULL, ptr_type_node);
11476 DECL_ARTIFICIAL (to) = 1;
11477 DECL_IGNORED_P (to) = 1;
11478 DECL_EXTERNAL (to) = 1;
11479 TREE_READONLY (to) = 1;
11481 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11482 name = targetm.strip_name_encoding (name);
11483 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11484 ? "*__imp_" : "*__imp__";
11485 namelen = strlen (name);
11486 prefixlen = strlen (prefix);
11487 imp_name = (char *) alloca (namelen + prefixlen + 1);
11488 memcpy (imp_name, prefix, prefixlen);
11489 memcpy (imp_name + prefixlen, name, namelen + 1);
11491 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11492 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11493 SET_SYMBOL_REF_DECL (rtl, to);
11494 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11496 rtl = gen_const_mem (Pmode, rtl);
11497 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11499 SET_DECL_RTL (to, rtl);
11500 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
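/* The net effect is that a reference to a dllimported "foo" becomes a
   load through "*__imp__foo", the import-table pointer slot that the
   linker or loader fills in.  */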
11505 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11506 true if we require the result to be a register. */
11509 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11514 gcc_assert (SYMBOL_REF_DECL (symbol));
11515 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11517 x = DECL_RTL (imp_decl);
11519 x = force_reg (Pmode, x);
11523 /* Try machine-dependent ways of modifying an illegitimate address
11524 to be legitimate. If we find one, return the new, valid address.
11525 This macro is used in only one place: `memory_address' in explow.c.
11527 OLDX is the address as it was before break_out_memory_refs was called.
11528 In some cases it is useful to look at this to decide what needs to be done.
11530 It is always safe for this macro to do nothing. It exists to recognize
11531 opportunities to optimize the output.
11533 For the 80386, we handle X+REG by loading X into a register R and
11534 using R+REG. R will go in a general reg and indexing will be used.
11535 However, if REG is a broken-out memory address or multiplication,
11536 nothing needs to be done because REG can certainly go in a general reg.
11538 When -fpic is used, special handling is needed for symbolic references.
11539 See comments by legitimize_pic_address in i386.c for details. */
11542 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11543 enum machine_mode mode)
11548 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11550 return legitimize_tls_address (x, (enum tls_model) log, false);
11551 if (GET_CODE (x) == CONST
11552 && GET_CODE (XEXP (x, 0)) == PLUS
11553 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11554 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11556 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11557 (enum tls_model) log, false);
11558 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11561 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11563 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11564 return legitimize_dllimport_symbol (x, true);
11565 if (GET_CODE (x) == CONST
11566 && GET_CODE (XEXP (x, 0)) == PLUS
11567 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11568 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11570 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11571 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11575 if (flag_pic && SYMBOLIC_CONST (x))
11576 return legitimize_pic_address (x, 0);
11578 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11579 if (GET_CODE (x) == ASHIFT
11580 && CONST_INT_P (XEXP (x, 1))
11581 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11584 log = INTVAL (XEXP (x, 1));
11585 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11586 GEN_INT (1 << log));
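/* E.g. (ashift (reg) (const_int 2)) becomes (mult (reg) (const_int 4)),
   which ix86_decompose_address accepts as an index scaled by 4.  */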
11589 if (GET_CODE (x) == PLUS)
11591 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11593 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11594 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11595 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11598 log = INTVAL (XEXP (XEXP (x, 0), 1));
11599 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11600 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11601 GEN_INT (1 << log));
11604 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11605 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11606 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11609 log = INTVAL (XEXP (XEXP (x, 1), 1));
11610 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11611 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11612 GEN_INT (1 << log));
11615 /* Put multiply first if it isn't already. */
11616 if (GET_CODE (XEXP (x, 1)) == MULT)
11618 rtx tmp = XEXP (x, 0);
11619 XEXP (x, 0) = XEXP (x, 1);
11624 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11625 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11626 created by virtual register instantiation, register elimination, and
11627 similar optimizations. */
11628 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11631 x = gen_rtx_PLUS (Pmode,
11632 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11633 XEXP (XEXP (x, 1), 0)),
11634 XEXP (XEXP (x, 1), 1));
11637 /* Canonicalize
11638 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11639 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11640 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11641 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11642 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11643 && CONSTANT_P (XEXP (x, 1)))
11646 rtx other = NULL_RTX;
11648 if (CONST_INT_P (XEXP (x, 1)))
11650 constant = XEXP (x, 1);
11651 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11653 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11655 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11656 other = XEXP (x, 1);
11664 x = gen_rtx_PLUS (Pmode,
11665 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11666 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11667 plus_constant (other, INTVAL (constant)));
11671 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11674 if (GET_CODE (XEXP (x, 0)) == MULT)
11677 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11680 if (GET_CODE (XEXP (x, 1)) == MULT)
11683 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11687 && REG_P (XEXP (x, 1))
11688 && REG_P (XEXP (x, 0)))
11691 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11694 x = legitimize_pic_address (x, 0);
11697 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11700 if (REG_P (XEXP (x, 0)))
11702 rtx temp = gen_reg_rtx (Pmode);
11703 rtx val = force_operand (XEXP (x, 1), temp);
11705 emit_move_insn (temp, val);
11707 XEXP (x, 1) = temp;
11711 else if (REG_P (XEXP (x, 1)))
11713 rtx temp = gen_reg_rtx (Pmode);
11714 rtx val = force_operand (XEXP (x, 0), temp);
11716 emit_move_insn (temp, val);
11718 XEXP (x, 0) = temp;
11726 /* Print an integer constant expression in assembler syntax. Addition
11727 and subtraction are the only arithmetic that may appear in these
11728 expressions. FILE is the stdio stream to write to, X is the rtx, and
11729 CODE is the operand print code from the output string. */
11732 output_pic_addr_const (FILE *file, rtx x, int code)
11736 switch (GET_CODE (x))
11739 gcc_assert (flag_pic);
11744 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11745 output_addr_const (file, x);
11748 const char *name = XSTR (x, 0);
11750 /* Mark the decl as referenced so that cgraph will
11751 output the function. */
11752 if (SYMBOL_REF_DECL (x))
11753 mark_decl_referenced (SYMBOL_REF_DECL (x));
11756 if (MACHOPIC_INDIRECT
11757 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11758 name = machopic_indirection_name (x, /*stub_p=*/true);
11760 assemble_name (file, name);
11762 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11763 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11764 fputs ("@PLT", file);
11771 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11772 assemble_name (asm_out_file, buf);
11776 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11780 /* This used to output parentheses around the expression,
11781 but that does not work on the 386 (either ATT or BSD assembler). */
11782 output_pic_addr_const (file, XEXP (x, 0), code);
11786 if (GET_MODE (x) == VOIDmode)
11788 /* We can use %d if the number is <32 bits and positive. */
11789 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11790 fprintf (file, "0x%lx%08lx",
11791 (unsigned long) CONST_DOUBLE_HIGH (x),
11792 (unsigned long) CONST_DOUBLE_LOW (x));
11794 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11797 /* We can't handle floating point constants;
11798 TARGET_PRINT_OPERAND must handle them. */
11799 output_operand_lossage ("floating constant misused");
11803 /* Some assemblers need integer constants to appear first. */
11804 if (CONST_INT_P (XEXP (x, 0)))
11806 output_pic_addr_const (file, XEXP (x, 0), code);
11808 output_pic_addr_const (file, XEXP (x, 1), code);
11812 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11813 output_pic_addr_const (file, XEXP (x, 1), code);
11815 output_pic_addr_const (file, XEXP (x, 0), code);
11821 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11822 output_pic_addr_const (file, XEXP (x, 0), code);
11824 output_pic_addr_const (file, XEXP (x, 1), code);
11826 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11830 gcc_assert (XVECLEN (x, 0) == 1);
11831 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11832 switch (XINT (x, 1))
11835 fputs ("@GOT", file);
11837 case UNSPEC_GOTOFF:
11838 fputs ("@GOTOFF", file);
11840 case UNSPEC_PLTOFF:
11841 fputs ("@PLTOFF", file);
11843 case UNSPEC_GOTPCREL:
11844 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11845 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11847 case UNSPEC_GOTTPOFF:
11848 /* FIXME: This might be @TPOFF in Sun ld too. */
11849 fputs ("@gottpoff", file);
11852 fputs ("@tpoff", file);
11854 case UNSPEC_NTPOFF:
11856 fputs ("@tpoff", file);
11858 fputs ("@ntpoff", file);
11860 case UNSPEC_DTPOFF:
11861 fputs ("@dtpoff", file);
11863 case UNSPEC_GOTNTPOFF:
11865 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11866 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11868 fputs ("@gotntpoff", file);
11870 case UNSPEC_INDNTPOFF:
11871 fputs ("@indntpoff", file);
11874 case UNSPEC_MACHOPIC_OFFSET:
11876 machopic_output_function_base_name (file);
11880 output_operand_lossage ("invalid UNSPEC as operand");
11886 output_operand_lossage ("invalid expression as operand");
11890 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11891 We need to emit DTP-relative relocations. */
11893 static void ATTRIBUTE_UNUSED
11894 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11896 fputs (ASM_LONG, file);
11897 output_addr_const (file, x);
11898 fputs ("@dtpoff", file);
11904 fputs (", 0", file);
11907 gcc_unreachable ();
11911 /* Return true if X is a representation of the PIC register. This copes
11912 with calls from ix86_find_base_term, where the register might have
11913 been replaced by a cselib value. */
11916 ix86_pic_register_p (rtx x)
11918 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11919 return (pic_offset_table_rtx
11920 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11922 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11925 /* In the name of slightly smaller debug output, and to cater to
11926 general assembler lossage, recognize PIC+GOTOFF and turn it back
11927 into a direct symbol reference.
11929 On Darwin, this is necessary to avoid a crash, because Darwin
11930 has a different PIC label for each routine but the DWARF debugging
11931 information is not associated with any particular routine, so it's
11932 necessary to remove references to the PIC label from RTL stored by
11933 the DWARF output code. */
11936 ix86_delegitimize_address (rtx x)
11938 rtx orig_x = delegitimize_mem_from_attrs (x);
11939 /* addend is NULL or some rtx if x is something+GOTOFF where
11940 something doesn't include the PIC register. */
11941 rtx addend = NULL_RTX;
11942 /* reg_addend is NULL or a multiple of some register. */
11943 rtx reg_addend = NULL_RTX;
11944 /* const_addend is NULL or a const_int. */
11945 rtx const_addend = NULL_RTX;
11946 /* This is the result, or NULL. */
11947 rtx result = NULL_RTX;
11956 if (GET_CODE (x) != CONST
11957 || GET_CODE (XEXP (x, 0)) != UNSPEC
11958 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11959 || !MEM_P (orig_x))
11961 x = XVECEXP (XEXP (x, 0), 0, 0);
11962 if (GET_MODE (orig_x) != Pmode)
11963 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11967 if (GET_CODE (x) != PLUS
11968 || GET_CODE (XEXP (x, 1)) != CONST)
11971 if (ix86_pic_register_p (XEXP (x, 0)))
11972 /* %ebx + GOT/GOTOFF */
11974 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11976 /* %ebx + %reg * scale + GOT/GOTOFF */
11977 reg_addend = XEXP (x, 0);
11978 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11979 reg_addend = XEXP (reg_addend, 1);
11980 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11981 reg_addend = XEXP (reg_addend, 0);
11984 reg_addend = NULL_RTX;
11985 addend = XEXP (x, 0);
11989 addend = XEXP (x, 0);
11991 x = XEXP (XEXP (x, 1), 0);
11992 if (GET_CODE (x) == PLUS
11993 && CONST_INT_P (XEXP (x, 1)))
11995 const_addend = XEXP (x, 1);
11999 if (GET_CODE (x) == UNSPEC
12000 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
12001 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
12002 result = XVECEXP (x, 0, 0);
12004 if (TARGET_MACHO && darwin_local_data_pic (x)
12005 && !MEM_P (orig_x))
12006 result = XVECEXP (x, 0, 0);
12012 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
12014 result = gen_rtx_PLUS (Pmode, reg_addend, result);
12017 /* If the rest of original X doesn't involve the PIC register, add
12018 addend and subtract pic_offset_table_rtx. This can happen e.g.
12020 leal (%ebx, %ecx, 4), %ecx
12022 movl foo@GOTOFF(%ecx), %edx
12023 in which case we return (%ecx - %ebx) + foo. */
12024 if (pic_offset_table_rtx)
12025 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
12026 pic_offset_table_rtx),
12031 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
12032 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
12036 /* If X is a machine specific address (i.e. a symbol or label being
12037 referenced as a displacement from the GOT implemented using an
12038 UNSPEC), then return the base term. Otherwise return X. */
12041 ix86_find_base_term (rtx x)
12047 if (GET_CODE (x) != CONST)
12049 term = XEXP (x, 0);
12050 if (GET_CODE (term) == PLUS
12051 && (CONST_INT_P (XEXP (term, 1))
12052 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
12053 term = XEXP (term, 0);
12054 if (GET_CODE (term) != UNSPEC
12055 || XINT (term, 1) != UNSPEC_GOTPCREL)
12058 return XVECEXP (term, 0, 0);
12061 return ix86_delegitimize_address (x);
12065 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
12066 int fp, FILE *file)
12068 const char *suffix;
12070 if (mode == CCFPmode || mode == CCFPUmode)
12072 code = ix86_fp_compare_code_to_integer (code);
12076 code = reverse_condition (code);
12127 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
12131 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
12132 Those same assemblers have the same but opposite lossage on cmov. */
12133 if (mode == CCmode)
12134 suffix = fp ? "nbe" : "a";
12135 else if (mode == CCCmode)
12138 gcc_unreachable ();
12154 gcc_unreachable ();
12158 gcc_assert (mode == CCmode || mode == CCCmode);
12175 gcc_unreachable ();
12179 /* ??? As above. */
12180 gcc_assert (mode == CCmode || mode == CCCmode);
12181 suffix = fp ? "nb" : "ae";
12184 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
12188 /* ??? As above. */
12189 if (mode == CCmode)
12191 else if (mode == CCCmode)
12192 suffix = fp ? "nb" : "ae";
12194 gcc_unreachable ();
12197 suffix = fp ? "u" : "p";
12200 suffix = fp ? "nu" : "np";
12203 gcc_unreachable ();
12205 fputs (suffix, file);
12208 /* Print the name of register X to FILE based on its machine mode and number.
12209 If CODE is 'w', pretend the mode is HImode.
12210 If CODE is 'b', pretend the mode is QImode.
12211 If CODE is 'k', pretend the mode is SImode.
12212 If CODE is 'q', pretend the mode is DImode.
12213 If CODE is 'x', pretend the mode is V4SFmode.
12214 If CODE is 't', pretend the mode is V8SFmode.
12215 If CODE is 'h', pretend the reg is the 'high' byte register.
12216 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
12217 If CODE is 'd', duplicate the operand for an AVX instruction.
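   For example, a register allocated to %rax prints as "ax" under code 'w',
   "al" under 'b', "eax" under 'k' and "rax" under 'q'.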
12221 print_reg (rtx x, int code, FILE *file)
12224 bool duplicated = code == 'd' && TARGET_AVX;
12226 gcc_assert (x == pc_rtx
12227 || (REGNO (x) != ARG_POINTER_REGNUM
12228 && REGNO (x) != FRAME_POINTER_REGNUM
12229 && REGNO (x) != FLAGS_REG
12230 && REGNO (x) != FPSR_REG
12231 && REGNO (x) != FPCR_REG));
12233 if (ASSEMBLER_DIALECT == ASM_ATT)
12238 gcc_assert (TARGET_64BIT);
12239 fputs ("rip", file);
12243 if (code == 'w' || MMX_REG_P (x))
12245 else if (code == 'b')
12247 else if (code == 'k')
12249 else if (code == 'q')
12251 else if (code == 'y')
12253 else if (code == 'h')
12255 else if (code == 'x')
12257 else if (code == 't')
12260 code = GET_MODE_SIZE (GET_MODE (x));
12262 /* Irritatingly, AMD extended registers use a different naming convention
12263 from the normal registers. */
12264 if (REX_INT_REG_P (x))
12266 gcc_assert (TARGET_64BIT);
12270 error ("extended registers have no high halves");
12273 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12276 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12279 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12282 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12285 error ("unsupported operand size for extended register");
12295 if (STACK_TOP_P (x))
12304 if (! ANY_FP_REG_P (x))
12305 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12310 reg = hi_reg_name[REGNO (x)];
12313 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12315 reg = qi_reg_name[REGNO (x)];
12318 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12320 reg = qi_high_reg_name[REGNO (x)];
12325 gcc_assert (!duplicated);
12327 fputs (hi_reg_name[REGNO (x)] + 1, file);
12332 gcc_unreachable ();
12338 if (ASSEMBLER_DIALECT == ASM_ATT)
12339 fprintf (file, ", %%%s", reg);
12341 fprintf (file, ", %s", reg);
12345 /* Locate some local-dynamic symbol still in use by this function
12346 so that we can print its name in some tls_local_dynamic_base
12350 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
12354 if (GET_CODE (x) == SYMBOL_REF
12355 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12357 cfun->machine->some_ld_name = XSTR (x, 0);
12364 static const char *
12365 get_some_local_dynamic_name (void)
12369 if (cfun->machine->some_ld_name)
12370 return cfun->machine->some_ld_name;
12372 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12373 if (NONDEBUG_INSN_P (insn)
12374 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12375 return cfun->machine->some_ld_name;
12380 /* Meaning of CODE:
12381 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12382 C -- print opcode suffix for set/cmov insn.
12383 c -- like C, but print reversed condition
12384 F,f -- likewise, but for floating-point.
12385 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
12387 R -- print the prefix for register names.
12388 z -- print the opcode suffix for the size of the current operand.
12389 Z -- likewise, with special suffixes for x87 instructions.
12390 * -- print a star (in certain assembler syntax)
12391 A -- print an absolute memory reference.
12392 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12393 s -- print a shift double count, followed by the assembler's argument
12394 delimiter.
12395 b -- print the QImode name of the register for the indicated operand.
12396 %b0 would print %al if operands[0] is reg 0.
12397 w -- likewise, print the HImode name of the register.
12398 k -- likewise, print the SImode name of the register.
12399 q -- likewise, print the DImode name of the register.
12400 x -- likewise, print the V4SFmode name of the register.
12401 t -- likewise, print the V8SFmode name of the register.
12402 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12403 y -- print "st(0)" instead of "st" as a register.
12404 d -- print duplicated register operand for AVX instruction.
12405 D -- print condition for SSE cmp instruction.
12406 P -- if PIC, print an @PLT suffix.
12407 X -- don't print any sort of PIC '@' suffix for a symbol.
12408 & -- print some in-use local-dynamic symbol name.
12409 H -- print a memory address offset by 8; used for sse high-parts
12410 Y -- print condition for XOP pcom* instruction.
12411 + -- print a branch hint as 'cs' or 'ds' prefix
12412 ; -- print a semicolon (after prefixes due to a bug in older gas).
12416 ix86_print_operand (FILE *file, rtx x, int code)
12423 if (ASSEMBLER_DIALECT == ASM_ATT)
12429 const char *name = get_some_local_dynamic_name ();
12431 output_operand_lossage ("'%%&' used without any "
12432 "local dynamic TLS references");
12434 assemble_name (file, name);
12439 switch (ASSEMBLER_DIALECT)
12446 /* Intel syntax. For absolute addresses, registers should not
12447 be surrounded by braces. */
12451 ix86_print_operand (file, x, 0);
12458 gcc_unreachable ();
12461 ix86_print_operand (file, x, 0);
12466 if (ASSEMBLER_DIALECT == ASM_ATT)
12471 if (ASSEMBLER_DIALECT == ASM_ATT)
12476 if (ASSEMBLER_DIALECT == ASM_ATT)
12481 if (ASSEMBLER_DIALECT == ASM_ATT)
12486 if (ASSEMBLER_DIALECT == ASM_ATT)
12491 if (ASSEMBLER_DIALECT == ASM_ATT)
12496 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12498 /* Opcodes don't get size suffixes if using Intel opcodes. */
12499 if (ASSEMBLER_DIALECT == ASM_INTEL)
12502 switch (GET_MODE_SIZE (GET_MODE (x)))
12521 output_operand_lossage
12522 ("invalid operand size for operand code '%c'", code);
12527 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12529 (0, "non-integer operand used with operand code '%c'", code);
12533 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12534 if (ASSEMBLER_DIALECT == ASM_INTEL)
12537 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12539 switch (GET_MODE_SIZE (GET_MODE (x)))
12542 #ifdef HAVE_AS_IX86_FILDS
12552 #ifdef HAVE_AS_IX86_FILDQ
12555 fputs ("ll", file);
12563 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12565 /* 387 opcodes don't get size suffixes
12566 if the operands are registers. */
12567 if (STACK_REG_P (x))
12570 switch (GET_MODE_SIZE (GET_MODE (x)))
12591 output_operand_lossage
12592 ("invalid operand type used with operand code '%c'", code);
12596 output_operand_lossage
12597 ("invalid operand size for operand code '%c'", code);
12614 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12616 ix86_print_operand (file, x, 0);
12617 fputs (", ", file);
12622 /* A little bit of braindamage here: the SSE compare instructions
12623 use completely different names for the comparisons than the
12624 fp conditional moves do. */
12627 switch (GET_CODE (x))
12630 fputs ("eq", file);
12633 fputs ("eq_us", file);
12636 fputs ("lt", file);
12639 fputs ("nge", file);
12642 fputs ("le", file);
12645 fputs ("ngt", file);
12648 fputs ("unord", file);
12651 fputs ("neq", file);
12654 fputs ("neq_oq", file);
12657 fputs ("ge", file);
12660 fputs ("nlt", file);
12663 fputs ("gt", file);
12666 fputs ("nle", file);
12669 fputs ("ord", file);
12672 output_operand_lossage ("operand is not a condition code, "
12673 "invalid operand code 'D'");
12679 switch (GET_CODE (x))
12683 fputs ("eq", file);
12687 fputs ("lt", file);
12691 fputs ("le", file);
12694 fputs ("unord", file);
12698 fputs ("neq", file);
12702 fputs ("nlt", file);
12706 fputs ("nle", file);
12709 fputs ("ord", file);
12712 output_operand_lossage ("operand is not a condition code, "
12713 "invalid operand code 'D'");
12719 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12720 if (ASSEMBLER_DIALECT == ASM_ATT)
12722 switch (GET_MODE (x))
12724 case HImode: putc ('w', file); break;
12726 case SFmode: putc ('l', file); break;
12728 case DFmode: putc ('q', file); break;
12729 default: gcc_unreachable ();
12736 if (!COMPARISON_P (x))
12738 output_operand_lossage ("operand is neither a constant nor a "
12739 "condition code, invalid operand code "
12743 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12746 if (!COMPARISON_P (x))
12748 output_operand_lossage ("operand is neither a constant nor a "
12749 "condition code, invalid operand code "
12753 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12754 if (ASSEMBLER_DIALECT == ASM_ATT)
12757 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
/* Like above, but with the condition reversed. */
12762 /* Check to see if argument to %c is really a constant
12763 and not a condition code which needs to be reversed. */
12764 if (!COMPARISON_P (x))
12766 output_operand_lossage ("operand is neither a constant nor a "
12767 "condition code, invalid operand "
12771 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12774 if (!COMPARISON_P (x))
12776 output_operand_lossage ("operand is neither a constant nor a "
12777 "condition code, invalid operand "
12781 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12782 if (ASSEMBLER_DIALECT == ASM_ATT)
12785 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12789 /* It doesn't actually matter what mode we use here, as we're
12790 only going to use this for printing. */
12791 x = adjust_address_nv (x, DImode, 8);
12799 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12802 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12805 int pred_val = INTVAL (XEXP (x, 0));
12807 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12808 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12810 int taken = pred_val > REG_BR_PROB_BASE / 2;
12811 int cputaken = final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only where the default branch prediction
heuristics would fail. */
12815 if (taken != cputaken)
12817 /* We use 3e (DS) prefix for taken branches and
12818 2e (CS) prefix for not taken branches. */
12820 fputs ("ds ; ", file);
12822 fputs ("cs ; ", file);
12830 switch (GET_CODE (x))
12833 fputs ("neq", file);
12836 fputs ("eq", file);
12840 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12844 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12848 fputs ("le", file);
12852 fputs ("lt", file);
12855 fputs ("unord", file);
12858 fputs ("ord", file);
12861 fputs ("ueq", file);
12864 fputs ("nlt", file);
12867 fputs ("nle", file);
12870 fputs ("ule", file);
12873 fputs ("ult", file);
12876 fputs ("une", file);
12879 output_operand_lossage ("operand is not a condition code, "
12880 "invalid operand code 'Y'");
12886 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12892 output_operand_lossage ("invalid operand code '%c'", code);
12897 print_reg (x, code, file);
12899 else if (MEM_P (x))
12901 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12902 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12903 && GET_MODE (x) != BLKmode)
12906 switch (GET_MODE_SIZE (GET_MODE (x)))
12908 case 1: size = "BYTE"; break;
12909 case 2: size = "WORD"; break;
12910 case 4: size = "DWORD"; break;
12911 case 8: size = "QWORD"; break;
12912 case 12: size = "TBYTE"; break;
12914 if (GET_MODE (x) == XFmode)
12919 case 32: size = "YMMWORD"; break;
12921 gcc_unreachable ();
12924 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12927 else if (code == 'w')
12929 else if (code == 'k')
12932 fputs (size, file);
12933 fputs (" PTR ", file);
12937 /* Avoid (%rip) for call operands. */
12938 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12939 && !CONST_INT_P (x))
12940 output_addr_const (file, x);
12941 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12942 output_operand_lossage ("invalid constraints for operand");
12944 output_address (x);
12947 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12952 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12953 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12955 if (ASSEMBLER_DIALECT == ASM_ATT)
12957 /* Sign extend 32bit SFmode immediate to 8 bytes. */
12959 fprintf (file, "0x%08llx", (unsigned long long) (int) l);
12961 fprintf (file, "0x%08x", (unsigned int) l);
12964 /* These float cases don't actually occur as immediate operands. */
12965 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12969 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12970 fputs (dstr, file);
12973 else if (GET_CODE (x) == CONST_DOUBLE
12974 && GET_MODE (x) == XFmode)
12978 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12979 fputs (dstr, file);
12984 /* We have patterns that allow zero sets of memory, for instance.
12985 In 64-bit mode, we should probably support all 8-byte vectors,
12986 since we can in fact encode that into an immediate. */
12987 if (GET_CODE (x) == CONST_VECTOR)
12989 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12995 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12997 if (ASSEMBLER_DIALECT == ASM_ATT)
13000 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
13001 || GET_CODE (x) == LABEL_REF)
13003 if (ASSEMBLER_DIALECT == ASM_ATT)
13006 fputs ("OFFSET FLAT:", file);
13009 if (CONST_INT_P (x))
13010 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
13012 output_pic_addr_const (file, x, code);
13014 output_addr_const (file, x);
13019 ix86_print_operand_punct_valid_p (unsigned char code)
13021 return (code == '*' || code == '+' || code == '&' || code == ';');
13024 /* Print a memory operand whose address is ADDR. */
13027 ix86_print_operand_address (FILE *file, rtx addr)
13029 struct ix86_address parts;
13030 rtx base, index, disp;
13032 int ok = ix86_decompose_address (addr, &parts);
13037 index = parts.index;
13039 scale = parts.scale;
13047 if (ASSEMBLER_DIALECT == ASM_ATT)
13049 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
13052 gcc_unreachable ();
/* Use the one byte shorter RIP-relative addressing for 64-bit mode. */
13056 if (TARGET_64BIT && !base && !index)
13060 if (GET_CODE (disp) == CONST
13061 && GET_CODE (XEXP (disp, 0)) == PLUS
13062 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13063 symbol = XEXP (XEXP (disp, 0), 0);
13065 if (GET_CODE (symbol) == LABEL_REF
13066 || (GET_CODE (symbol) == SYMBOL_REF
13067 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
13070 if (!base && !index)
/* A displacement-only address requires special attention. */
13074 if (CONST_INT_P (disp))
13076 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
13077 fputs ("ds:", file);
13078 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
13081 output_pic_addr_const (file, disp, 0);
13083 output_addr_const (file, disp);
13087 if (ASSEMBLER_DIALECT == ASM_ATT)
13092 output_pic_addr_const (file, disp, 0);
13093 else if (GET_CODE (disp) == LABEL_REF)
13094 output_asm_label (disp);
13096 output_addr_const (file, disp);
13101 print_reg (base, 0, file);
13105 print_reg (index, 0, file);
13107 fprintf (file, ",%d", scale);
13113 rtx offset = NULL_RTX;
13117 /* Pull out the offset of a symbol; print any symbol itself. */
13118 if (GET_CODE (disp) == CONST
13119 && GET_CODE (XEXP (disp, 0)) == PLUS
13120 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
13122 offset = XEXP (XEXP (disp, 0), 1);
13123 disp = gen_rtx_CONST (VOIDmode,
13124 XEXP (XEXP (disp, 0), 0));
13128 output_pic_addr_const (file, disp, 0);
13129 else if (GET_CODE (disp) == LABEL_REF)
13130 output_asm_label (disp);
13131 else if (CONST_INT_P (disp))
13134 output_addr_const (file, disp);
13140 print_reg (base, 0, file);
13143 if (INTVAL (offset) >= 0)
13145 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13149 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
13156 print_reg (index, 0, file);
13158 fprintf (file, "*%d", scale);
13165 /* Implementation of TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
13168 i386_asm_output_addr_const_extra (FILE *file, rtx x)
13172 if (GET_CODE (x) != UNSPEC)
13175 op = XVECEXP (x, 0, 0);
13176 switch (XINT (x, 1))
13178 case UNSPEC_GOTTPOFF:
13179 output_addr_const (file, op);
13180 /* FIXME: This might be @TPOFF in Sun ld. */
13181 fputs ("@gottpoff", file);
13184 output_addr_const (file, op);
13185 fputs ("@tpoff", file);
13187 case UNSPEC_NTPOFF:
13188 output_addr_const (file, op);
13190 fputs ("@tpoff", file);
13192 fputs ("@ntpoff", file);
13194 case UNSPEC_DTPOFF:
13195 output_addr_const (file, op);
13196 fputs ("@dtpoff", file);
13198 case UNSPEC_GOTNTPOFF:
13199 output_addr_const (file, op);
13201 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
13202 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
13204 fputs ("@gotntpoff", file);
13206 case UNSPEC_INDNTPOFF:
13207 output_addr_const (file, op);
13208 fputs ("@indntpoff", file);
13211 case UNSPEC_MACHOPIC_OFFSET:
13212 output_addr_const (file, op);
13214 machopic_output_function_base_name (file);
13225 /* Split one or more DImode RTL references into pairs of SImode
13226 references. The RTL can be REG, offsettable MEM, integer constant, or
13227 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13228 split and "num" is its length. lo_half and hi_half are output arrays
13229 that parallel "operands". */
13232 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13236 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses,
but we still have to handle them. */
13242 lo_half[num] = adjust_address (op, SImode, 0);
13243 hi_half[num] = adjust_address (op, SImode, 4);
13247 lo_half[num] = simplify_gen_subreg (SImode, op,
13248 GET_MODE (op) == VOIDmode
13249 ? DImode : GET_MODE (op), 0);
13250 hi_half[num] = simplify_gen_subreg (SImode, op,
13251 GET_MODE (op) == VOIDmode
13252 ? DImode : GET_MODE (op), 4);
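/* A worked example (sketch): splitting the DImode constant
   0x1122334455667788 yields lo_half = 0x55667788 and hi_half =
   0x11223344, mirroring the little-endian layout the adjust_address
   path uses for MEMs (offsets 0 and 4).  */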
13256 /* Split one or more TImode RTL references into pairs of DImode
13257 references. The RTL can be REG, offsettable MEM, integer constant, or
13258 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13259 split and "num" is its length. lo_half and hi_half are output arrays
13260 that parallel "operands". */
13263 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13267 rtx op = operands[num];
/* simplify_subreg refuses to split volatile memory addresses, but we
still have to handle them. */
13273 lo_half[num] = adjust_address (op, DImode, 0);
13274 hi_half[num] = adjust_address (op, DImode, 8);
13278 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13279 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13284 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13285 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13286 is the expression of the binary operation. The output may either be
13287 emitted here, or returned to the caller, like all output_* functions.
13289 There is no guarantee that the operands are the same mode, as they
13290 might be within FLOAT or FLOAT_EXTEND expressions. */
13292 #ifndef SYSV386_COMPAT
13293 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13294 wants to fix the assemblers because that causes incompatibility
13295 with gcc. No-one wants to fix gcc because that causes
13296 incompatibility with assemblers... You can use the option of
13297 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13298 #define SYSV386_COMPAT 1
13302 output_387_binary_op (rtx insn, rtx *operands)
13304 static char buf[40];
13307 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13309 #ifdef ENABLE_CHECKING
13310 /* Even if we do not want to check the inputs, this documents input
constraints, which helps in understanding the following code. */
13312 if (STACK_REG_P (operands[0])
13313 && ((REG_P (operands[1])
13314 && REGNO (operands[0]) == REGNO (operands[1])
13315 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13316 || (REG_P (operands[2])
13317 && REGNO (operands[0]) == REGNO (operands[2])
13318 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13319 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13322 gcc_assert (is_sse);
13325 switch (GET_CODE (operands[3]))
13328 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13329 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13337 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13338 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13346 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13347 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13355 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13356 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13364 gcc_unreachable ();
13371 strcpy (buf, ssep);
13372 if (GET_MODE (operands[0]) == SFmode)
13373 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13375 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13379 strcpy (buf, ssep + 1);
13380 if (GET_MODE (operands[0]) == SFmode)
13381 strcat (buf, "ss\t{%2, %0|%0, %2}");
13383 strcat (buf, "sd\t{%2, %0|%0, %2}");
13389 switch (GET_CODE (operands[3]))
13393 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13395 rtx temp = operands[2];
13396 operands[2] = operands[1];
13397 operands[1] = temp;
/* We now know operands[0] == operands[1]. */
13402 if (MEM_P (operands[2]))
13408 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13410 if (STACK_TOP_P (operands[0]))
13411 /* How is it that we are storing to a dead operand[2]?
13412 Well, presumably operands[1] is dead too. We can't
13413 store the result to st(0) as st(0) gets popped on this
13414 instruction. Instead store to operands[2] (which I
13415 think has to be st(1)). st(1) will be popped later.
13416 gcc <= 2.8.1 didn't have this check and generated
13417 assembly code that the Unixware assembler rejected. */
13418 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13420 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13424 if (STACK_TOP_P (operands[0]))
13425 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13427 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13432 if (MEM_P (operands[1]))
13438 if (MEM_P (operands[2]))
13444 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13447 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13448 derived assemblers, confusingly reverse the direction of
13449 the operation for fsub{r} and fdiv{r} when the
13450 destination register is not st(0). The Intel assembler
13451 doesn't have this brain damage. Read !SYSV386_COMPAT to
13452 figure out what the hardware really does. */
13453 if (STACK_TOP_P (operands[0]))
13454 p = "{p\t%0, %2|rp\t%2, %0}";
13456 p = "{rp\t%2, %0|p\t%0, %2}";
13458 if (STACK_TOP_P (operands[0]))
13459 /* As above for fmul/fadd, we can't store to st(0). */
13460 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13462 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13467 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13470 if (STACK_TOP_P (operands[0]))
13471 p = "{rp\t%0, %1|p\t%1, %0}";
13473 p = "{p\t%1, %0|rp\t%0, %1}";
13475 if (STACK_TOP_P (operands[0]))
13476 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13478 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13483 if (STACK_TOP_P (operands[0]))
13485 if (STACK_TOP_P (operands[1]))
13486 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13488 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13491 else if (STACK_TOP_P (operands[1]))
13494 p = "{\t%1, %0|r\t%0, %1}";
13496 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13502 p = "{r\t%2, %0|\t%0, %2}";
13504 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13510 gcc_unreachable ();
/* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
13520 ix86_mode_needed (int entity, rtx insn)
13522 enum attr_i387_cw mode;
/* The mode UNINITIALIZED is used to store the control word after a
function call or ASM pattern.  The mode ANY specifies that the
function has no requirements on the control word and makes no
changes in the bits we are interested in. */
13530 || (NONJUMP_INSN_P (insn)
13531 && (asm_noperands (PATTERN (insn)) >= 0
13532 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13533 return I387_CW_UNINITIALIZED;
13535 if (recog_memoized (insn) < 0)
13536 return I387_CW_ANY;
13538 mode = get_attr_i387_cw (insn);
13543 if (mode == I387_CW_TRUNC)
13548 if (mode == I387_CW_FLOOR)
13553 if (mode == I387_CW_CEIL)
13558 if (mode == I387_CW_MASK_PM)
13563 gcc_unreachable ();
13566 return I387_CW_ANY;
13569 /* Output code to initialize control word copies used by trunc?f?i and
rounding patterns.  CURRENT_MODE is set to the current control word,
while NEW_MODE is set to the new control word.
13574 emit_i387_cw_initialization (int mode)
13576 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13579 enum ix86_stack_slot slot;
13581 rtx reg = gen_reg_rtx (HImode);
13583 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13584 emit_move_insn (reg, copy_rtx (stored_mode));
13586 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13587 || optimize_function_for_size_p (cfun))
13591 case I387_CW_TRUNC:
13592 /* round toward zero (truncate) */
13593 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13594 slot = SLOT_CW_TRUNC;
13597 case I387_CW_FLOOR:
13598 /* round down toward -oo */
13599 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13600 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13601 slot = SLOT_CW_FLOOR;
13605 /* round up toward +oo */
13606 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13607 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13608 slot = SLOT_CW_CEIL;
13611 case I387_CW_MASK_PM:
13612 /* mask precision exception for nearbyint() */
13613 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13614 slot = SLOT_CW_MASK_PM;
13618 gcc_unreachable ();
13625 case I387_CW_TRUNC:
13626 /* round toward zero (truncate) */
13627 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13628 slot = SLOT_CW_TRUNC;
13631 case I387_CW_FLOOR:
13632 /* round down toward -oo */
13633 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13634 slot = SLOT_CW_FLOOR;
13638 /* round up toward +oo */
13639 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13640 slot = SLOT_CW_CEIL;
13643 case I387_CW_MASK_PM:
13644 /* mask precision exception for nearbyint() */
13645 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13646 slot = SLOT_CW_MASK_PM;
13650 gcc_unreachable ();
13654 gcc_assert (slot < MAX_386_STACK_LOCALS);
13656 new_mode = assign_386_stack_local (HImode, slot);
13657 emit_move_insn (new_mode, reg);
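/* For reference, a sketch of the control word encodings used above:
   the x87 rounding-control field is bits 11:10, so once the AND with
   ~0x0c00 clears it, OR-ing in 0x0400 selects round-down, 0x0800
   round-up, and 0x0c00 (both bits) truncation; bit 5 (0x0020) is the
   precision-exception mask needed for nearbyint.  */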
13660 /* Output code for INSN to convert a float to a signed int. OPERANDS
13661 are the insn operands. The output may be [HSD]Imode and the input
13662 operand may be [SDX]Fmode. */
13665 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13667 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13668 int dimode_p = GET_MODE (operands[0]) == DImode;
13669 int round_mode = get_attr_i387_cw (insn);
13671 /* Jump through a hoop or two for DImode, since the hardware has no
13672 non-popping instruction. We used to do this a different way, but
13673 that was somewhat fragile and broke with post-reload splitters. */
13674 if ((dimode_p || fisttp) && !stack_top_dies)
13675 output_asm_insn ("fld\t%y1", operands);
13677 gcc_assert (STACK_TOP_P (operands[1]));
13678 gcc_assert (MEM_P (operands[0]));
13679 gcc_assert (GET_MODE (operands[1]) != TFmode);
13682 output_asm_insn ("fisttp%Z0\t%0", operands);
13685 if (round_mode != I387_CW_ANY)
13686 output_asm_insn ("fldcw\t%3", operands);
13687 if (stack_top_dies || dimode_p)
13688 output_asm_insn ("fistp%Z0\t%0", operands);
13690 output_asm_insn ("fist%Z0\t%0", operands);
13691 if (round_mode != I387_CW_ANY)
13692 output_asm_insn ("fldcw\t%2", operands);
13698 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13699 have the values zero or one, indicates the ffreep insn's operand
13700 from the OPERANDS array. */
13702 static const char *
13703 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13705 if (TARGET_USE_FFREEP)
13706 #ifdef HAVE_AS_IX86_FFREEP
13707 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13710 static char retval[32];
13711 int regno = REGNO (operands[opno]);
13713 gcc_assert (FP_REGNO_P (regno));
13715 regno -= FIRST_STACK_REG;
13717 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
13722 return opno ? "fstp\t%y1" : "fstp\t%y0";
13726 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13727 should be used. UNORDERED_P is true when fucom should be used. */
13730 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13732 int stack_top_dies;
13733 rtx cmp_op0, cmp_op1;
13734 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13738 cmp_op0 = operands[0];
13739 cmp_op1 = operands[1];
13743 cmp_op0 = operands[1];
13744 cmp_op1 = operands[2];
13749 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13750 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13751 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13752 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13754 if (GET_MODE (operands[0]) == SFmode)
13756 return &ucomiss[TARGET_AVX ? 0 : 1];
13758 return &comiss[TARGET_AVX ? 0 : 1];
13761 return &ucomisd[TARGET_AVX ? 0 : 1];
13763 return &comisd[TARGET_AVX ? 0 : 1];
13766 gcc_assert (STACK_TOP_P (cmp_op0));
13768 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13770 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13772 if (stack_top_dies)
13774 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13775 return output_387_ffreep (operands, 1);
13778 return "ftst\n\tfnstsw\t%0";
13781 if (STACK_REG_P (cmp_op1)
13783 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13784 && REGNO (cmp_op1) != FIRST_STACK_REG)
/* If both the top of the 387 stack and the other operand (also a
stack register) die, then this must be a
`fcompp' float compare.  */
13792 /* There is no double popping fcomi variant. Fortunately,
eflags is immune to the fstp's cc clobbering. */
13795 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13797 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13798 return output_387_ffreep (operands, 0);
13803 return "fucompp\n\tfnstsw\t%0";
13805 return "fcompp\n\tfnstsw\t%0";
13810 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13812 static const char * const alt[16] =
13814 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13815 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13816 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13817 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13819 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13820 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13824 "fcomi\t{%y1, %0|%0, %y1}",
13825 "fcomip\t{%y1, %0|%0, %y1}",
13826 "fucomi\t{%y1, %0|%0, %y1}",
13827 "fucomip\t{%y1, %0|%0, %y1}",
13838 mask = eflags_p << 3;
13839 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13840 mask |= unordered_p << 1;
13841 mask |= stack_top_dies;
13843 gcc_assert (mask < 16);
13852 ix86_output_addr_vec_elt (FILE *file, int value)
13854 const char *directive = ASM_LONG;
13858 directive = ASM_QUAD;
13860 gcc_assert (!TARGET_64BIT);
13863 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13867 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13869 const char *directive = ASM_LONG;
13872 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13873 directive = ASM_QUAD;
13875 gcc_assert (!TARGET_64BIT);
13877 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13878 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13879 fprintf (file, "%s%s%d-%s%d\n",
13880 directive, LPREFIX, value, LPREFIX, rel);
13881 else if (HAVE_AS_GOTOFF_IN_DATA)
13882 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13884 else if (TARGET_MACHO)
13886 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13887 machopic_output_function_base_name (file);
13892 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13893 GOT_SYMBOL_NAME, LPREFIX, value);
13896 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
13900 ix86_expand_clear (rtx dest)
13904 /* We play register width games, which are only valid after reload. */
13905 gcc_assert (reload_completed);
13907 /* Avoid HImode and its attendant prefix byte. */
13908 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13909 dest = gen_rtx_REG (SImode, REGNO (dest));
13910 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13912 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13913 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13915 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13916 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
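/* E.g. (a sketch): clearing %eax this way emits the 2-byte
   "xorl %eax, %eax", whose flags clobber the PARALLEL above makes
   explicit, instead of the 5-byte, flags-preserving
   "movl $0, %eax".  */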
13922 /* X is an unchanging MEM. If it is a constant pool reference, return
13923 the constant pool rtx, else NULL. */
13926 maybe_get_pool_constant (rtx x)
13928 x = ix86_delegitimize_address (XEXP (x, 0));
13930 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13931 return get_pool_constant (x);
13937 ix86_expand_move (enum machine_mode mode, rtx operands[])
13940 enum tls_model model;
13945 if (GET_CODE (op1) == SYMBOL_REF)
13947 model = SYMBOL_REF_TLS_MODEL (op1);
13950 op1 = legitimize_tls_address (op1, model, true);
13951 op1 = force_operand (op1, op0);
13955 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13956 && SYMBOL_REF_DLLIMPORT_P (op1))
13957 op1 = legitimize_dllimport_symbol (op1, false);
13959 else if (GET_CODE (op1) == CONST
13960 && GET_CODE (XEXP (op1, 0)) == PLUS
13961 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13963 rtx addend = XEXP (XEXP (op1, 0), 1);
13964 rtx symbol = XEXP (XEXP (op1, 0), 0);
13967 model = SYMBOL_REF_TLS_MODEL (symbol);
13969 tmp = legitimize_tls_address (symbol, model, true);
13970 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13971 && SYMBOL_REF_DLLIMPORT_P (symbol))
13972 tmp = legitimize_dllimport_symbol (symbol, true);
13976 tmp = force_operand (tmp, NULL);
13977 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13978 op0, 1, OPTAB_DIRECT);
13984 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13986 if (TARGET_MACHO && !TARGET_64BIT)
13991 rtx temp = ((reload_in_progress
13992 || ((op0 && REG_P (op0))
13994 ? op0 : gen_reg_rtx (Pmode));
13995 op1 = machopic_indirect_data_reference (op1, temp);
13996 op1 = machopic_legitimize_pic_address (op1, mode,
13997 temp == op1 ? 0 : temp);
13999 else if (MACHOPIC_INDIRECT)
14000 op1 = machopic_indirect_data_reference (op1, 0);
14008 op1 = force_reg (Pmode, op1);
14009 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
14011 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
14012 op1 = legitimize_pic_address (op1, reg);
14021 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
14022 || !push_operand (op0, mode))
14024 op1 = force_reg (mode, op1);
14026 if (push_operand (op0, mode)
14027 && ! general_no_elim_operand (op1, mode))
14028 op1 = copy_to_mode_reg (mode, op1);
/* Force large constants in 64-bit compilation into a register
to get them CSEed. */
14032 if (can_create_pseudo_p ()
14033 && (mode == DImode) && TARGET_64BIT
14034 && immediate_operand (op1, mode)
14035 && !x86_64_zext_immediate_operand (op1, VOIDmode)
14036 && !register_operand (op0, mode)
14038 op1 = copy_to_mode_reg (mode, op1);
14040 if (can_create_pseudo_p ()
14041 && FLOAT_MODE_P (mode)
14042 && GET_CODE (op1) == CONST_DOUBLE)
14044 /* If we are loading a floating point constant to a register,
14045 force the value to memory now, since we'll get better code
out of the back end. */
14048 op1 = validize_mem (force_const_mem (mode, op1));
14049 if (!register_operand (op0, mode))
14051 rtx temp = gen_reg_rtx (mode);
14052 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
14053 emit_move_insn (op0, temp);
14059 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14063 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
14065 rtx op0 = operands[0], op1 = operands[1];
14066 unsigned int align = GET_MODE_ALIGNMENT (mode);
14068 /* Force constants other than zero into memory. We do not know how
14069 the instructions used to build constants modify the upper 64 bits
of the register; once we have that information we may be able
14071 to handle some of them more efficiently. */
14072 if (can_create_pseudo_p ()
14073 && register_operand (op0, mode)
14074 && (CONSTANT_P (op1)
14075 || (GET_CODE (op1) == SUBREG
14076 && CONSTANT_P (SUBREG_REG (op1))))
14077 && !standard_sse_constant_p (op1))
14078 op1 = validize_mem (force_const_mem (mode, op1));
/* We need to check memory alignment for SSE mode since attributes
14081 can make operands unaligned. */
14082 if (can_create_pseudo_p ()
14083 && SSE_REG_MODE_P (mode)
14084 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
14085 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
14089 /* ix86_expand_vector_move_misalign() does not like constants ... */
14090 if (CONSTANT_P (op1)
14091 || (GET_CODE (op1) == SUBREG
14092 && CONSTANT_P (SUBREG_REG (op1))))
14093 op1 = validize_mem (force_const_mem (mode, op1));
14095 /* ... nor both arguments in memory. */
14096 if (!register_operand (op0, mode)
14097 && !register_operand (op1, mode))
14098 op1 = force_reg (mode, op1);
14100 tmp[0] = op0; tmp[1] = op1;
14101 ix86_expand_vector_move_misalign (mode, tmp);
14105 /* Make operand1 a register if it isn't already. */
14106 if (can_create_pseudo_p ()
14107 && !register_operand (op0, mode)
14108 && !register_operand (op1, mode))
14110 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
14114 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
14117 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
14118 straight to ix86_expand_vector_move. */
14119 /* Code generation for scalar reg-reg moves of single and double precision data:
if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
14124 if (x86_sse_partial_reg_dependency == true)
14129 Code generation for scalar loads of double precision data:
14130 if (x86_sse_split_regs == true)
14131 movlpd mem, reg (gas syntax)
14135 Code generation for unaligned packed loads of single precision data
14136 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
14137 if (x86_sse_unaligned_move_optimal)
14140 if (x86_sse_partial_reg_dependency == true)
14152 Code generation for unaligned packed loads of double precision data
14153 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
14154 if (x86_sse_unaligned_move_optimal)
14157 if (x86_sse_split_regs == true)
14170 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
14179 switch (GET_MODE_CLASS (mode))
14181 case MODE_VECTOR_INT:
14183 switch (GET_MODE_SIZE (mode))
14186 /* If we're optimizing for size, movups is the smallest. */
14187 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14189 op0 = gen_lowpart (V4SFmode, op0);
14190 op1 = gen_lowpart (V4SFmode, op1);
14191 emit_insn (gen_avx_movups (op0, op1));
14194 op0 = gen_lowpart (V16QImode, op0);
14195 op1 = gen_lowpart (V16QImode, op1);
14196 emit_insn (gen_avx_movdqu (op0, op1));
14199 op0 = gen_lowpart (V32QImode, op0);
14200 op1 = gen_lowpart (V32QImode, op1);
14201 emit_insn (gen_avx_movdqu256 (op0, op1));
14204 gcc_unreachable ();
14207 case MODE_VECTOR_FLOAT:
14208 op0 = gen_lowpart (mode, op0);
14209 op1 = gen_lowpart (mode, op1);
14214 emit_insn (gen_avx_movups (op0, op1));
14217 emit_insn (gen_avx_movups256 (op0, op1));
14220 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14222 op0 = gen_lowpart (V4SFmode, op0);
14223 op1 = gen_lowpart (V4SFmode, op1);
14224 emit_insn (gen_avx_movups (op0, op1));
14227 emit_insn (gen_avx_movupd (op0, op1));
14230 emit_insn (gen_avx_movupd256 (op0, op1));
14233 gcc_unreachable ();
14238 gcc_unreachable ();
14246 /* If we're optimizing for size, movups is the smallest. */
14247 if (optimize_insn_for_size_p ()
14248 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14250 op0 = gen_lowpart (V4SFmode, op0);
14251 op1 = gen_lowpart (V4SFmode, op1);
14252 emit_insn (gen_sse_movups (op0, op1));
14256 /* ??? If we have typed data, then it would appear that using
14257 movdqu is the only way to get unaligned data loaded with
14259 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14261 op0 = gen_lowpart (V16QImode, op0);
14262 op1 = gen_lowpart (V16QImode, op1);
14263 emit_insn (gen_sse2_movdqu (op0, op1));
14267 if (TARGET_SSE2 && mode == V2DFmode)
14271 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14273 op0 = gen_lowpart (V2DFmode, op0);
14274 op1 = gen_lowpart (V2DFmode, op1);
14275 emit_insn (gen_sse2_movupd (op0, op1));
14279 /* When SSE registers are split into halves, we can avoid
14280 writing to the top half twice. */
14281 if (TARGET_SSE_SPLIT_REGS)
14283 emit_clobber (op0);
14288 /* ??? Not sure about the best option for the Intel chips.
14289 The following would seem to satisfy; the register is
14290 entirely cleared, breaking the dependency chain. We
14291 then store to the upper half, with a dependency depth
14292 of one. A rumor has it that Intel recommends two movsd
14293 followed by an unpacklpd, but this is unconfirmed. And
14294 given that the dependency depth of the unpacklpd would
14295 still be one, I'm not sure why this would be better. */
14296 zero = CONST0_RTX (V2DFmode);
14299 m = adjust_address (op1, DFmode, 0);
14300 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14301 m = adjust_address (op1, DFmode, 8);
14302 emit_insn (gen_sse2_loadhpd (op0, op0, m));
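	      /* I.e. roughly (a sketch in AT&T syntax):
		     xorpd  %xmm0, %xmm0
		     movlpd (mem), %xmm0
		     movhpd 8(mem), %xmm0
		 so each half of the destination is written exactly once
		 after the dependency-breaking clear.  */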
14306 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14308 op0 = gen_lowpart (V4SFmode, op0);
14309 op1 = gen_lowpart (V4SFmode, op1);
14310 emit_insn (gen_sse_movups (op0, op1));
14314 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14315 emit_move_insn (op0, CONST0_RTX (mode));
14317 emit_clobber (op0);
14319 if (mode != V4SFmode)
14320 op0 = gen_lowpart (V4SFmode, op0);
14321 m = adjust_address (op1, V2SFmode, 0);
14322 emit_insn (gen_sse_loadlps (op0, op0, m));
14323 m = adjust_address (op1, V2SFmode, 8);
14324 emit_insn (gen_sse_loadhps (op0, op0, m));
14327 else if (MEM_P (op0))
14329 /* If we're optimizing for size, movups is the smallest. */
14330 if (optimize_insn_for_size_p ()
14331 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14333 op0 = gen_lowpart (V4SFmode, op0);
14334 op1 = gen_lowpart (V4SFmode, op1);
14335 emit_insn (gen_sse_movups (op0, op1));
/* ??? Similar to above, only less clear because of the
"typeless stores" there. */
14341 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14342 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14344 op0 = gen_lowpart (V16QImode, op0);
14345 op1 = gen_lowpart (V16QImode, op1);
14346 emit_insn (gen_sse2_movdqu (op0, op1));
14350 if (TARGET_SSE2 && mode == V2DFmode)
14352 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14354 op0 = gen_lowpart (V2DFmode, op0);
14355 op1 = gen_lowpart (V2DFmode, op1);
14356 emit_insn (gen_sse2_movupd (op0, op1));
14360 m = adjust_address (op0, DFmode, 0);
14361 emit_insn (gen_sse2_storelpd (m, op1));
14362 m = adjust_address (op0, DFmode, 8);
14363 emit_insn (gen_sse2_storehpd (m, op1));
14368 if (mode != V4SFmode)
14369 op1 = gen_lowpart (V4SFmode, op1);
14371 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14373 op0 = gen_lowpart (V4SFmode, op0);
14374 emit_insn (gen_sse_movups (op0, op1));
14378 m = adjust_address (op0, V2SFmode, 0);
14379 emit_insn (gen_sse_storelps (m, op1));
14380 m = adjust_address (op0, V2SFmode, 8);
14381 emit_insn (gen_sse_storehps (m, op1));
14386 gcc_unreachable ();
14389 /* Expand a push in MODE. This is some mode for which we do not support
14390 proper push instructions, at least from the registers that we expect
14391 the value to live in. */
14394 ix86_expand_push (enum machine_mode mode, rtx x)
14398 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14399 GEN_INT (-GET_MODE_SIZE (mode)),
14400 stack_pointer_rtx, 1, OPTAB_DIRECT);
14401 if (tmp != stack_pointer_rtx)
14402 emit_move_insn (stack_pointer_rtx, tmp);
14404 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
/* When we push an operand onto the stack, it has to be aligned at least
14407 at the function argument boundary. However since we don't have
14408 the argument type, we can't determine the actual argument
14410 emit_move_insn (tmp, x);
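/* A sketch of the result for an 8-byte mode on x86_32:
       subl  $8, %esp
       <move the value to (%esp)>
   i.e. an explicit stack-pointer adjustment followed by an ordinary
   store into the newly allocated slot.  */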
14413 /* Helper function of ix86_fixup_binary_operands to canonicalize
14414 operand order. Returns true if the operands should be swapped. */
14417 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14420 rtx dst = operands[0];
14421 rtx src1 = operands[1];
14422 rtx src2 = operands[2];
14424 /* If the operation is not commutative, we can't do anything. */
14425 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14428 /* Highest priority is that src1 should match dst. */
14429 if (rtx_equal_p (dst, src1))
14431 if (rtx_equal_p (dst, src2))
14434 /* Next highest priority is that immediate constants come second. */
14435 if (immediate_operand (src2, mode))
14437 if (immediate_operand (src1, mode))
14440 /* Lowest priority is that memory references should come second. */
14450 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14451 destination to use for the operation. If different from the true
14452 destination in operands[0], a copy operation will be required. */
14455 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14458 rtx dst = operands[0];
14459 rtx src1 = operands[1];
14460 rtx src2 = operands[2];
14462 /* Canonicalize operand order. */
14463 if (ix86_swap_binary_operands_p (code, mode, operands))
14467 /* It is invalid to swap operands of different modes. */
14468 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14475 /* Both source operands cannot be in memory. */
14476 if (MEM_P (src1) && MEM_P (src2))
14478 /* Optimization: Only read from memory once. */
14479 if (rtx_equal_p (src1, src2))
14481 src2 = force_reg (mode, src2);
14485 src2 = force_reg (mode, src2);
14488 /* If the destination is memory, and we do not have matching source
14489 operands, do things in registers. */
14490 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14491 dst = gen_reg_rtx (mode);
14493 /* Source 1 cannot be a constant. */
14494 if (CONSTANT_P (src1))
14495 src1 = force_reg (mode, src1);
14497 /* Source 1 cannot be a non-matching memory. */
14498 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14499 src1 = force_reg (mode, src1);
14501 operands[1] = src1;
14502 operands[2] = src2;
14506 /* Similarly, but assume that the destination has already been
14507 set up properly. */
14510 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14511 enum machine_mode mode, rtx operands[])
14513 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14514 gcc_assert (dst == operands[0]);
14517 /* Attempt to expand a binary operator. Make the expansion closer to the
actual machine than just general_operand, which will allow 3 separate
14519 memory references (one output, two input) in a single insn. */
14522 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14525 rtx src1, src2, dst, op, clob;
14527 dst = ix86_fixup_binary_operands (code, mode, operands);
14528 src1 = operands[1];
14529 src2 = operands[2];
14531 /* Emit the instruction. */
14533 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14534 if (reload_in_progress)
14536 /* Reload doesn't know about the flags register, and doesn't know that
14537 it doesn't want to clobber it. We can only do this with PLUS. */
14538 gcc_assert (code == PLUS);
14543 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14544 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14547 /* Fix up the destination if needed. */
14548 if (dst != operands[0])
14549 emit_move_insn (operands[0], dst);
14552 /* Return TRUE or FALSE depending on whether the binary operator meets the
14553 appropriate constraints. */
14556 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14559 rtx dst = operands[0];
14560 rtx src1 = operands[1];
14561 rtx src2 = operands[2];
14563 /* Both source operands cannot be in memory. */
14564 if (MEM_P (src1) && MEM_P (src2))
14567 /* Canonicalize operand order for commutative operators. */
14568 if (ix86_swap_binary_operands_p (code, mode, operands))
14575 /* If the destination is memory, we must have a matching source operand. */
14576 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14579 /* Source 1 cannot be a constant. */
14580 if (CONSTANT_P (src1))
14583 /* Source 1 cannot be a non-matching memory. */
14584 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14590 /* Attempt to expand a unary operator. Make the expansion closer to the
actual machine than just general_operand, which will allow 2 separate
14592 memory references (one output, one input) in a single insn. */
14595 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14598 int matching_memory;
14599 rtx src, dst, op, clob;
14604 /* If the destination is memory, and we do not have matching source
14605 operands, do things in registers. */
14606 matching_memory = 0;
14609 if (rtx_equal_p (dst, src))
14610 matching_memory = 1;
14612 dst = gen_reg_rtx (mode);
14615 /* When source operand is memory, destination must match. */
14616 if (MEM_P (src) && !matching_memory)
14617 src = force_reg (mode, src);
14619 /* Emit the instruction. */
14621 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14622 if (reload_in_progress || code == NOT)
14624 /* Reload doesn't know about the flags register, and doesn't know that
14625 it doesn't want to clobber it. */
14626 gcc_assert (code == NOT);
14631 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14632 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14635 /* Fix up the destination if needed. */
14636 if (dst != operands[0])
14637 emit_move_insn (operands[0], dst);
14640 #define LEA_SEARCH_THRESHOLD 12
/* Search backward for a non-AGU definition of register number REGNO1
or register number REGNO2 in INSN's basic block until we
1. pass LEA_SEARCH_THRESHOLD instructions, or
2. reach the BB boundary, or
3. reach an AGU definition.
Return the distance between the non-AGU definition point and INSN.
If there is no definition point, return -1.
14651 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14654 basic_block bb = BLOCK_FOR_INSN (insn);
14657 enum attr_type insn_type;
14659 if (insn != BB_HEAD (bb))
14661 rtx prev = PREV_INSN (insn);
14662 while (prev && distance < LEA_SEARCH_THRESHOLD)
14664 if (NONDEBUG_INSN_P (prev))
14667 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14668 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14669 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14670 && (regno1 == DF_REF_REGNO (*def_rec)
14671 || regno2 == DF_REF_REGNO (*def_rec)))
14673 insn_type = get_attr_type (prev);
14674 if (insn_type != TYPE_LEA)
14678 if (prev == BB_HEAD (bb))
14680 prev = PREV_INSN (prev);
14684 if (distance < LEA_SEARCH_THRESHOLD)
14688 bool simple_loop = false;
14690 FOR_EACH_EDGE (e, ei, bb->preds)
14693 simple_loop = true;
14699 rtx prev = BB_END (bb);
14702 && distance < LEA_SEARCH_THRESHOLD)
14704 if (NONDEBUG_INSN_P (prev))
14707 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14708 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14709 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14710 && (regno1 == DF_REF_REGNO (*def_rec)
14711 || regno2 == DF_REF_REGNO (*def_rec)))
14713 insn_type = get_attr_type (prev);
14714 if (insn_type != TYPE_LEA)
14718 prev = PREV_INSN (prev);
14726 /* get_attr_type may modify recog data. We want to make sure
14727 that recog data is valid for instruction INSN, on which
14728 distance_non_agu_define is called. INSN is unchanged here. */
14729 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
register number REGNO0 in a memory address.  Return -1 if no such
use is found within LEA_SEARCH_THRESHOLD or if REGNO0 is set. */
14738 distance_agu_use (unsigned int regno0, rtx insn)
14740 basic_block bb = BLOCK_FOR_INSN (insn);
14745 if (insn != BB_END (bb))
14747 rtx next = NEXT_INSN (insn);
14748 while (next && distance < LEA_SEARCH_THRESHOLD)
14750 if (NONDEBUG_INSN_P (next))
14754 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14755 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14756 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14757 && regno0 == DF_REF_REGNO (*use_rec))
/* Return DISTANCE if OP0 is used in a memory
address in NEXT. */
14764 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14765 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14766 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14767 && regno0 == DF_REF_REGNO (*def_rec))
14769 /* Return -1 if OP0 is set in NEXT. */
14773 if (next == BB_END (bb))
14775 next = NEXT_INSN (next);
14779 if (distance < LEA_SEARCH_THRESHOLD)
14783 bool simple_loop = false;
14785 FOR_EACH_EDGE (e, ei, bb->succs)
14788 simple_loop = true;
14794 rtx next = BB_HEAD (bb);
14797 && distance < LEA_SEARCH_THRESHOLD)
14799 if (NONDEBUG_INSN_P (next))
14803 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14804 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14805 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14806 && regno0 == DF_REF_REGNO (*use_rec))
/* Return DISTANCE if OP0 is used in a memory
address in NEXT. */
14813 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14814 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14815 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14816 && regno0 == DF_REF_REGNO (*def_rec))
14818 /* Return -1 if OP0 is set in NEXT. */
14823 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs. ADD; it takes effect
when there is a choice between LEA and ADD.
Negative value: ADD is preferred over LEA
Positive value: LEA is preferred over ADD */
14836 #define IX86_LEA_PRIORITY 2
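/* E.g. with the value 2 (a sketch of the arithmetic used in
   ix86_lea_for_add_ok below): dist_define = 1 and dist_use = 5 give
   1 + 2 < 5, so the nearby non-AGU definition wins and ADD is
   chosen; a larger value makes that test fail more often and so
   biases the choice toward LEA.  */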
/* Return true if it is ok to optimize an ADD operation to a LEA
operation to avoid flag register consumption.  For most processors,
ADD is faster than LEA.  For processors like ATOM, if the
destination register of LEA holds an actual address which will be
used soon, LEA is better; otherwise ADD is better. */
14845 ix86_lea_for_add_ok (rtx insn, rtx operands[])
14847 unsigned int regno0 = true_regnum (operands[0]);
14848 unsigned int regno1 = true_regnum (operands[1]);
14849 unsigned int regno2 = true_regnum (operands[2]);
/* If a = b + c with a != b and a != c, we must use the LEA form. */
14852 if (regno0 != regno1 && regno0 != regno2)
14855 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14859 int dist_define, dist_use;
14860 dist_define = distance_non_agu_define (regno1, regno2, insn);
14861 if (dist_define <= 0)
/* If this insn has both a backward non-AGU dependence and a forward
AGU dependence, the one with the shorter distance takes effect. */
14866 dist_use = distance_agu_use (regno0, insn);
14868 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
/* Return true if the destination reg of SET_BODY is the shift count of
USE_BODY.  */
14879 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14885 /* Retrieve destination of SET_BODY. */
14886 switch (GET_CODE (set_body))
14889 set_dest = SET_DEST (set_body);
14890 if (!set_dest || !REG_P (set_dest))
14894 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14895 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14903 /* Retrieve shift count of USE_BODY. */
14904 switch (GET_CODE (use_body))
14907 shift_rtx = XEXP (use_body, 1);
14910 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14911 if (ix86_dep_by_shift_count_body (set_body,
14912 XVECEXP (use_body, 0, i)))
14920 && (GET_CODE (shift_rtx) == ASHIFT
14921 || GET_CODE (shift_rtx) == LSHIFTRT
14922 || GET_CODE (shift_rtx) == ASHIFTRT
14923 || GET_CODE (shift_rtx) == ROTATE
14924 || GET_CODE (shift_rtx) == ROTATERT))
14926 rtx shift_count = XEXP (shift_rtx, 1);
14928 /* Return true if shift count is dest of SET_BODY. */
14929 if (REG_P (shift_count)
14930 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if the destination reg of SET_INSN is the shift count of
USE_INSN.  */
14941 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14943 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14944 PATTERN (use_insn));
14947 /* Return TRUE or FALSE depending on whether the unary operator meets the
14948 appropriate constraints. */
14951 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14952 enum machine_mode mode ATTRIBUTE_UNUSED,
14953 rtx operands[2] ATTRIBUTE_UNUSED)
/* If one of the operands is memory, source and destination must match. */
14956 if ((MEM_P (operands[0])
14957 || MEM_P (operands[1]))
14958 && ! rtx_equal_p (operands[0], operands[1]))
14963 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14964 are ok, keeping in mind the possible movddup alternative. */
14967 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14969 if (MEM_P (operands[0]))
14970 return rtx_equal_p (operands[0], operands[1 + high]);
14971 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14972 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14976 /* Post-reload splitter for converting an SF or DFmode value in an
14977 SSE register into an unsigned SImode. */
14980 ix86_split_convert_uns_si_sse (rtx operands[])
14982 enum machine_mode vecmode;
14983 rtx value, large, zero_or_two31, input, two31, x;
14985 large = operands[1];
14986 zero_or_two31 = operands[2];
14987 input = operands[3];
14988 two31 = operands[4];
14989 vecmode = GET_MODE (large);
14990 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14992 /* Load up the value into the low element. We must ensure that the other
14993 elements are valid floats -- zero is the easiest such value. */
14996 if (vecmode == V4SFmode)
14997 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14999 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
15003 input = gen_rtx_REG (vecmode, REGNO (input));
15004 emit_move_insn (value, CONST0_RTX (vecmode));
15005 if (vecmode == V4SFmode)
15006 emit_insn (gen_sse_movss (value, value, input));
15008 emit_insn (gen_sse2_movsd (value, value, input));
15011 emit_move_insn (large, two31);
15012 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
15014 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
15015 emit_insn (gen_rtx_SET (VOIDmode, large, x));
15017 x = gen_rtx_AND (vecmode, zero_or_two31, large);
15018 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
15020 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
15021 emit_insn (gen_rtx_SET (VOIDmode, value, x));
15023 large = gen_rtx_REG (V4SImode, REGNO (large));
15024 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
15026 x = gen_rtx_REG (V4SImode, REGNO (value));
15027 if (vecmode == V4SFmode)
15028 emit_insn (gen_sse2_cvttps2dq (x, value));
15030 emit_insn (gen_sse2_cvttpd2dq (x, value));
15033 emit_insn (gen_xorv4si3 (value, value, large));
15036 /* Convert an unsigned DImode value into a DFmode, using only SSE.
15037 Expects the 64-bit DImode to be supplied in a pair of integral
15038 registers. Requires SSE2; will use SSE3 if available. For x86_32,
15039 -mfpmath=sse, !optimize_size only. */
15042 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
15044 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
15045 rtx int_xmm, fp_xmm;
15046 rtx biases, exponents;
15049 int_xmm = gen_reg_rtx (V4SImode);
15050 if (TARGET_INTER_UNIT_MOVES)
15051 emit_insn (gen_movdi_to_sse (int_xmm, input));
15052 else if (TARGET_SSE_SPLIT_REGS)
15054 emit_clobber (int_xmm);
15055 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
15059 x = gen_reg_rtx (V2DImode);
15060 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
15061 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
15064 x = gen_rtx_CONST_VECTOR (V4SImode,
15065 gen_rtvec (4, GEN_INT (0x43300000UL),
15066 GEN_INT (0x45300000UL),
15067 const0_rtx, const0_rtx));
15068 exponents = validize_mem (force_const_mem (V4SImode, x));
15070 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
15071 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
/* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
15074 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
15075 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
15076 (0x1.0p84 + double(fp_value_hi_xmm)).
15077 Note these exponents differ by 32. */
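/* Worked example (a sketch): for the input 0x0000000100000002
   (hi = 1, lo = 2) the two biased doubles are 0x1.0p52 + 2 and
   0x1.0p84 + 0x1.0p32; after the bias subtraction below they become
   2.0 and 0x1.0p32, and their sum 4294967298.0 is exactly the input
   value.  */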
15079 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
15081 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
15082 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
15083 real_ldexp (&bias_lo_rvt, &dconst1, 52);
15084 real_ldexp (&bias_hi_rvt, &dconst1, 84);
15085 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
15086 x = const_double_from_real_value (bias_hi_rvt, DFmode);
15087 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
15088 biases = validize_mem (force_const_mem (V2DFmode, biases));
15089 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
15091 /* Add the upper and lower DFmode values together. */
15093 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
15096 x = copy_to_mode_reg (V2DFmode, fp_xmm);
15097 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
15098 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
15101 ix86_expand_vector_extract (false, target, fp_xmm, 0);
15104 /* Not used, but eases macroization of patterns. */
15106 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
15107 rtx input ATTRIBUTE_UNUSED)
15109 gcc_unreachable ();
15112 /* Convert an unsigned SImode value into a DFmode. Only currently used
15113 for SSE, but applicable anywhere. */
15116 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
15118 REAL_VALUE_TYPE TWO31r;
15121 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
15122 NULL, 1, OPTAB_DIRECT);
15124 fp = gen_reg_rtx (DFmode);
15125 emit_insn (gen_floatsidf2 (fp, x));
15127 real_ldexp (&TWO31r, &dconst1, 31);
15128 x = const_double_from_real_value (TWO31r, DFmode);
15130 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
15132 emit_move_insn (target, x);
15135 /* Convert a signed DImode value into a DFmode. Only used for SSE in
15136 32-bit mode; otherwise we have a direct convert instruction. */
15139 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
15141 REAL_VALUE_TYPE TWO32r;
15142 rtx fp_lo, fp_hi, x;
15144 fp_lo = gen_reg_rtx (DFmode);
15145 fp_hi = gen_reg_rtx (DFmode);
15147 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
15149 real_ldexp (&TWO32r, &dconst1, 32);
15150 x = const_double_from_real_value (TWO32r, DFmode);
15151 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
15153 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
15155 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
15158 emit_move_insn (target, x);
/* Convert an unsigned SImode value into an SFmode, using only SSE.
15162 For x86_32, -mfpmath=sse, !optimize_size only. */
15164 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
15166 REAL_VALUE_TYPE ONE16r;
15167 rtx fp_hi, fp_lo, int_hi, int_lo, x;
15169 real_ldexp (&ONE16r, &dconst1, 16);
15170 x = const_double_from_real_value (ONE16r, SFmode);
15171 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
15172 NULL, 0, OPTAB_DIRECT);
15173 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
15174 NULL, 0, OPTAB_DIRECT);
15175 fp_hi = gen_reg_rtx (SFmode);
15176 fp_lo = gen_reg_rtx (SFmode);
15177 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
15178 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
15179 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
15181 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
15183 if (!rtx_equal_p (target, fp_hi))
15184 emit_move_insn (target, fp_hi);
15187 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
15188    then replicate the value for all elements of the vector register.  */
15192 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
15199 v = gen_rtvec (4, value, value, value, value);
15200 return gen_rtx_CONST_VECTOR (V4SImode, v);
15204 v = gen_rtvec (2, value, value);
15205 return gen_rtx_CONST_VECTOR (V2DImode, v);
15209 v = gen_rtvec (4, value, value, value, value);
15211 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15212 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15213 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15217 v = gen_rtvec (2, value, value);
15219 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15220 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15223 gcc_unreachable ();
15227 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15228 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15229 for an SSE register. If VECT is true, then replicate the mask for
15230 all elements of the vector register. If INVERT is true, then create
15231 a mask excluding the sign bit. */
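/* For example, for SFmode the mask is 0x80000000 (just the sign bit),
   or 0x7fffffff with INVERT; with VECT it is replicated into all four
   elements of the V4SFmode constant.  */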
15234 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15236 enum machine_mode vec_mode, imode;
15237 HOST_WIDE_INT hi, lo;
15242 /* Find the sign bit, sign extended to 2*HWI. */
15248 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15249 lo = 0x80000000, hi = lo < 0;
15255 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15256 if (HOST_BITS_PER_WIDE_INT >= 64)
15257 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15259 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15264 vec_mode = VOIDmode;
15265 if (HOST_BITS_PER_WIDE_INT >= 64)
15268 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15275 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15279 lo = ~lo, hi = ~hi;
15285 mask = immed_double_const (lo, hi, imode);
15287 vec = gen_rtvec (2, v, mask);
15288 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15289 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15296 gcc_unreachable ();
15300 lo = ~lo, hi = ~hi;
15302 /* Force this value into the low part of a fp vector constant. */
15303 mask = immed_double_const (lo, hi, imode);
15304 mask = gen_lowpart (mode, mask);
15306 if (vec_mode == VOIDmode)
15307 return force_reg (mode, mask);
15309 v = ix86_build_const_vector (mode, vect, mask);
15310 return force_reg (vec_mode, v);
15313 /* Generate code for floating point ABS or NEG. */
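/* With SSE these reduce to bitwise operations on the sign bit:
   NEG is emitted as x ^ mask (mask = just the sign bit) and ABS as
   x & mask (mask = everything but the sign bit).  */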
15316 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15319 rtx mask, set, use, clob, dst, src;
15320 bool use_sse = false;
15321 bool vector_mode = VECTOR_MODE_P (mode);
15322 enum machine_mode elt_mode = mode;
15326 elt_mode = GET_MODE_INNER (mode);
15329 else if (mode == TFmode)
15331 else if (TARGET_SSE_MATH)
15332 use_sse = SSE_FLOAT_MODE_P (mode);
15334 /* NEG and ABS performed with SSE use bitwise mask operations.
15335 Create the appropriate mask now. */
15337 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15346 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15347 set = gen_rtx_SET (VOIDmode, dst, set);
15352 set = gen_rtx_fmt_e (code, mode, src);
15353 set = gen_rtx_SET (VOIDmode, dst, set);
15356 use = gen_rtx_USE (VOIDmode, mask);
15357 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15358 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15359 gen_rtvec (3, set, use, clob)));
15366 /* Expand a copysign operation. Special case operand 0 being a constant. */
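/* The expansion computes copysign (a, b) as
   (a & ~sign-mask) | (b & sign-mask).  When A is a constant, the first
   half folds to |A| at compile time, leaving only the AND and IOR with
   the sign of B at run time.  */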
15369 ix86_expand_copysign (rtx operands[])
15371 enum machine_mode mode;
15372 rtx dest, op0, op1, mask, nmask;
15374 dest = operands[0];
15378 mode = GET_MODE (dest);
15380 if (GET_CODE (op0) == CONST_DOUBLE)
15382 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15384 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15385 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15387 if (mode == SFmode || mode == DFmode)
15389 enum machine_mode vmode;
15391 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15393 if (op0 == CONST0_RTX (mode))
15394 op0 = CONST0_RTX (vmode);
15397 rtx v = ix86_build_const_vector (mode, false, op0);
15399 op0 = force_reg (vmode, v);
15402 else if (op0 != CONST0_RTX (mode))
15403 op0 = force_reg (mode, op0);
15405 mask = ix86_build_signbit_mask (mode, 0, 0);
15407 if (mode == SFmode)
15408 copysign_insn = gen_copysignsf3_const;
15409 else if (mode == DFmode)
15410 copysign_insn = gen_copysigndf3_const;
15412 copysign_insn = gen_copysigntf3_const;
15414 emit_insn (copysign_insn (dest, op0, op1, mask));
15418 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15420 nmask = ix86_build_signbit_mask (mode, 0, 1);
15421 mask = ix86_build_signbit_mask (mode, 0, 0);
15423 if (mode == SFmode)
15424 copysign_insn = gen_copysignsf3_var;
15425 else if (mode == DFmode)
15426 copysign_insn = gen_copysigndf3_var;
15428 copysign_insn = gen_copysigntf3_var;
15430 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15434 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15435 be a constant, and so has already been expanded into a vector constant. */
15438 ix86_split_copysign_const (rtx operands[])
15440 enum machine_mode mode, vmode;
15441 rtx dest, op0, mask, x;
15443 dest = operands[0];
15445 mask = operands[3];
15447 mode = GET_MODE (dest);
15448 vmode = GET_MODE (mask);
15450 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15451 x = gen_rtx_AND (vmode, dest, mask);
15452 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15454 if (op0 != CONST0_RTX (vmode))
15456 x = gen_rtx_IOR (vmode, dest, op0);
15457 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15461 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15462 so we have to do two masks. */
15465 ix86_split_copysign_var (rtx operands[])
15467 enum machine_mode mode, vmode;
15468 rtx dest, scratch, op0, op1, mask, nmask, x;
15470 dest = operands[0];
15471 scratch = operands[1];
15474 nmask = operands[4];
15475 mask = operands[5];
15477 mode = GET_MODE (dest);
15478 vmode = GET_MODE (mask);
15480 if (rtx_equal_p (op0, op1))
15482 /* Shouldn't happen often (it's useless, obviously), but when it does
15483 we'd generate incorrect code if we continue below. */
15484 emit_move_insn (dest, op0);
15488 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15490 gcc_assert (REGNO (op1) == REGNO (scratch));
15492 x = gen_rtx_AND (vmode, scratch, mask);
15493 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15496 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15497 x = gen_rtx_NOT (vmode, dest);
15498 x = gen_rtx_AND (vmode, x, op0);
15499 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15503 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15505 x = gen_rtx_AND (vmode, scratch, mask);
15507 else /* alternative 2,4 */
15509 gcc_assert (REGNO (mask) == REGNO (scratch));
15510 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15511 x = gen_rtx_AND (vmode, scratch, op1);
15513 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15515 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15517 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15518 x = gen_rtx_AND (vmode, dest, nmask);
15520 else /* alternative 3,4 */
15522 gcc_assert (REGNO (nmask) == REGNO (dest));
15524 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15525 x = gen_rtx_AND (vmode, dest, op0);
15527 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15530 x = gen_rtx_IOR (vmode, dest, scratch);
15531 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15534 /* Return TRUE or FALSE depending on whether the first SET in INSN
15535 has source and destination with matching CC modes, and that the
15536 CC mode is at least as constrained as REQ_MODE. */
15539 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15542 enum machine_mode set_mode;
15544 set = PATTERN (insn);
15545 if (GET_CODE (set) == PARALLEL)
15546 set = XVECEXP (set, 0, 0);
15547 gcc_assert (GET_CODE (set) == SET);
15548 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15550 set_mode = GET_MODE (SET_DEST (set));
15554 if (req_mode != CCNOmode
15555 && (req_mode != CCmode
15556 || XEXP (SET_SRC (set), 1) != const0_rtx))
15560 if (req_mode == CCGCmode)
15564 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15568 if (req_mode == CCZmode)
15579 gcc_unreachable ();
15582 return (GET_MODE (SET_SRC (set)) == set_mode);
15585 /* Generate insn patterns to do an integer compare of OPERANDS. */
15588 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15590 enum machine_mode cmpmode;
15593 cmpmode = SELECT_CC_MODE (code, op0, op1);
15594 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15596 /* This is very simple, but making the interface the same as in the
15597 FP case makes the rest of the code easier. */
15598 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15599 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15601 /* Return the test that should be put into the flags user, i.e.
15602 the bcc, scc, or cmov instruction. */
15603 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15606 /* Figure out whether to use ordered or unordered fp comparisons.
15607 Return the appropriate mode to use. */
15610 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15612 /* ??? In order to make all comparisons reversible, we do all comparisons
15613 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15614    the trapping and nontrapping forms of comparisons, we can make inequality
15615 comparisons trapping again, since it results in better code when using
15616 FCOM based compares. */
15617 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15621 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15623 enum machine_mode mode = GET_MODE (op0);
15625 if (SCALAR_FLOAT_MODE_P (mode))
15627 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15628 return ix86_fp_compare_mode (code);
15633 /* Only zero flag is needed. */
15634 case EQ: /* ZF=0 */
15635 case NE: /* ZF!=0 */
15637 /* Codes needing carry flag. */
15638 case GEU: /* CF=0 */
15639 case LTU: /* CF=1 */
15640 /* Detect overflow checks. They need just the carry flag. */
15641 if (GET_CODE (op0) == PLUS
15642 && rtx_equal_p (op1, XEXP (op0, 0)))
15646 case GTU: /* CF=0 & ZF=0 */
15647 case LEU: /* CF=1 | ZF=1 */
15648 /* Detect overflow checks. They need just the carry flag. */
15649 if (GET_CODE (op0) == MINUS
15650 && rtx_equal_p (op1, XEXP (op0, 0)))
15654 /* Codes possibly doable only with sign flag when
15655 comparing against zero. */
15656 case GE: /* SF=OF or SF=0 */
15657 case LT: /* SF<>OF or SF=1 */
15658 if (op1 == const0_rtx)
15661       /* For other cases the carry flag is not required.  */
15663 /* Codes doable only with sign flag when comparing
15664	 against zero, but we lack the jump instruction for it,
15665	 so we need to use relational tests against the overflow
15666	 flag, which thus needs to be zero.  */
15667 case GT: /* ZF=0 & SF=OF */
15668 case LE: /* ZF=1 | SF<>OF */
15669 if (op1 == const0_rtx)
15673       /* strcmp patterns do (use flags), and combine may ask us for the proper mode.  */
15678 gcc_unreachable ();
15682 /* Return the fixed registers used for condition codes. */
15685 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15692 /* If two condition code modes are compatible, return a condition code
15693    mode which is compatible with both.  Otherwise, return VOIDmode.  */
15696 static enum machine_mode
15697 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15702 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15705 if ((m1 == CCGCmode && m2 == CCGOCmode)
15706 || (m1 == CCGOCmode && m2 == CCGCmode))
15712 gcc_unreachable ();
15742       /* These are only compatible with themselves, which we already know.  */
15749 /* Return a comparison we can do that is equivalent to
15750    swap_condition (code), except possibly for orderedness.
15751 But, never change orderedness if TARGET_IEEE_FP, returning
15752 UNKNOWN in that case if necessary. */
15754 static enum rtx_code
15755 ix86_fp_swap_condition (enum rtx_code code)
15759 case GT: /* GTU - CF=0 & ZF=0 */
15760 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15761 case GE: /* GEU - CF=0 */
15762 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15763 case UNLT: /* LTU - CF=1 */
15764 return TARGET_IEEE_FP ? UNKNOWN : GT;
15765 case UNLE: /* LEU - CF=1 | ZF=1 */
15766 return TARGET_IEEE_FP ? UNKNOWN : GE;
15768 return swap_condition (code);
15772 /* Return the cost of comparison CODE using the best strategy for performance.
15773    All of the following functions use the number of instructions as a cost metric.
15774    In the future this should be tweaked to compute bytes for optimize_size and
15775    to take into account the performance of various instructions on various CPUs.  */
15778 ix86_fp_comparison_cost (enum rtx_code code)
15782 /* The cost of code using bit-twiddling on %ah. */
15799 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15803 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15806 gcc_unreachable ();
15809 switch (ix86_fp_comparison_strategy (code))
15811 case IX86_FPCMP_COMI:
15812 return arith_cost > 4 ? 3 : 2;
15813 case IX86_FPCMP_SAHF:
15814 return arith_cost > 4 ? 4 : 3;
15820 /* Return the strategy to use for floating-point comparisons.  We assume that
15821    fcomi is always preferable where available, since that is also true when looking at size
15822 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15824 enum ix86_fpcmp_strategy
15825 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15827 /* Do fcomi/sahf based test when profitable. */
15830 return IX86_FPCMP_COMI;
15832 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15833 return IX86_FPCMP_SAHF;
15835 return IX86_FPCMP_ARITH;
15838 /* Swap, force into registers, or otherwise massage the two operands
15839 to a fp comparison. The operands are updated in place; the new
15840 comparison code is returned. */
15842 static enum rtx_code
15843 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15845 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15846 rtx op0 = *pop0, op1 = *pop1;
15847 enum machine_mode op_mode = GET_MODE (op0);
15848 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15850 /* All of the unordered compare instructions only work on registers.
15851 The same is true of the fcomi compare instructions. The XFmode
15852 compare instructions require registers except when comparing
15853      against zero or when converting operand 1 from fixed point to floating point.  */
15857 && (fpcmp_mode == CCFPUmode
15858 || (op_mode == XFmode
15859 && ! (standard_80387_constant_p (op0) == 1
15860 || standard_80387_constant_p (op1) == 1)
15861 && GET_CODE (op1) != FLOAT)
15862 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15864 op0 = force_reg (op_mode, op0);
15865 op1 = force_reg (op_mode, op1);
15869 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15870 things around if they appear profitable, otherwise force op0
15871 into a register. */
15873 if (standard_80387_constant_p (op0) == 0
15875 && ! (standard_80387_constant_p (op1) == 0
15878 enum rtx_code new_code = ix86_fp_swap_condition (code);
15879 if (new_code != UNKNOWN)
15882 tmp = op0, op0 = op1, op1 = tmp;
15888 op0 = force_reg (op_mode, op0);
15890 if (CONSTANT_P (op1))
15892 int tmp = standard_80387_constant_p (op1);
15894 op1 = validize_mem (force_const_mem (op_mode, op1));
15898 op1 = force_reg (op_mode, op1);
15901 op1 = force_reg (op_mode, op1);
15905 /* Try to rearrange the comparison to make it cheaper. */
15906 if (ix86_fp_comparison_cost (code)
15907 > ix86_fp_comparison_cost (swap_condition (code))
15908 && (REG_P (op1) || can_create_pseudo_p ()))
15911 tmp = op0, op0 = op1, op1 = tmp;
15912 code = swap_condition (code);
15914 op0 = force_reg (op_mode, op0);
15922 /* Convert comparison codes we use to represent FP comparison to integer
15923    code that will result in a proper branch.  Return UNKNOWN if no such code is available.  */
15927 ix86_fp_compare_code_to_integer (enum rtx_code code)
15956 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15959 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15961 enum machine_mode fpcmp_mode, intcmp_mode;
15964 fpcmp_mode = ix86_fp_compare_mode (code);
15965 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15967 /* Do fcomi/sahf based test when profitable. */
15968 switch (ix86_fp_comparison_strategy (code))
15970 case IX86_FPCMP_COMI:
15971 intcmp_mode = fpcmp_mode;
15972 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15973 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15978 case IX86_FPCMP_SAHF:
15979 intcmp_mode = fpcmp_mode;
15980 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15981 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15985 scratch = gen_reg_rtx (HImode);
15986 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15987 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15990 case IX86_FPCMP_ARITH:
15991 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15992 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15993 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15995 scratch = gen_reg_rtx (HImode);
15996 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15998 /* In the unordered case, we have to check C2 for NaN's, which
15999 doesn't happen to work out to anything nice combination-wise.
16000 So do some bit twiddling on the value we've got in AH to come
16001 up with an appropriate set of condition codes. */
16003 intcmp_mode = CCNOmode;
16008 if (code == GT || !TARGET_IEEE_FP)
16010 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16015 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16016 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16017 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
16018 intcmp_mode = CCmode;
16024 if (code == LT && TARGET_IEEE_FP)
16026 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16027 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
16028 intcmp_mode = CCmode;
16033 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
16039 if (code == GE || !TARGET_IEEE_FP)
16041 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
16046 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16047 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
16053 if (code == LE && TARGET_IEEE_FP)
16055 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16056 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
16057 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16058 intcmp_mode = CCmode;
16063 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
16069 if (code == EQ && TARGET_IEEE_FP)
16071 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16072 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
16073 intcmp_mode = CCmode;
16078 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16084 if (code == NE && TARGET_IEEE_FP)
16086 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
16087 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
16093 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
16099 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16103 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
16108 gcc_unreachable ();
16116 /* Return the test that should be put into the flags user, i.e.
16117 the bcc, scc, or cmov instruction. */
16118 return gen_rtx_fmt_ee (code, VOIDmode,
16119 gen_rtx_REG (intcmp_mode, FLAGS_REG),
16124 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
16128 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
16129 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
16131 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
16133 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
16134 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16137 ret = ix86_expand_int_compare (code, op0, op1);
16143 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
16147 switch (GET_MODE (op0))
16156 tmp = ix86_expand_compare (code, op0, op1);
16157 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
16158 gen_rtx_LABEL_REF (VOIDmode, label),
16160 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
16167 /* Expand DImode branch into multiple compare+branch. */
16169 rtx lo[2], hi[2], label2;
16170 enum rtx_code code1, code2, code3;
16171 enum machine_mode submode;
16173 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
16175 tmp = op0, op0 = op1, op1 = tmp;
16176 code = swap_condition (code);
16178 if (GET_MODE (op0) == DImode)
16180 split_di (&op0, 1, lo+0, hi+0);
16181 split_di (&op1, 1, lo+1, hi+1);
16186 split_ti (&op0, 1, lo+0, hi+0);
16187 split_ti (&op1, 1, lo+1, hi+1);
16191 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
16192 avoid two branches. This costs one extra insn, so disable when
16193 optimizing for size. */
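	/* That is, "a == b" becomes "((hi(a)^hi(b)) | (lo(a)^lo(b))) == 0",
	   so a single conditional branch suffices.  */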
16195 if ((code == EQ || code == NE)
16196 && (!optimize_insn_for_size_p ()
16197 || hi[1] == const0_rtx || lo[1] == const0_rtx))
16202 if (hi[1] != const0_rtx)
16203 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
16204 NULL_RTX, 0, OPTAB_WIDEN);
16207 if (lo[1] != const0_rtx)
16208 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
16209 NULL_RTX, 0, OPTAB_WIDEN);
16211 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16212 NULL_RTX, 0, OPTAB_WIDEN);
16214 ix86_expand_branch (code, tmp, const0_rtx, label);
16218 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16219 op1 is a constant and the low word is zero, then we can just
16220 examine the high word. Similarly for low word -1 and
16221 less-or-equal-than or greater-than. */
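	/* For example, the unsigned test "a < 0x500000000" has a zero low
	   word, so it reduces to "hi(a) < 5"; likewise "a <= 0x4ffffffff"
	   (low word all ones) reduces to "hi(a) <= 4".  */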
16223 if (CONST_INT_P (hi[1]))
16226 case LT: case LTU: case GE: case GEU:
16227 if (lo[1] == const0_rtx)
16229 ix86_expand_branch (code, hi[0], hi[1], label);
16233 case LE: case LEU: case GT: case GTU:
16234 if (lo[1] == constm1_rtx)
16236 ix86_expand_branch (code, hi[0], hi[1], label);
16244 /* Otherwise, we need two or three jumps. */
16246 label2 = gen_label_rtx ();
16249 code2 = swap_condition (code);
16250 code3 = unsigned_condition (code);
16254 case LT: case GT: case LTU: case GTU:
16257 case LE: code1 = LT; code2 = GT; break;
16258 case GE: code1 = GT; code2 = LT; break;
16259 case LEU: code1 = LTU; code2 = GTU; break;
16260 case GEU: code1 = GTU; code2 = LTU; break;
16262 case EQ: code1 = UNKNOWN; code2 = NE; break;
16263 case NE: code2 = UNKNOWN; break;
16266 gcc_unreachable ();
16271 * if (hi(a) < hi(b)) goto true;
16272 * if (hi(a) > hi(b)) goto false;
16273 * if (lo(a) < lo(b)) goto true;
16277 if (code1 != UNKNOWN)
16278 ix86_expand_branch (code1, hi[0], hi[1], label);
16279 if (code2 != UNKNOWN)
16280 ix86_expand_branch (code2, hi[0], hi[1], label2);
16282 ix86_expand_branch (code3, lo[0], lo[1], label);
16284 if (code2 != UNKNOWN)
16285 emit_label (label2);
16290 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16295 /* Split branch based on floating point condition. */
16297 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16298 rtx target1, rtx target2, rtx tmp, rtx pushed)
16303 if (target2 != pc_rtx)
16306 code = reverse_condition_maybe_unordered (code);
16311 condition = ix86_expand_fp_compare (code, op1, op2,
16314 /* Remove pushed operand from stack. */
16316 ix86_free_from_memory (GET_MODE (pushed));
16318 i = emit_jump_insn (gen_rtx_SET
16320 gen_rtx_IF_THEN_ELSE (VOIDmode,
16321 condition, target1, target2)));
16322 if (split_branch_probability >= 0)
16323 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16327 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16331 gcc_assert (GET_MODE (dest) == QImode);
16333 ret = ix86_expand_compare (code, op0, op1);
16334 PUT_MODE (ret, QImode);
16335 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16338 /* Expand comparison setting or clearing carry flag. Return true when
16339 successful and set pop for the operation. */
16341 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16343 enum machine_mode mode =
16344 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16346   /* Do not handle DImode compares that go through the special path.  */
16347 if (mode == (TARGET_64BIT ? TImode : DImode))
16350 if (SCALAR_FLOAT_MODE_P (mode))
16352 rtx compare_op, compare_seq;
16354 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16356       /* Shortcut: the following common codes never translate
16357	 into carry flag compares.  */
16358 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16359 || code == ORDERED || code == UNORDERED)
16362       /* These comparisons require the zero flag; swap operands so they won't.  */
16363 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16364 && !TARGET_IEEE_FP)
16369 code = swap_condition (code);
16372       /* Try to expand the comparison and verify that we end up with
16373	 a carry flag based comparison.  This fails to be true only when
16374	 we decide to expand the comparison using arithmetic, which is
16375	 not a very common scenario.  */
16377 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16378 compare_seq = get_insns ();
16381 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16382 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16383 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16385 code = GET_CODE (compare_op);
16387 if (code != LTU && code != GEU)
16390 emit_insn (compare_seq);
16395 if (!INTEGRAL_MODE_P (mode))
16404 /* Convert a==0 into (unsigned)a<1. */
16407 if (op1 != const0_rtx)
16410 code = (code == EQ ? LTU : GEU);
16413       /* Convert a>b into b<a or a>=b+1.  */
16416 if (CONST_INT_P (op1))
16418 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16419	  /* Bail out on overflow.  We still can swap operands but that
16420	     would force loading of the constant into a register.  */
16421 if (op1 == const0_rtx
16422 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16424 code = (code == GTU ? GEU : LTU);
16431 code = (code == GTU ? LTU : GEU);
16435 /* Convert a>=0 into (unsigned)a<0x80000000. */
16438 if (mode == DImode || op1 != const0_rtx)
16440 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16441 code = (code == LT ? GEU : LTU);
16445 if (mode == DImode || op1 != constm1_rtx)
16447 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16448 code = (code == LE ? GEU : LTU);
16454   /* Swapping operands may cause a constant to appear as the first operand.  */
16455 if (!nonimmediate_operand (op0, VOIDmode))
16457 if (!can_create_pseudo_p ())
16459 op0 = force_reg (mode, op0);
16461 *pop = ix86_expand_compare (code, op0, op1);
16462 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
16467 ix86_expand_int_movcc (rtx operands[])
16469 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16470 rtx compare_seq, compare_op;
16471 enum machine_mode mode = GET_MODE (operands[0]);
16472 bool sign_bit_compare_p = false;
16473 rtx op0 = XEXP (operands[1], 0);
16474 rtx op1 = XEXP (operands[1], 1);
16477 compare_op = ix86_expand_compare (code, op0, op1);
16478 compare_seq = get_insns ();
16481 compare_code = GET_CODE (compare_op);
16483 if ((op1 == const0_rtx && (code == GE || code == LT))
16484 || (op1 == constm1_rtx && (code == GT || code == LE)))
16485 sign_bit_compare_p = true;
16487 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16488 HImode insns, we'd be swallowed in word prefix ops. */
16490 if ((mode != HImode || TARGET_FAST_PREFIX)
16491 && (mode != (TARGET_64BIT ? TImode : DImode))
16492 && CONST_INT_P (operands[2])
16493 && CONST_INT_P (operands[3]))
16495 rtx out = operands[0];
16496 HOST_WIDE_INT ct = INTVAL (operands[2]);
16497 HOST_WIDE_INT cf = INTVAL (operands[3]);
16498 HOST_WIDE_INT diff;
16501       /* Sign bit compares are better done using shifts than by using sbb.  */
16503 if (sign_bit_compare_p
16504 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
16506 /* Detect overlap between destination and compare sources. */
16509 if (!sign_bit_compare_p)
16512 bool fpcmp = false;
16514 compare_code = GET_CODE (compare_op);
16516 flags = XEXP (compare_op, 0);
16518 if (GET_MODE (flags) == CCFPmode
16519 || GET_MODE (flags) == CCFPUmode)
16523 = ix86_fp_compare_code_to_integer (compare_code);
16526	  /* To simplify the rest of the code, restrict to the GEU case.  */
16527 if (compare_code == LTU)
16529 HOST_WIDE_INT tmp = ct;
16532 compare_code = reverse_condition (compare_code);
16533 code = reverse_condition (code);
16538 PUT_CODE (compare_op,
16539 reverse_condition_maybe_unordered
16540 (GET_CODE (compare_op)));
16542 PUT_CODE (compare_op,
16543 reverse_condition (GET_CODE (compare_op)));
16547 if (reg_overlap_mentioned_p (out, op0)
16548 || reg_overlap_mentioned_p (out, op1))
16549 tmp = gen_reg_rtx (mode);
16551 if (mode == DImode)
16552 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16554 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16555 flags, compare_op));
16559 if (code == GT || code == GE)
16560 code = reverse_condition (code);
16563 HOST_WIDE_INT tmp = ct;
16568 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
16581 tmp = expand_simple_binop (mode, PLUS,
16583 copy_rtx (tmp), 1, OPTAB_DIRECT);
16594 tmp = expand_simple_binop (mode, IOR,
16596 copy_rtx (tmp), 1, OPTAB_DIRECT);
16598 else if (diff == -1 && ct)
16608 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16610 tmp = expand_simple_binop (mode, PLUS,
16611 copy_rtx (tmp), GEN_INT (cf),
16612 copy_rtx (tmp), 1, OPTAB_DIRECT);
16620 * andl cf - ct, dest
16630 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16633 tmp = expand_simple_binop (mode, AND,
16635 gen_int_mode (cf - ct, mode),
16636 copy_rtx (tmp), 1, OPTAB_DIRECT);
16638 tmp = expand_simple_binop (mode, PLUS,
16639 copy_rtx (tmp), GEN_INT (ct),
16640 copy_rtx (tmp), 1, OPTAB_DIRECT);
16643 if (!rtx_equal_p (tmp, out))
16644 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16646 return 1; /* DONE */
16651 enum machine_mode cmp_mode = GET_MODE (op0);
16654 tmp = ct, ct = cf, cf = tmp;
16657 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16659 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16661	      /* We may be reversing an unordered compare to a normal compare, which
16662		 is not valid in general (we may convert a non-trapping condition
16663		 to a trapping one); however, on i386 we currently emit all
16664		 comparisons unordered.  */
16665 compare_code = reverse_condition_maybe_unordered (compare_code);
16666 code = reverse_condition_maybe_unordered (code);
16670 compare_code = reverse_condition (compare_code);
16671 code = reverse_condition (code);
16675 compare_code = UNKNOWN;
16676 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
16677 && CONST_INT_P (op1))
16679 if (op1 == const0_rtx
16680 && (code == LT || code == GE))
16681 compare_code = code;
16682 else if (op1 == constm1_rtx)
16686 else if (code == GT)
16691 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16692 if (compare_code != UNKNOWN
16693 && GET_MODE (op0) == GET_MODE (out)
16694 && (cf == -1 || ct == -1))
16696 /* If lea code below could be used, only optimize
16697 if it results in a 2 insn sequence. */
16699 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16700 || diff == 3 || diff == 5 || diff == 9)
16701 || (compare_code == LT && ct == -1)
16702 || (compare_code == GE && cf == -1))
16705 * notl op1 (if necessary)
16713 code = reverse_condition (code);
16716 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16718 out = expand_simple_binop (mode, IOR,
16720 out, 1, OPTAB_DIRECT);
16721 if (out != operands[0])
16722 emit_move_insn (operands[0], out);
16724 return 1; /* DONE */
16729 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16730 || diff == 3 || diff == 5 || diff == 9)
16731 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16733 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16739 * lea cf(dest*(ct-cf)),dest
16743 * This also catches the degenerate setcc-only case.
16749 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16752	  /* On x86_64 the lea instruction operates on Pmode, so we need
16753	     to get the arithmetic done in the proper mode to match.  */
16755 tmp = copy_rtx (out);
16759 out1 = copy_rtx (out);
16760 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16764 tmp = gen_rtx_PLUS (mode, tmp, out1);
16770 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16773 if (!rtx_equal_p (tmp, out))
16776 out = force_operand (tmp, copy_rtx (out));
16778 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16780 if (!rtx_equal_p (out, operands[0]))
16781 emit_move_insn (operands[0], copy_rtx (out));
16783 return 1; /* DONE */
16787 * General case: Jumpful:
16788 * xorl dest,dest cmpl op1, op2
16789 * cmpl op1, op2 movl ct, dest
16790 * setcc dest jcc 1f
16791 * decl dest movl cf, dest
16792 * andl (cf-ct),dest 1:
16795 * Size 20. Size 14.
16797       *   This is reasonably steep, but branch mispredict costs are
16798       *   high on modern cpus, so consider failing only if optimizing for size.  */
16802 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16803 && BRANCH_COST (optimize_insn_for_speed_p (),
16808 enum machine_mode cmp_mode = GET_MODE (op0);
16813 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16815 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16817	      /* We may be reversing an unordered compare to a normal compare,
16818		 which is not valid in general (we may convert a non-trapping
16819		 condition to a trapping one); however, on i386 we currently
16820		 emit all comparisons unordered.  */
16821 code = reverse_condition_maybe_unordered (code);
16825 code = reverse_condition (code);
16826 if (compare_code != UNKNOWN)
16827 compare_code = reverse_condition (compare_code);
16831 if (compare_code != UNKNOWN)
16833 /* notl op1 (if needed)
16838 For x < 0 (resp. x <= -1) there will be no notl,
16839	     so if possible swap the constants to get rid of the complement.
16841 True/false will be -1/0 while code below (store flag
16842 followed by decrement) is 0/-1, so the constants need
16843 to be exchanged once more. */
16845 if (compare_code == GE || !cf)
16847 code = reverse_condition (code);
16852 HOST_WIDE_INT tmp = cf;
16857 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16861 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16863 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
16865 copy_rtx (out), 1, OPTAB_DIRECT);
16868 out = expand_simple_binop (mode, AND, copy_rtx (out),
16869 gen_int_mode (cf - ct, mode),
16870 copy_rtx (out), 1, OPTAB_DIRECT);
16872 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16873 copy_rtx (out), 1, OPTAB_DIRECT);
16874 if (!rtx_equal_p (out, operands[0]))
16875 emit_move_insn (operands[0], copy_rtx (out));
16877 return 1; /* DONE */
16881 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16883       /* Try a few more things with specific constants and a variable.  */
16886 rtx var, orig_out, out, tmp;
16888 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16889 return 0; /* FAIL */
16891 /* If one of the two operands is an interesting constant, load a
16892 constant with the above and mask it in with a logical operation. */
16894 if (CONST_INT_P (operands[2]))
16897 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16898 operands[3] = constm1_rtx, op = and_optab;
16899 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16900 operands[3] = const0_rtx, op = ior_optab;
16902 return 0; /* FAIL */
16904 else if (CONST_INT_P (operands[3]))
16907 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16908 operands[2] = constm1_rtx, op = and_optab;
16909	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16910 operands[2] = const0_rtx, op = ior_optab;
16912 return 0; /* FAIL */
16915 return 0; /* FAIL */
16917 orig_out = operands[0];
16918 tmp = gen_reg_rtx (mode);
16921 /* Recurse to get the constant loaded. */
16922 if (ix86_expand_int_movcc (operands) == 0)
16923 return 0; /* FAIL */
16925 /* Mask in the interesting variable. */
16926 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16928 if (!rtx_equal_p (out, orig_out))
16929 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16931 return 1; /* DONE */
16935 * For comparison with above,
16945 if (! nonimmediate_operand (operands[2], mode))
16946 operands[2] = force_reg (mode, operands[2]);
16947 if (! nonimmediate_operand (operands[3], mode))
16948 operands[3] = force_reg (mode, operands[3]);
16950 if (! register_operand (operands[2], VOIDmode)
16952 || ! register_operand (operands[3], VOIDmode)))
16953 operands[2] = force_reg (mode, operands[2]);
16956 && ! register_operand (operands[3], VOIDmode))
16957 operands[3] = force_reg (mode, operands[3]);
16959 emit_insn (compare_seq);
16960 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16961 gen_rtx_IF_THEN_ELSE (mode,
16962 compare_op, operands[2],
16965 return 1; /* DONE */
16968 /* Swap, force into registers, or otherwise massage the two operands
16969 to an sse comparison with a mask result. Thus we differ a bit from
16970 ix86_prepare_fp_compare_args which expects to produce a flags result.
16972 The DEST operand exists to help determine whether to commute commutative
16973 operators. The POP0/POP1 operands are updated in place. The new
16974 comparison code is returned, or UNKNOWN if not implementable. */
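/* For example, GT and GE have no direct SSE comparison encoding, so
   "a > b" is rewritten as "b < a" to match the available LT/LE forms.  */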
16976 static enum rtx_code
16977 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16978 rtx *pop0, rtx *pop1)
16986 /* We have no LTGT as an operator. We could implement it with
16987 NE & ORDERED, but this requires an extra temporary. It's
16988 not clear that it's worth it. */
16995 /* These are supported directly. */
17002 /* For commutative operators, try to canonicalize the destination
17003 operand to be first in the comparison - this helps reload to
17004 avoid extra moves. */
17005 if (!dest || !rtx_equal_p (dest, *pop1))
17013 /* These are not supported directly. Swap the comparison operands
17014 to transform into something that is supported. */
17018 code = swap_condition (code);
17022 gcc_unreachable ();
17028 /* Detect conditional moves that exactly match min/max operational
17029 semantics. Note that this is IEEE safe, as long as we don't
17030 interchange the operands.
17032 Returns FALSE if this conditional move doesn't match a MIN/MAX,
17033 and TRUE if the operation is successful and instructions are emitted. */
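/* For example, "x < y ? x : y" matches SMIN exactly.  With both
   -ffinite-math-only and -funsafe-math-optimizations (e.g. -ffast-math)
   it becomes a plain SMIN/SMAX; otherwise the UNSPEC_IEEE_MIN/MAX form
   below preserves the operand order and hence the hardware's NaN
   behaviour.  */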
17036 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
17037 rtx cmp_op1, rtx if_true, rtx if_false)
17039 enum machine_mode mode;
17045 else if (code == UNGE)
17048 if_true = if_false;
17054 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
17056 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
17061 mode = GET_MODE (dest);
17063 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
17064 but MODE may be a vector mode and thus not appropriate. */
17065 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
17067 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
17070 if_true = force_reg (mode, if_true);
17071 v = gen_rtvec (2, if_true, if_false);
17072 tmp = gen_rtx_UNSPEC (mode, v, u);
17076 code = is_min ? SMIN : SMAX;
17077 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
17080 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
17084 /* Expand an sse vector comparison. Return the register with the result. */
17087 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
17088 rtx op_true, rtx op_false)
17090 enum machine_mode mode = GET_MODE (dest);
17093 cmp_op0 = force_reg (mode, cmp_op0);
17094 if (!nonimmediate_operand (cmp_op1, mode))
17095 cmp_op1 = force_reg (mode, cmp_op1);
17098 || reg_overlap_mentioned_p (dest, op_true)
17099 || reg_overlap_mentioned_p (dest, op_false))
17100 dest = gen_reg_rtx (mode);
17102 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
17103 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17108 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
17109 operations. This is used for both scalar and vector conditional moves. */
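/* The general form below is dest = (cmp & op_true) | (~cmp & op_false),
   relying on CMP being an all-ones/all-zeros mask per element; the
   special cases drop the IOR when one arm is zero, and TARGET_XOP can
   emit a single vpcmov instead.  */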
17112 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
17114 enum machine_mode mode = GET_MODE (dest);
17117 if (op_false == CONST0_RTX (mode))
17119 op_true = force_reg (mode, op_true);
17120 x = gen_rtx_AND (mode, cmp, op_true);
17121 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17123 else if (op_true == CONST0_RTX (mode))
17125 op_false = force_reg (mode, op_false);
17126 x = gen_rtx_NOT (mode, cmp);
17127 x = gen_rtx_AND (mode, x, op_false);
17128 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17130 else if (TARGET_XOP)
17132 rtx pcmov = gen_rtx_SET (mode, dest,
17133 gen_rtx_IF_THEN_ELSE (mode, cmp,
17140 op_true = force_reg (mode, op_true);
17141 op_false = force_reg (mode, op_false);
17143 t2 = gen_reg_rtx (mode);
17145 t3 = gen_reg_rtx (mode);
17149 x = gen_rtx_AND (mode, op_true, cmp);
17150 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
17152 x = gen_rtx_NOT (mode, cmp);
17153 x = gen_rtx_AND (mode, x, op_false);
17154 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
17156 x = gen_rtx_IOR (mode, t3, t2);
17157 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
17161 /* Expand a floating-point conditional move. Return true if successful. */
17164 ix86_expand_fp_movcc (rtx operands[])
17166 enum machine_mode mode = GET_MODE (operands[0]);
17167 enum rtx_code code = GET_CODE (operands[1]);
17168 rtx tmp, compare_op;
17169 rtx op0 = XEXP (operands[1], 0);
17170 rtx op1 = XEXP (operands[1], 1);
17172 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17174 enum machine_mode cmode;
17176 /* Since we've no cmove for sse registers, don't force bad register
17177 allocation just to gain access to it. Deny movcc when the
17178 comparison mode doesn't match the move mode. */
17179 cmode = GET_MODE (op0);
17180 if (cmode == VOIDmode)
17181 cmode = GET_MODE (op1);
17185 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
17186 if (code == UNKNOWN)
17189 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
17190 operands[2], operands[3]))
17193 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
17194 operands[2], operands[3]);
17195 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
17199 /* The floating point conditional move instructions don't directly
17200 support conditions resulting from a signed integer comparison. */
17202 compare_op = ix86_expand_compare (code, op0, op1);
17203 if (!fcmov_comparison_operator (compare_op, VOIDmode))
17205 tmp = gen_reg_rtx (QImode);
17206 ix86_expand_setcc (tmp, code, op0, op1);
17208 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17211 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17212 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17213 operands[2], operands[3])));
17218 /* Expand a floating-point vector conditional move; a vcond operation
17219 rather than a movcc operation. */
17222 ix86_expand_fp_vcond (rtx operands[])
17224 enum rtx_code code = GET_CODE (operands[3]);
17227 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17228 &operands[4], &operands[5]);
17229 if (code == UNKNOWN)
17232 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17233 operands[5], operands[1], operands[2]))
17236 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17237 operands[1], operands[2]);
17238 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17242 /* Expand a signed/unsigned integral vector conditional move. */
17245 ix86_expand_int_vcond (rtx operands[])
17247 enum machine_mode mode = GET_MODE (operands[0]);
17248 enum rtx_code code = GET_CODE (operands[3]);
17249 bool negate = false;
17252 cop0 = operands[4];
17253 cop1 = operands[5];
17255 /* XOP supports all of the comparisons on all vector int types. */
17258 /* Canonicalize the comparison to EQ, GT, GTU. */
17269 code = reverse_condition (code);
17275 code = reverse_condition (code);
17281 code = swap_condition (code);
17282 x = cop0, cop0 = cop1, cop1 = x;
17286 gcc_unreachable ();
17289       /* Only SSE4.1/SSE4.2 support V2DImode.  */
17290 if (mode == V2DImode)
17295 /* SSE4.1 supports EQ. */
17296 if (!TARGET_SSE4_1)
17302 /* SSE4.2 supports GT/GTU. */
17303 if (!TARGET_SSE4_2)
17308 gcc_unreachable ();
17312       /* Unsigned parallel compare is not supported by the hardware.
17313	 Play some tricks to turn this into a signed comparison against 0.  */
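      /* For V4SI/V2DI this is the usual bias flip: subtracting the
	 sign-bit constant from both operands turns "x >u y" into the
	 signed "x - 2**31 >s y - 2**31" (per 32-bit lane); e.g.
	 0xffffffff >u 1 becomes 0x7fffffff >s 0x80000001.  */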
17317 cop0 = force_reg (mode, cop0);
17325 rtx (*gen_sub3) (rtx, rtx, rtx);
17327	      /* Subtract (-(INT MAX) - 1) from both operands to make them signed.  */
17329 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17331 gen_sub3 = (mode == V4SImode
17332 ? gen_subv4si3 : gen_subv2di3);
17333 t1 = gen_reg_rtx (mode);
17334 emit_insn (gen_sub3 (t1, cop0, mask));
17336 t2 = gen_reg_rtx (mode);
17337 emit_insn (gen_sub3 (t2, cop1, mask));
17347 /* Perform a parallel unsigned saturating subtraction. */
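	  /* "x >u y" holds iff the saturating difference x -us y is
	     nonzero, so the test becomes an EQ against zero with the
	     sense of the selection inverted via NEGATE.  */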
17348 x = gen_reg_rtx (mode);
17349 emit_insn (gen_rtx_SET (VOIDmode, x,
17350 gen_rtx_US_MINUS (mode, cop0, cop1)));
17353 cop1 = CONST0_RTX (mode);
17359 gcc_unreachable ();
17364 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17365 operands[1+negate], operands[2-negate]);
17367 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17368 operands[2-negate]);
17372 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17373 true if we should do zero extension, else sign extension. HIGH_P is
17374 true if we want the N/2 high elements, else the low elements. */
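/* For example, zero-extending the low half of a V16QImode register to
   V8HImode interleaves it with a zero vector; in the signed case the
   interleave partner is a "0 > x" compare mask, which supplies the
   sign bits.  */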
17377 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17379 enum machine_mode imode = GET_MODE (operands[1]);
17380 rtx (*unpack)(rtx, rtx, rtx);
17387 unpack = gen_vec_interleave_highv16qi;
17389 unpack = gen_vec_interleave_lowv16qi;
17393 unpack = gen_vec_interleave_highv8hi;
17395 unpack = gen_vec_interleave_lowv8hi;
17399 unpack = gen_vec_interleave_highv4si;
17401 unpack = gen_vec_interleave_lowv4si;
17404 gcc_unreachable ();
17407 dest = gen_lowpart (imode, operands[0]);
17410 se = force_reg (imode, CONST0_RTX (imode));
17412 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17413 operands[1], pc_rtx, pc_rtx);
17415 emit_insn (unpack (dest, operands[1], se));
17418 /* This function performs the same task as ix86_expand_sse_unpack,
17419 but with SSE4.1 instructions. */
17422 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17424 enum machine_mode imode = GET_MODE (operands[1]);
17425 rtx (*unpack)(rtx, rtx);
17432 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17434 unpack = gen_sse4_1_sign_extendv8qiv8hi2;
17438 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17440 unpack = gen_sse4_1_sign_extendv4hiv4si2;
17444 unpack = gen_sse4_1_zero_extendv2siv2di2;
17446 unpack = gen_sse4_1_sign_extendv2siv2di2;
17449 gcc_unreachable ();
17452 dest = operands[0];
17455 /* Shift higher 8 bytes to lower 8 bytes. */
17456 src = gen_reg_rtx (imode);
17457 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17458 gen_lowpart (V1TImode, operands[1]),
17464 emit_insn (unpack (dest, src));
17467 /* Expand conditional increment or decrement using adc/sbb instructions.
17468 The default case using setcc followed by the conditional move can be
17469 done by generic code. */
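/* For example, "dest = (a < b) ? dest + 1 : dest" on unsigned operands
   becomes roughly

	cmpl	b, a
	adcl	$0, dest

   reusing the carry flag set by the compare.  */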
17471 ix86_expand_int_addcc (rtx operands[])
17473 enum rtx_code code = GET_CODE (operands[1]);
17475 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17477 rtx val = const0_rtx;
17478 bool fpcmp = false;
17479 enum machine_mode mode;
17480 rtx op0 = XEXP (operands[1], 0);
17481 rtx op1 = XEXP (operands[1], 1);
17483 if (operands[3] != const1_rtx
17484 && operands[3] != constm1_rtx)
17486 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17488 code = GET_CODE (compare_op);
17490 flags = XEXP (compare_op, 0);
17492 if (GET_MODE (flags) == CCFPmode
17493 || GET_MODE (flags) == CCFPUmode)
17496 code = ix86_fp_compare_code_to_integer (code);
17503 PUT_CODE (compare_op,
17504 reverse_condition_maybe_unordered
17505 (GET_CODE (compare_op)));
17507 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17510 mode = GET_MODE (operands[0]);
17512 /* Construct either adc or sbb insn. */
17513 if ((code == LTU) == (operands[3] == constm1_rtx))
17518 insn = gen_subqi3_carry;
17521 insn = gen_subhi3_carry;
17524 insn = gen_subsi3_carry;
17527 insn = gen_subdi3_carry;
17530 gcc_unreachable ();
17538 insn = gen_addqi3_carry;
17541 insn = gen_addhi3_carry;
17544 insn = gen_addsi3_carry;
17547 insn = gen_adddi3_carry;
17550 gcc_unreachable ();
17553 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17555 return 1; /* DONE */
17559 /* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
17560    works for floating point parameters and non-offsettable memories.
17561    For pushes, it returns just stack offsets; the values will be saved
17562    in the right order.  At most four parts are generated.  */
17565 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17570     size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17572 size = (GET_MODE_SIZE (mode) + 4) / 8;
17574 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17575 gcc_assert (size >= 2 && size <= 4);
17577   /* Optimize constant pool references to immediates.  This is used by fp
17578      moves, which force all constants to memory to allow combining.  */
17579 if (MEM_P (operand) && MEM_READONLY_P (operand))
17581 rtx tmp = maybe_get_pool_constant (operand);
17586 if (MEM_P (operand) && !offsettable_memref_p (operand))
17588       /* The only non-offsettable memories we handle are pushes.  */
17589 int ok = push_operand (operand, VOIDmode);
17593 operand = copy_rtx (operand);
17594 PUT_MODE (operand, Pmode);
17595 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17599 if (GET_CODE (operand) == CONST_VECTOR)
17601 enum machine_mode imode = int_mode_for_mode (mode);
17602 /* Caution: if we looked through a constant pool memory above,
17603 the operand may actually have a different mode now. That's
17604 ok, since we want to pun this all the way back to an integer. */
17605 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17606 gcc_assert (operand != NULL);
17612 if (mode == DImode)
17613 split_di (&operand, 1, &parts[0], &parts[1]);
17618 if (REG_P (operand))
17620 gcc_assert (reload_completed);
17621 for (i = 0; i < size; i++)
17622 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17624 else if (offsettable_memref_p (operand))
17626 operand = adjust_address (operand, SImode, 0);
17627 parts[0] = operand;
17628 for (i = 1; i < size; i++)
17629 parts[i] = adjust_address (operand, SImode, 4 * i);
17631 else if (GET_CODE (operand) == CONST_DOUBLE)
17636 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17640 real_to_target (l, &r, mode);
17641 parts[3] = gen_int_mode (l[3], SImode);
17642 parts[2] = gen_int_mode (l[2], SImode);
17645 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17646 parts[2] = gen_int_mode (l[2], SImode);
17649 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17652 gcc_unreachable ();
17654 parts[1] = gen_int_mode (l[1], SImode);
17655 parts[0] = gen_int_mode (l[0], SImode);
17658 gcc_unreachable ();
17663 if (mode == TImode)
17664 split_ti (&operand, 1, &parts[0], &parts[1]);
17665 if (mode == XFmode || mode == TFmode)
17667       enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
17668 if (REG_P (operand))
17670 gcc_assert (reload_completed);
17671 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17672 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17674 else if (offsettable_memref_p (operand))
17676 operand = adjust_address (operand, DImode, 0);
17677 parts[0] = operand;
17678 parts[1] = adjust_address (operand, upper_mode, 8);
17680 else if (GET_CODE (operand) == CONST_DOUBLE)
17685 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17686 real_to_target (l, &r, mode);
17688 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17689 if (HOST_BITS_PER_WIDE_INT >= 64)
17692 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17693 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17696 parts[0] = immed_double_const (l[0], l[1], DImode);
17698 if (upper_mode == SImode)
17699 parts[1] = gen_int_mode (l[2], SImode);
17700 else if (HOST_BITS_PER_WIDE_INT >= 64)
17703 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17704 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17707 parts[1] = immed_double_const (l[2], l[3], DImode);
17710 gcc_unreachable ();
17717 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17718 Return false when normal moves are needed; true when all required
17719 insns have been emitted. Operands 2-4 contain the input values
17720    in the correct order; operands 5-7 contain the output values.  */
17723 ix86_split_long_move (rtx operands[])
17728 int collisions = 0;
17729 enum machine_mode mode = GET_MODE (operands[0]);
17730 bool collisionparts[4];
17732   /* The DFmode expanders may ask us to move a double.
17733      For a 64-bit target this is a single move.  By hiding the fact
17734      here we simplify the i386.md splitters.  */
17735 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17737       /* Optimize constant pool references to immediates.  This is used by
17738	 fp moves, which force all constants to memory to allow combining.  */
17740 if (MEM_P (operands[1])
17741 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17742 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17743 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17744 if (push_operand (operands[0], VOIDmode))
17746 operands[0] = copy_rtx (operands[0]);
17747 PUT_MODE (operands[0], Pmode);
17750 operands[0] = gen_lowpart (DImode, operands[0]);
17751 operands[1] = gen_lowpart (DImode, operands[1]);
17752 emit_move_insn (operands[0], operands[1]);
17756 /* The only non-offsettable memory we handle is push. */
17757 if (push_operand (operands[0], VOIDmode))
17760 gcc_assert (!MEM_P (operands[0])
17761 || offsettable_memref_p (operands[0]));
17763 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17764 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17766   /* When emitting a push, be careful with source operands on the stack.  */
17767 if (push && MEM_P (operands[1])
17768 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17770 rtx src_base = XEXP (part[1][nparts - 1], 0);
17772 /* Compensate for the stack decrement by 4. */
17773 if (!TARGET_64BIT && nparts == 3
17774 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17775 src_base = plus_constant (src_base, 4);
17777       /* src_base refers to the stack pointer and is
17778	 automatically decreased by the emitted pushes.  */
17779 for (i = 0; i < nparts; i++)
17780 part[1][i] = change_address (part[1][i],
17781 GET_MODE (part[1][i]), src_base);
17784   /* We need to do the copy in the right order in case an address register
17785      of the source overlaps the destination.  */
17786 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17790 for (i = 0; i < nparts; i++)
17793 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17794 if (collisionparts[i])
17798 /* Collision in the middle part can be handled by reordering. */
17799 if (collisions == 1 && nparts == 3 && collisionparts [1])
17801 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17802 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17804 else if (collisions == 1
17806 && (collisionparts [1] || collisionparts [2]))
17808 if (collisionparts [1])
17810 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17811 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17815 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17816 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17820       /* If there are more collisions, we can't handle them by reordering.
17821	 Do an lea to the last part and use only one colliding move.  */
17822 else if (collisions > 1)
17828 base = part[0][nparts - 1];
17830	  /* Handle the case when the last part isn't valid for lea.
17831	     This happens in 64-bit mode storing the 12-byte XFmode.  */
17832 if (GET_MODE (base) != Pmode)
17833 base = gen_rtx_REG (Pmode, REGNO (base));
17835 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17836 part[1][0] = replace_equiv_address (part[1][0], base);
17837 for (i = 1; i < nparts; i++)
17839 tmp = plus_constant (base, UNITS_PER_WORD * i);
17840 part[1][i] = replace_equiv_address (part[1][i], tmp);
17851 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17852 emit_insn (gen_addsi3 (stack_pointer_rtx,
17853 stack_pointer_rtx, GEN_INT (-4)));
17854 emit_move_insn (part[0][2], part[1][2]);
17856 else if (nparts == 4)
17858 emit_move_insn (part[0][3], part[1][3]);
17859 emit_move_insn (part[0][2], part[1][2]);
  /* In 64-bit mode we don't have a 32-bit push available.  If the operand
     is a register, that is fine: we simply use the larger counterpart.
     We also retype memory operands; these come from an attempt to avoid
     a REX prefix when moving the second half of a TFmode value.  */
17868 if (GET_MODE (part[1][1]) == SImode)
17870 switch (GET_CODE (part[1][1]))
17873 part[1][1] = adjust_address (part[1][1], DImode, 0);
17877 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17881 gcc_unreachable ();
17884 if (GET_MODE (part[1][0]) == SImode)
17885 part[1][0] = part[1][1];
17888 emit_move_insn (part[0][1], part[1][1]);
17889 emit_move_insn (part[0][0], part[1][0]);
  /* Choose the correct order so we do not overwrite the source before
     it is copied.  */
17894 if ((REG_P (part[0][0])
17895 && REG_P (part[1][1])
17896 && (REGNO (part[0][0]) == REGNO (part[1][1])
17898 && REGNO (part[0][0]) == REGNO (part[1][2]))
17900 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17902 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17904 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17906 operands[2 + i] = part[0][j];
17907 operands[6 + i] = part[1][j];
17912 for (i = 0; i < nparts; i++)
17914 operands[2 + i] = part[0][i];
17915 operands[6 + i] = part[1][i];
17919 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17920 if (optimize_insn_for_size_p ())
17922 for (j = 0; j < nparts - 1; j++)
17923 if (CONST_INT_P (operands[6 + j])
17924 && operands[6 + j] != const0_rtx
17925 && REG_P (operands[2 + j]))
17926 for (i = j; i < nparts - 1; i++)
17927 if (CONST_INT_P (operands[7 + i])
17928 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17929 operands[7 + i] = operands[2 + j];
17932 for (i = 0; i < nparts; i++)
17933 emit_move_insn (operands[2 + i], operands[6 + i]);
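/* Illustrative sketch, not part of the compiler: a minimal model of the
   ordering logic above.  A multi-word move becomes one word-sized move per
   part, emitted in reverse whenever the first destination part is a
   register that a later source part still reads.  All names below are
   hypothetical, for exposition only.  */
#if 0
struct part_move { int dst_reg, src_reg; };

static void
emit_split_move (struct part_move part[], int nparts,
		 void (*emit_one) (int dst, int src))
{
  int i, collide = 0;

  for (i = 1; i < nparts; i++)
    if (part[0].dst_reg == part[i].src_reg)
      collide = 1;		/* low dest would clobber a later source */

  if (collide)
    for (i = nparts - 1; i >= 0; i--)
      emit_one (part[i].dst_reg, part[i].src_reg);
  else
    for (i = 0; i < nparts; i++)
      emit_one (part[i].dst_reg, part[i].src_reg);
}
#endif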
17938 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17939 left shift by a constant, either using a single shift or
17940 a sequence of add instructions. */
17943 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
{
  if (count == 1)
    emit_insn ((mode == DImode
		? gen_addsi3
		: gen_adddi3) (operand, operand, operand));
17951 else if (!optimize_insn_for_size_p ()
17952 && count * ix86_cost->add <= ix86_cost->shift_const)
    {
      int i;

      for (i = 0; i < count; i++)
	emit_insn ((mode == DImode
		    ? gen_addsi3
		    : gen_adddi3) (operand, operand, operand));
    }
  else
    emit_insn ((mode == DImode
		? gen_ashlsi3
		: gen_ashldi3) (operand, operand, GEN_INT (count)));
}
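/* Illustrative sketch, not part of the compiler: in plain C, the add-chain
   alternative emitted above relies on x + x == x << 1, so a shift by a
   small constant becomes COUNT self-additions whenever the cost test says
   the additions are cheaper.  */
#if 0
static unsigned int
shl_via_adds (unsigned int x, int count)
{
  while (count-- > 0)
    x += x;			/* each addition shifts left by one bit */
  return x;			/* shl_via_adds (5, 2) == 20 == 5 << 2 */
}
#endif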
17969 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17971 rtx low[2], high[2];
17973 const int single_width = mode == DImode ? 32 : 64;
17975 if (CONST_INT_P (operands[2]))
17977 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17978 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17980 if (count >= single_width)
17982 emit_move_insn (high[0], low[1]);
17983 emit_move_insn (low[0], const0_rtx);
17985 if (count > single_width)
17986 ix86_expand_ashl_const (high[0], count - single_width, mode);
17990 if (!rtx_equal_p (operands[0], operands[1]))
17991 emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shld
		      : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17995 ix86_expand_ashl_const (low[0], count, mode);
18000 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18002 if (operands[1] == const1_rtx)
	  /* Assuming we've chosen QImode-capable registers, 1 << N can
	     be done with two 32/64-bit shifts, no branches, no cmoves.  */
18006 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
18008 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
18010 ix86_expand_clear (low[0]);
18011 ix86_expand_clear (high[0]);
18012 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
18014 d = gen_lowpart (QImode, low[0]);
18015 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18016 s = gen_rtx_EQ (QImode, flags, const0_rtx);
18017 emit_insn (gen_rtx_SET (VOIDmode, d, s));
18019 d = gen_lowpart (QImode, high[0]);
18020 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
18021 s = gen_rtx_NE (QImode, flags, const0_rtx);
18022 emit_insn (gen_rtx_SET (VOIDmode, d, s));
	  /* Otherwise, we can get the same results by manually performing
	     a bit extract operation on bit 5/6, and then performing the two
	     shifts.  The two methods of getting 0/1 into low/high are
	     exactly the same size.  Avoiding the shift in the bit extract
	     case helps Pentium 4 a bit; no one else seems to care much
	     either way.  */
18034 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
	    x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode,
				     operands[2]);
	  else
	    x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
18038 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0],
				      GEN_INT (mode == DImode ? 5 : 6)));
	  emit_insn ((mode == DImode
		      ? gen_andsi3
		      : gen_anddi3) (high[0], high[0], const1_rtx));
18047 emit_move_insn (low[0], high[0]);
	  emit_insn ((mode == DImode
		      ? gen_xorsi3
		      : gen_xordi3) (low[0], low[0], const1_rtx));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (high[0], high[0], operands[2]));
18062 if (operands[1] == constm1_rtx)
18064 /* For -1 << N, we can avoid the shld instruction, because we
18065 know that we're shifting 0...31/63 ones into a -1. */
18066 emit_move_insn (low[0], constm1_rtx);
18067 if (optimize_insn_for_size_p ())
	emit_move_insn (high[0], low[0]);
      else
	emit_move_insn (high[0], constm1_rtx);
18074 if (!rtx_equal_p (operands[0], operands[1]))
18075 emit_move_insn (operands[0], operands[1]);
18077 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shld
		  : gen_x86_64_shld) (high[0], low[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashlsi3
		  : gen_ashldi3) (low[0], low[0], operands[2]));
18087 if (TARGET_CMOVE && scratch)
18089 ix86_expand_clear (scratch);
18090 emit_insn ((mode == DImode
18091 ? gen_x86_shiftsi_adj_1
18092 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
18096 emit_insn ((mode == DImode
18097 ? gen_x86_shiftsi_adj_2
18098 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
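/* Illustrative sketch, not part of the compiler: for a variable count, the
   DImode-on-32-bit path above behaves like the C below.  The SHLD step
   feeds high bits of the low half into the high half, and the adjustment
   insns handle counts of 32..63, where the whole (shifted) low half must
   move into the high half.  */
#if 0
static void
shl64_via_halves (unsigned int *lo, unsigned int *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *hi = *lo << (count - 32);	/* low half shifts into high half */
      *lo = 0;
    }
  else if (count > 0)
    {
      *hi = (*hi << count) | (*lo >> (32 - count));	/* the SHLD step */
      *lo <<= count;
    }
}
#endif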
18102 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
18104 rtx low[2], high[2];
18106 const int single_width = mode == DImode ? 32 : 64;
18108 if (CONST_INT_P (operands[2]))
18110 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18111 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18113 if (count == single_width * 2 - 1)
18115 emit_move_insn (high[0], high[1]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
18120 emit_move_insn (low[0], high[0]);
18123 else if (count >= single_width)
18125 emit_move_insn (low[0], high[1]);
18126 emit_move_insn (high[0], low[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0],
				      GEN_INT (single_width - 1)));
18131 if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_ashrsi3
			: gen_ashrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
18139 if (!rtx_equal_p (operands[0], operands[1]))
18140 emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
18151 if (!rtx_equal_p (operands[0], operands[1]))
18152 emit_move_insn (operands[0], operands[1]);
18154 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shrd
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_ashrsi3
		  : gen_ashrdi3) (high[0], high[0], operands[2]));
18163 if (TARGET_CMOVE && scratch)
18165 emit_move_insn (scratch, high[0]);
	  emit_insn ((mode == DImode
		      ? gen_ashrsi3
		      : gen_ashrdi3) (scratch, scratch,
				      GEN_INT (single_width - 1)));
18170 emit_insn ((mode == DImode
18171 ? gen_x86_shiftsi_adj_1
18172 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18176 emit_insn ((mode == DImode
18177 ? gen_x86_shiftsi_adj_3
18178 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
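/* Illustrative sketch, not part of the compiler: the arithmetic right
   shift mirrors the left-shift splitter, except that counts of 32..63
   fill the high half with copies of the sign bit.  The sketch assumes
   >> on a signed int is an arithmetic shift, as it is on x86.  */
#if 0
static void
sar64_via_halves (unsigned int *lo, int *hi, unsigned int count)
{
  count &= 63;
  if (count >= 32)
    {
      *lo = (unsigned int) (*hi >> (count - 32));
      *hi >>= 31;				/* all sign bits */
    }
  else if (count > 0)
    {
      *lo = (*lo >> count) | ((unsigned int) *hi << (32 - count)); /* SHRD */
      *hi >>= count;
    }
}
#endif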
18183 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
18185 rtx low[2], high[2];
18187 const int single_width = mode == DImode ? 32 : 64;
18189 if (CONST_INT_P (operands[2]))
18191 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
18192 count = INTVAL (operands[2]) & (single_width * 2 - 1);
18194 if (count >= single_width)
18196 emit_move_insn (low[0], high[1]);
18197 ix86_expand_clear (high[0]);
18199 if (count > single_width)
	    emit_insn ((mode == DImode
			? gen_lshrsi3
			: gen_lshrdi3) (low[0], low[0],
					GEN_INT (count - single_width)));
18207 if (!rtx_equal_p (operands[0], operands[1]))
18208 emit_move_insn (operands[0], operands[1]);
	  emit_insn ((mode == DImode
		      ? gen_x86_shrd
		      : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
	  emit_insn ((mode == DImode
		      ? gen_lshrsi3
		      : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18219 if (!rtx_equal_p (operands[0], operands[1]))
18220 emit_move_insn (operands[0], operands[1]);
18222 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
      emit_insn ((mode == DImode
		  ? gen_x86_shrd
		  : gen_x86_64_shrd) (low[0], high[0], operands[2]));
      emit_insn ((mode == DImode
		  ? gen_lshrsi3
		  : gen_lshrdi3) (high[0], high[0], operands[2]));
18231 /* Heh. By reversing the arguments, we can reuse this pattern. */
18232 if (TARGET_CMOVE && scratch)
18234 ix86_expand_clear (scratch);
18235 emit_insn ((mode == DImode
18236 ? gen_x86_shiftsi_adj_1
18237 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18241 emit_insn ((mode == DImode
18242 ? gen_x86_shiftsi_adj_2
18243 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
/* Predict the just-emitted jump instruction to be taken with probability
   PROB.  */
18249 predict_jump (int prob)
18251 rtx insn = get_last_insn ();
18252 gcc_assert (JUMP_P (insn));
18253 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
/* Helper function for the string operations below.  Test whether the bits
   of VARIABLE selected by VALUE are all zero, and if so jump to the label
   that is returned.  Used to test pointer alignment.  */
18259 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18261 rtx label = gen_label_rtx ();
18262 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18263 if (GET_MODE (variable) == DImode)
18264 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18266 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
  emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
			   1, label);
  if (epilogue)
    predict_jump (REG_BR_PROB_BASE * 50 / 100);
  else
    predict_jump (REG_BR_PROB_BASE * 90 / 100);
  return label;
}
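/* Worked example (editorial, for clarity): probabilities are expressed as
   fractions of REG_BR_PROB_BASE (10000), so REG_BR_PROB_BASE * 90 / 100 ==
   9000 marks the just-emitted jump as taken with probability 0.90.  In a
   prologue the pointer is usually already aligned, hence the 90% guess;
   in an epilogue each tail bit is an even 50/50 guess.  */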
/* Decrement COUNTREG by VALUE.  */
18278 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18280 if (GET_MODE (countreg) == DImode)
18281 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
18283 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
/* Zero-extend the possibly SImode EXP to a Pmode register.  */
18288 ix86_zero_extend_to_Pmode (rtx exp)
18291 if (GET_MODE (exp) == VOIDmode)
18292 return force_reg (Pmode, exp);
18293 if (GET_MODE (exp) == Pmode)
18294 return copy_to_mode_reg (Pmode, exp);
18295 r = gen_reg_rtx (Pmode);
18296 emit_insn (gen_zero_extendsidi2 (r, exp));
18300 /* Divide COUNTREG by SCALE. */
18302 scale_counter (rtx countreg, int scale)
18308 if (CONST_INT_P (countreg))
18309 return GEN_INT (INTVAL (countreg) / scale);
18310 gcc_assert (REG_P (countreg));
18312 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18313 GEN_INT (exact_log2 (scale)),
			    NULL, 1, OPTAB_DIRECT);
  return sc;
}
18318 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18319 DImode for constant loop counts. */
18321 static enum machine_mode
18322 counter_mode (rtx count_exp)
18324 if (GET_MODE (count_exp) != VOIDmode)
18325 return GET_MODE (count_exp);
  if (!CONST_INT_P (count_exp))
    return Pmode;
  if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
    return DImode;
  return SImode;
}
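/* Worked example (editorial, for clarity): a constant count of 0x1000
   gets SImode even on 64-bit targets (shorter encodings), a constant that
   does not fit in 32 bits forces DImode, and a count already held in a
   register simply keeps that register's mode.  */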
/* When SRCPTR is non-NULL, output a simple loop to move memory pointed
   to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
   overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
   the equivalent loop to set memory to VALUE (which is expected to be in
   MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info.  */
18343 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18344 rtx destptr, rtx srcptr, rtx value,
18345 rtx count, enum machine_mode mode, int unroll,
18348 rtx out_label, top_label, iter, tmp;
18349 enum machine_mode iter_mode = counter_mode (count);
18350 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18351 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18357 top_label = gen_label_rtx ();
18358 out_label = gen_label_rtx ();
18359 iter = gen_reg_rtx (iter_mode);
18361 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18362 NULL, 1, OPTAB_DIRECT);
18363 /* Those two should combine. */
18364 if (piece_size == const1_rtx)
18366 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18368 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18370 emit_move_insn (iter, const0_rtx);
18372 emit_label (top_label);
18374 tmp = convert_modes (Pmode, iter_mode, iter, true);
18375 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18376 destmem = change_address (destmem, mode, x_addr);
18380 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18381 srcmem = change_address (srcmem, mode, y_addr);
      /* When unrolling for chips that reorder memory reads and writes,
	 we can save registers by using a single temporary.
	 Using 4 temporaries is also overkill in 32-bit mode.  */
18386 if (!TARGET_64BIT && 0)
18388 for (i = 0; i < unroll; i++)
18393 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18395 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18397 emit_move_insn (destmem, srcmem);
18403 gcc_assert (unroll <= 4);
18404 for (i = 0; i < unroll; i++)
18406 tmpreg[i] = gen_reg_rtx (mode);
18410 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18412 emit_move_insn (tmpreg[i], srcmem);
18414 for (i = 0; i < unroll; i++)
18419 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18421 emit_move_insn (destmem, tmpreg[i]);
18426 for (i = 0; i < unroll; i++)
18430 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18431 emit_move_insn (destmem, value);
18434 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18435 true, OPTAB_LIB_WIDEN);
18437 emit_move_insn (iter, tmp);
18439 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18441 if (expected_size != -1)
18443 expected_size /= GET_MODE_SIZE (mode) * unroll;
18444 if (expected_size == 0)
18446 else if (expected_size > REG_BR_PROB_BASE)
18447 predict_jump (REG_BR_PROB_BASE - 1);
18449 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18452 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18453 iter = ix86_zero_extend_to_Pmode (iter);
18454 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18455 true, OPTAB_LIB_WIDEN);
18456 if (tmp != destptr)
18457 emit_move_insn (destptr, tmp);
18460 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18461 true, OPTAB_LIB_WIDEN);
18463 emit_move_insn (srcptr, tmp);
18465 emit_label (out_label);
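/* Illustrative sketch, not part of the compiler: the emitted RTL has
   roughly the shape of this C loop (move case, unroll factor 4, CHUNK
   standing in for MODE).  Note also the exit-probability arithmetic above:
   for an expected trip count of 10 it computes
   10000 - (10000 + 5) / 10 == 9000, i.e. the backedge is predicted taken
   with probability 1 - 1/10.  */
#if 0
typedef unsigned int CHUNK;

static void
move_via_loop (CHUNK *dst, const CHUNK *src, unsigned long nchunks)
{
  unsigned long iter = 0;
  unsigned long size = nchunks & ~3UL;	/* round down to the unroll factor */

  for (; iter < size; iter += 4)
    {
      /* Separate temporaries, as for chips that reorder reads/writes.  */
      CHUNK t0 = src[iter], t1 = src[iter + 1];
      CHUNK t2 = src[iter + 2], t3 = src[iter + 3];
      dst[iter] = t0;
      dst[iter + 1] = t1;
      dst[iter + 2] = t2;
      dst[iter + 3] = t3;
    }
  /* The expander then advances the dest/src pointers by the bytes moved.  */
}
#endif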
18468 /* Output "rep; mov" instruction.
18469 Arguments have same meaning as for previous function */
18471 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18472 rtx destptr, rtx srcptr,
18474 enum machine_mode mode)
18480 /* If the size is known, it is shorter to use rep movs. */
18481 if (mode == QImode && CONST_INT_P (count)
      && !(INTVAL (count) & 3))
    mode = SImode;
18485 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18486 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18487 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18488 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18489 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18490 if (mode != QImode)
18492 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18493 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18494 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18495 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18496 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18497 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18501 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18502 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18504 if (CONST_INT_P (count))
18506 count = GEN_INT (INTVAL (count)
18507 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18508 destmem = shallow_copy_rtx (destmem);
18509 srcmem = shallow_copy_rtx (srcmem);
18510 set_mem_size (destmem, count);
18511 set_mem_size (srcmem, count);
18515 if (MEM_SIZE (destmem))
18516 set_mem_size (destmem, NULL_RTX);
18517 if (MEM_SIZE (srcmem))
18518 set_mem_size (srcmem, NULL_RTX);
  emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
			  destexp, srcexp));
}
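/* Editorial note, for clarity: "rep mov" copies ECX/RCX elements from
   [ESI] to [EDI], advancing both pointers; in C terms, roughly

       while (count--)
	 *dst++ = *src++;	(element-sized moves)

   The DESTEXP/SRCEXP expressions built above encode the final pointer
   values (pointer + (count << log2 of the element size)) so the RTL
   stream knows the instruction's side effects.  */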
18524 /* Output "rep; stos" instruction.
18525 Arguments have same meaning as for previous function */
18527 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18528 rtx count, enum machine_mode mode,
18534 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18535 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18536 value = force_reg (mode, gen_lowpart (mode, value));
18537 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18538 if (mode != QImode)
18540 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18541 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18542 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18545 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18546 if (orig_value == const0_rtx && CONST_INT_P (count))
18548 count = GEN_INT (INTVAL (count)
18549 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18550 destmem = shallow_copy_rtx (destmem);
18551 set_mem_size (destmem, count);
18553 else if (MEM_SIZE (destmem))
18554 set_mem_size (destmem, NULL_RTX);
18555 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
18559 emit_strmov (rtx destmem, rtx srcmem,
18560 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18562 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18563 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18564 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18567 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
18569 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18570 rtx destptr, rtx srcptr, rtx count, int max_size)
18573 if (CONST_INT_P (count))
18575 HOST_WIDE_INT countval = INTVAL (count);
18578 if ((countval & 0x10) && max_size > 16)
18582 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18583 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18586 gcc_unreachable ();
18589 if ((countval & 0x08) && max_size > 8)
18592 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18595 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18596 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18600 if ((countval & 0x04) && max_size > 4)
18602 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18605 if ((countval & 0x02) && max_size > 2)
18607 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18610 if ((countval & 0x01) && max_size > 1)
18612 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18619 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18620 count, 1, OPTAB_DIRECT);
18621 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18622 count, QImode, 1, 4);
18626 /* When there are stringops, we can cheaply increase dest and src pointers.
18627 Otherwise we save code size by maintaining offset (zero is readily
18628 available from preceding rep operation) and using x86 addressing modes.
18630 if (TARGET_SINGLE_STRINGOP)
18634 rtx label = ix86_expand_aligntest (count, 4, true);
18635 src = change_address (srcmem, SImode, srcptr);
18636 dest = change_address (destmem, SImode, destptr);
18637 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18638 emit_label (label);
18639 LABEL_NUSES (label) = 1;
18643 rtx label = ix86_expand_aligntest (count, 2, true);
18644 src = change_address (srcmem, HImode, srcptr);
18645 dest = change_address (destmem, HImode, destptr);
18646 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18647 emit_label (label);
18648 LABEL_NUSES (label) = 1;
18652 rtx label = ix86_expand_aligntest (count, 1, true);
18653 src = change_address (srcmem, QImode, srcptr);
18654 dest = change_address (destmem, QImode, destptr);
18655 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18656 emit_label (label);
18657 LABEL_NUSES (label) = 1;
18662 rtx offset = force_reg (Pmode, const0_rtx);
18667 rtx label = ix86_expand_aligntest (count, 4, true);
18668 src = change_address (srcmem, SImode, srcptr);
18669 dest = change_address (destmem, SImode, destptr);
18670 emit_move_insn (dest, src);
18671 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18672 true, OPTAB_LIB_WIDEN);
18674 emit_move_insn (offset, tmp);
18675 emit_label (label);
18676 LABEL_NUSES (label) = 1;
18680 rtx label = ix86_expand_aligntest (count, 2, true);
18681 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18682 src = change_address (srcmem, HImode, tmp);
18683 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18684 dest = change_address (destmem, HImode, tmp);
18685 emit_move_insn (dest, src);
18686 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18687 true, OPTAB_LIB_WIDEN);
18689 emit_move_insn (offset, tmp);
18690 emit_label (label);
18691 LABEL_NUSES (label) = 1;
18695 rtx label = ix86_expand_aligntest (count, 1, true);
18696 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18697 src = change_address (srcmem, QImode, tmp);
18698 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18699 dest = change_address (destmem, QImode, tmp);
18700 emit_move_insn (dest, src);
18701 emit_label (label);
18702 LABEL_NUSES (label) = 1;
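/* Illustrative sketch, not part of the compiler: for a constant remainder,
   the epilogue dispatch above copies the tail by binary decomposition
   instead of a loop; e.g. a 13-byte tail (binary 1101) becomes one 8-byte,
   one 4-byte and one 1-byte move.  A C model, assuming the tail has
   already been reduced below 16 bytes:  */
#if 0
static void
copy_tail (char *dst, const char *src, unsigned int countval)
{
  unsigned int off = 0;
  if (countval & 8) { __builtin_memcpy (dst + off, src + off, 8); off += 8; }
  if (countval & 4) { __builtin_memcpy (dst + off, src + off, 4); off += 4; }
  if (countval & 2) { __builtin_memcpy (dst + off, src + off, 2); off += 2; }
  if (countval & 1) dst[off] = src[off];
}
#endif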
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
18709 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18710 rtx count, int max_size)
18713 expand_simple_binop (counter_mode (count), AND, count,
18714 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18715 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18716 gen_lowpart (QImode, value), count, QImode,
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
18722 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18726 if (CONST_INT_P (count))
18728 HOST_WIDE_INT countval = INTVAL (count);
18731 if ((countval & 0x10) && max_size > 16)
18735 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18736 emit_insn (gen_strset (destptr, dest, value));
18737 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18738 emit_insn (gen_strset (destptr, dest, value));
18741 gcc_unreachable ();
18744 if ((countval & 0x08) && max_size > 8)
18748 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18749 emit_insn (gen_strset (destptr, dest, value));
18753 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18754 emit_insn (gen_strset (destptr, dest, value));
18755 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18756 emit_insn (gen_strset (destptr, dest, value));
18760 if ((countval & 0x04) && max_size > 4)
18762 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18763 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18766 if ((countval & 0x02) && max_size > 2)
18768 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18769 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18772 if ((countval & 0x01) && max_size > 1)
18774 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18775 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18782 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18787 rtx label = ix86_expand_aligntest (count, 16, true);
18790 dest = change_address (destmem, DImode, destptr);
18791 emit_insn (gen_strset (destptr, dest, value));
18792 emit_insn (gen_strset (destptr, dest, value));
18796 dest = change_address (destmem, SImode, destptr);
18797 emit_insn (gen_strset (destptr, dest, value));
18798 emit_insn (gen_strset (destptr, dest, value));
18799 emit_insn (gen_strset (destptr, dest, value));
18800 emit_insn (gen_strset (destptr, dest, value));
18802 emit_label (label);
18803 LABEL_NUSES (label) = 1;
18807 rtx label = ix86_expand_aligntest (count, 8, true);
18810 dest = change_address (destmem, DImode, destptr);
18811 emit_insn (gen_strset (destptr, dest, value));
18815 dest = change_address (destmem, SImode, destptr);
18816 emit_insn (gen_strset (destptr, dest, value));
18817 emit_insn (gen_strset (destptr, dest, value));
18819 emit_label (label);
18820 LABEL_NUSES (label) = 1;
18824 rtx label = ix86_expand_aligntest (count, 4, true);
18825 dest = change_address (destmem, SImode, destptr);
18826 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18827 emit_label (label);
18828 LABEL_NUSES (label) = 1;
18832 rtx label = ix86_expand_aligntest (count, 2, true);
18833 dest = change_address (destmem, HImode, destptr);
18834 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18835 emit_label (label);
18836 LABEL_NUSES (label) = 1;
18840 rtx label = ix86_expand_aligntest (count, 1, true);
18841 dest = change_address (destmem, QImode, destptr);
18842 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18843 emit_label (label);
18844 LABEL_NUSES (label) = 1;
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
18851 expand_movmem_prologue (rtx destmem, rtx srcmem,
18852 rtx destptr, rtx srcptr, rtx count,
18853 int align, int desired_alignment)
18855 if (align <= 1 && desired_alignment > 1)
18857 rtx label = ix86_expand_aligntest (destptr, 1, false);
18858 srcmem = change_address (srcmem, QImode, srcptr);
18859 destmem = change_address (destmem, QImode, destptr);
18860 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18861 ix86_adjust_counter (count, 1);
18862 emit_label (label);
18863 LABEL_NUSES (label) = 1;
18865 if (align <= 2 && desired_alignment > 2)
18867 rtx label = ix86_expand_aligntest (destptr, 2, false);
18868 srcmem = change_address (srcmem, HImode, srcptr);
18869 destmem = change_address (destmem, HImode, destptr);
18870 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18871 ix86_adjust_counter (count, 2);
18872 emit_label (label);
18873 LABEL_NUSES (label) = 1;
18875 if (align <= 4 && desired_alignment > 4)
18877 rtx label = ix86_expand_aligntest (destptr, 4, false);
18878 srcmem = change_address (srcmem, SImode, srcptr);
18879 destmem = change_address (destmem, SImode, destptr);
18880 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18881 ix86_adjust_counter (count, 4);
18882 emit_label (label);
18883 LABEL_NUSES (label) = 1;
18885 gcc_assert (desired_alignment <= 8);
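/* Illustrative sketch, not part of the compiler: the prologue above peels
   1-, 2- and 4-byte copies until the destination is aligned.  Each "if"
   below corresponds to one ix86_expand_aligntest call, with the sense
   inverted, since the RTL jumps around the copy when already aligned.  */
#if 0
static void
align_dest (char **dst, const char **src, unsigned long *count, int desired)
{
  if (desired > 1 && ((unsigned long) *dst & 1))
    { *(*dst)++ = *(*src)++; --*count; }
  if (desired > 2 && ((unsigned long) *dst & 2))
    { __builtin_memcpy (*dst, *src, 2); *dst += 2; *src += 2; *count -= 2; }
  if (desired > 4 && ((unsigned long) *dst & 4))
    { __builtin_memcpy (*dst, *src, 4); *dst += 4; *src += 4; *count -= 4; }
}
#endif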
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
18891 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18892 int desired_align, int align_bytes)
18895 rtx src_size, dst_size;
18897 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18898 if (src_align_bytes >= 0)
18899 src_align_bytes = desired_align - src_align_bytes;
18900 src_size = MEM_SIZE (src);
18901 dst_size = MEM_SIZE (dst);
18902 if (align_bytes & 1)
18904 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18905 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18907 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18909 if (align_bytes & 2)
18911 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18912 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18913 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18914 set_mem_align (dst, 2 * BITS_PER_UNIT);
18915 if (src_align_bytes >= 0
18916 && (src_align_bytes & 1) == (align_bytes & 1)
18917 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18918 set_mem_align (src, 2 * BITS_PER_UNIT);
18920 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18922 if (align_bytes & 4)
18924 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18925 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18926 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18927 set_mem_align (dst, 4 * BITS_PER_UNIT);
18928 if (src_align_bytes >= 0)
18930 unsigned int src_align = 0;
18931 if ((src_align_bytes & 3) == (align_bytes & 3))
18933 else if ((src_align_bytes & 1) == (align_bytes & 1))
18935 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18936 set_mem_align (src, src_align * BITS_PER_UNIT);
18939 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18941 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18942 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18943 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18944 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18945 if (src_align_bytes >= 0)
18947 unsigned int src_align = 0;
18948 if ((src_align_bytes & 7) == (align_bytes & 7))
18950 else if ((src_align_bytes & 3) == (align_bytes & 3))
18952 else if ((src_align_bytes & 1) == (align_bytes & 1))
18954 if (src_align > (unsigned int) desired_align)
18955 src_align = desired_align;
18956 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18957 set_mem_align (src, src_align * BITS_PER_UNIT);
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
  *srcp = src;
  return dst;
}
/* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
18970 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18971 int align, int desired_alignment)
18973 if (align <= 1 && desired_alignment > 1)
18975 rtx label = ix86_expand_aligntest (destptr, 1, false);
18976 destmem = change_address (destmem, QImode, destptr);
18977 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18978 ix86_adjust_counter (count, 1);
18979 emit_label (label);
18980 LABEL_NUSES (label) = 1;
18982 if (align <= 2 && desired_alignment > 2)
18984 rtx label = ix86_expand_aligntest (destptr, 2, false);
18985 destmem = change_address (destmem, HImode, destptr);
18986 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18987 ix86_adjust_counter (count, 2);
18988 emit_label (label);
18989 LABEL_NUSES (label) = 1;
18991 if (align <= 4 && desired_alignment > 4)
18993 rtx label = ix86_expand_aligntest (destptr, 4, false);
18994 destmem = change_address (destmem, SImode, destptr);
18995 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18996 ix86_adjust_counter (count, 4);
18997 emit_label (label);
18998 LABEL_NUSES (label) = 1;
19000 gcc_assert (desired_alignment <= 8);
/* Set enough bytes at DST to align DST, known to be aligned by ALIGN, to
   DESIRED_ALIGN.  ALIGN_BYTES is how many bytes need to be stored.  */
19006 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
19007 int desired_align, int align_bytes)
19010 rtx dst_size = MEM_SIZE (dst);
19011 if (align_bytes & 1)
19013 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
19015 emit_insn (gen_strset (destreg, dst,
19016 gen_lowpart (QImode, value)));
19018 if (align_bytes & 2)
19020 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
19021 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
19022 set_mem_align (dst, 2 * BITS_PER_UNIT);
19024 emit_insn (gen_strset (destreg, dst,
19025 gen_lowpart (HImode, value)));
19027 if (align_bytes & 4)
19029 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
19030 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
19031 set_mem_align (dst, 4 * BITS_PER_UNIT);
19033 emit_insn (gen_strset (destreg, dst,
19034 gen_lowpart (SImode, value)));
19036 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
19037 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
19038 set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  return dst;
}
19044 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
19045 static enum stringop_alg
19046 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
19047 int *dynamic_check)
19049 const struct stringop_algs * algs;
19050 bool optimize_for_speed;
19051 /* Algorithms using the rep prefix want at least edi and ecx;
19052 additionally, memset wants eax and memcpy wants esi. Don't
19053 consider such algorithms if the user has appropriated those
19054 registers for their own purposes. */
19055 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
19057 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
19059 #define ALG_USABLE_P(alg) (rep_prefix_usable \
19060 || (alg != rep_prefix_1_byte \
19061 && alg != rep_prefix_4_byte \
19062 && alg != rep_prefix_8_byte))
19063 const struct processor_costs *cost;
19065 /* Even if the string operation call is cold, we still might spend a lot
19066 of time processing large blocks. */
19067 if (optimize_function_for_size_p (cfun)
19068 || (optimize_insn_for_size_p ()
19069 && expected_size != -1 && expected_size < 256))
19070 optimize_for_speed = false;
19072 optimize_for_speed = true;
19074 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
19076 *dynamic_check = -1;
19078 algs = &cost->memset[TARGET_64BIT != 0];
19080 algs = &cost->memcpy[TARGET_64BIT != 0];
19081 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
19082 return stringop_alg;
19083 /* rep; movq or rep; movl is the smallest variant. */
19084 else if (!optimize_for_speed)
19086 if (!count || (count & 3))
19087 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
19089 return rep_prefix_usable ? rep_prefix_4_byte : loop;
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
19093 else if (expected_size != -1 && expected_size < 4)
19094 return loop_1_byte;
19095 else if (expected_size != -1)
19098 enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19101 /* We get here if the algorithms that were not libcall-based
19102 were rep-prefix based and we are unable to use rep prefixes
19103 based on global register usage. Break out of the loop and
19104 use the heuristic below. */
19105 if (algs->size[i].max == 0)
19107 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
19109 enum stringop_alg candidate = algs->size[i].alg;
19111 if (candidate != libcall && ALG_USABLE_P (candidate))
19113 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
19114 last non-libcall inline algorithm. */
19115 if (TARGET_INLINE_ALL_STRINGOPS)
		  /* When the current size is best copied by a libcall,
		     but we are still forced to inline, run the heuristic
		     below that will pick code for medium-sized blocks.  */
19120 if (alg != libcall)
19124 else if (ALG_USABLE_P (candidate))
19128 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of a block that is faster to copy by hand,
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the maximum.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
19137 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19138 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
19141 enum stringop_alg alg;
19143 bool any_alg_usable_p = true;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
19147 enum stringop_alg candidate = algs->size[i].alg;
19148 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
19150 if (candidate != libcall && candidate
19151 && ALG_USABLE_P (candidate))
19152 max = algs->size[i].max;
19154 /* If there aren't any usable algorithms, then recursing on
19155 smaller sizes isn't going to find anything. Just return the
19156 simple byte-at-a-time copy loop. */
19157 if (!any_alg_usable_p)
19159 /* Pick something reasonable. */
19160 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19161 *dynamic_check = 128;
19162 return loop_1_byte;
19166 alg = decide_alg (count, max / 2, memset, dynamic_check);
19167 gcc_assert (*dynamic_check == -1);
19168 gcc_assert (alg != libcall);
19169 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
19170 *dynamic_check = max;
19173 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
19174 #undef ALG_USABLE_P
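/* Editorial note, for clarity: the size table consulted above comes from
   the active processor_costs structure.  As a made-up illustration, an
   entry list

       {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}

   would mean: blocks up to 256 bytes use the inline loop, blocks up to
   8192 bytes use "rep movsd"/"rep stosd", and anything larger (max == -1)
   goes to the library call.  The first entry whose max covers
   EXPECTED_SIZE wins, subject to ALG_USABLE_P.  */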
19177 /* Decide on alignment. We know that the operand is already aligned to ALIGN
19178 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
19180 decide_alignment (int align,
19181 enum stringop_alg alg,
19184 int desired_align = 0;
19188 gcc_unreachable ();
19190 case unrolled_loop:
19191 desired_align = GET_MODE_SIZE (Pmode);
19193 case rep_prefix_8_byte:
19196 case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
	 copying a whole cache line at once.  */
19199 if (TARGET_PENTIUMPRO)
19204 case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
	 copying a whole cache line at once.  */
19207 if (TARGET_PENTIUMPRO)
19221 if (desired_align < align)
19222 desired_align = align;
19223 if (expected_size != -1 && expected_size < 4)
19224 desired_align = align;
19225 return desired_align;
19228 /* Return the smallest power of 2 greater than VAL. */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}
/* Expand string move (memcpy) operation.  Use i386 string operations
   when profitable.  expand_setmem contains similar code.  The code
   depends upon architecture, block size and alignment, but always has
   the same overall structure:

   1) Prologue guard: Conditional that jumps up to the epilogues for
      small blocks that can be handled by the epilogue alone.  This is
      faster but also needed for correctness, since the prologue assumes
      the block is larger than the desired alignment.
19248 Optional dynamic check for size and libcall for large
19249 blocks is emitted here too, with -minline-stringops-dynamically.
   2) Prologue: copy the first few bytes in order to get the destination
      aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less
      than DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be
      copied.  We emit either a jump tree on power-of-two-sized blocks,
      or a byte loop.
19256 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19257 with specified algorithm.
   4) Epilogue: code copying the tail of the block that is too small to
      be handled by the main body (or up to size guarded by the prologue
      guard).  */
19263 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19264 rtx expected_align_exp, rtx expected_size_exp)
19270 rtx jump_around_label = NULL;
19271 HOST_WIDE_INT align = 1;
19272 unsigned HOST_WIDE_INT count = 0;
19273 HOST_WIDE_INT expected_size = -1;
19274 int size_needed = 0, epilogue_size_needed;
19275 int desired_align = 0, align_bytes = 0;
19276 enum stringop_alg alg;
19278 bool need_zero_guard = false;
19280 if (CONST_INT_P (align_exp))
19281 align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
19283 if (CONST_INT_P (expected_align_exp)
19284 && INTVAL (expected_align_exp) > align)
19285 align = INTVAL (expected_align_exp);
19286 /* ALIGN is the minimum of destination and source alignment, but we care here
19287 just about destination alignment. */
19288 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19289 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19291 if (CONST_INT_P (count_exp))
19292 count = expected_size = INTVAL (count_exp);
19293 if (CONST_INT_P (expected_size_exp) && count == 0)
19294 expected_size = INTVAL (expected_size_exp);
19296 /* Make sure we don't need to care about overflow later on. */
19297 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19300 /* Step 0: Decide on preferred algorithm, desired alignment and
19301 size of chunks to be copied by main loop. */
19303 alg = decide_alg (count, expected_size, false, &dynamic_check);
19304 desired_align = decide_alignment (align, alg, expected_size);
19306 if (!TARGET_ALIGN_STRINGOPS)
19307 align = desired_align;
19309 if (alg == libcall)
19311 gcc_assert (alg != no_stringop);
19313 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19314 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19315 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19320 gcc_unreachable ();
19322 need_zero_guard = true;
19323 size_needed = GET_MODE_SIZE (Pmode);
19325 case unrolled_loop:
19326 need_zero_guard = true;
19327 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19329 case rep_prefix_8_byte:
19332 case rep_prefix_4_byte:
19335 case rep_prefix_1_byte:
19339 need_zero_guard = true;
19344 epilogue_size_needed = size_needed;
19346 /* Step 1: Prologue guard. */
19348 /* Alignment code needs count to be in register. */
19349 if (CONST_INT_P (count_exp) && desired_align > align)
19351 if (INTVAL (count_exp) > desired_align
19352 && INTVAL (count_exp) > size_needed)
19355 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19356 if (align_bytes <= 0)
19359 align_bytes = desired_align - align_bytes;
19361 if (align_bytes == 0)
19362 count_exp = force_reg (counter_mode (count_exp), count_exp);
19364 gcc_assert (desired_align >= 1 && align >= 1);
19366 /* Ensure that alignment prologue won't copy past end of block. */
19367 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19369 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Make sure it is a power of 2.  */
19372 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19376 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19378 /* If main algorithm works on QImode, no epilogue is needed.
19379 For small sizes just don't align anything. */
19380 if (size_needed == 1)
19381 desired_align = align;
19388 label = gen_label_rtx ();
19389 emit_cmp_and_jump_insns (count_exp,
19390 GEN_INT (epilogue_size_needed),
19391 LTU, 0, counter_mode (count_exp), 1, label);
19392 if (expected_size == -1 || expected_size < epilogue_size_needed)
19393 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19395 predict_jump (REG_BR_PROB_BASE * 20 / 100);
  /* Emit code to decide at runtime whether a library call or inline code
     should be used.  */
19401 if (dynamic_check != -1)
19403 if (CONST_INT_P (count_exp))
19405 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19407 emit_block_move_via_libcall (dst, src, count_exp, false);
19408 count_exp = const0_rtx;
19414 rtx hot_label = gen_label_rtx ();
19415 jump_around_label = gen_label_rtx ();
19416 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19417 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19418 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19419 emit_block_move_via_libcall (dst, src, count_exp, false);
19420 emit_jump (jump_around_label);
19421 emit_label (hot_label);
19425 /* Step 2: Alignment prologue. */
19427 if (desired_align > align)
19429 if (align_bytes == 0)
	  /* Except for the first move in the epilogue, we no longer know
	     the constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
19435 src = change_address (src, BLKmode, srcreg);
19436 dst = change_address (dst, BLKmode, destreg);
19437 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19442 /* If we know how many bytes need to be stored before dst is
19443 sufficiently aligned, maintain aliasing info accurately. */
19444 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19445 desired_align, align_bytes);
19446 count_exp = plus_constant (count_exp, -align_bytes);
19447 count -= align_bytes;
19449 if (need_zero_guard
19450 && (count < (unsigned HOST_WIDE_INT) size_needed
19451 || (align_bytes == 0
19452 && count < ((unsigned HOST_WIDE_INT) size_needed
19453 + desired_align - align))))
	  /* It is possible that we copied enough so that the main loop
	     will not execute.  */
19457 gcc_assert (size_needed > 1);
19458 if (label == NULL_RTX)
19459 label = gen_label_rtx ();
19460 emit_cmp_and_jump_insns (count_exp,
19461 GEN_INT (size_needed),
19462 LTU, 0, counter_mode (count_exp), 1, label);
19463 if (expected_size == -1
19464 || expected_size < (desired_align - align) / 2 + size_needed)
19465 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19467 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19470 if (label && size_needed == 1)
19472 emit_label (label);
19473 LABEL_NUSES (label) = 1;
19475 epilogue_size_needed = 1;
19477 else if (label == NULL_RTX)
19478 epilogue_size_needed = size_needed;
19480 /* Step 3: Main loop. */
19486 gcc_unreachable ();
19488 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19489 count_exp, QImode, 1, expected_size);
19492 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19493 count_exp, Pmode, 1, expected_size);
19495 case unrolled_loop:
      /* Unroll only by a factor of 2 in 32-bit mode, since we don't have
	 enough registers for 4 temporaries anyway.  */
19498 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19499 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19502 case rep_prefix_8_byte:
19503 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19506 case rep_prefix_4_byte:
19507 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19510 case rep_prefix_1_byte:
19511 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
  /* Properly adjust the offsets of src and dest memory for aliasing.  */
19516 if (CONST_INT_P (count_exp))
19518 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19519 (count / size_needed) * size_needed);
19520 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19521 (count / size_needed) * size_needed);
19525 src = change_address (src, BLKmode, srcreg);
19526 dst = change_address (dst, BLKmode, destreg);
19529 /* Step 4: Epilogue to copy the remaining bytes. */
      /* When the main loop is done, COUNT_EXP might hold the original count,
	 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
	 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
	 bytes.  Compensate if needed.  */
19538 if (size_needed < epilogue_size_needed)
19541 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19542 GEN_INT (size_needed - 1), count_exp, 1,
19544 if (tmp != count_exp)
19545 emit_move_insn (count_exp, tmp);
19547 emit_label (label);
19548 LABEL_NUSES (label) = 1;
19551 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19552 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19553 epilogue_size_needed);
19554 if (jump_around_label)
19555 emit_label (jump_around_label);
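/* Worked example (editorial, for clarity): the four steps for
   alg == unrolled_loop on a 32-bit target.  size_needed is 4 bytes * 2
   == 8, desired_align is 4, and epilogue_size_needed rounds
   MAX (8 - 1, 4 - align) up to the power of two 8.  The generated code
   is then roughly:

     1) if (count < 8) goto epilogue;
     2) peel 1/2/4-byte copies until dst is 4-byte aligned, adjusting count;
     3) copy count & ~7 bytes in unrolled 8-byte chunks;
     4) epilogue: copy the remaining count & 7 bytes.  */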
/* Helper function for memcpy.  For QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a multiplication by 0x01010101, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
19565 promote_duplicated_reg (enum machine_mode mode, rtx val)
19567 enum machine_mode valmode = GET_MODE (val);
19569 int nops = mode == DImode ? 3 : 2;
19571 gcc_assert (mode == SImode || mode == DImode);
19572 if (val == const0_rtx)
19573 return copy_to_mode_reg (mode, const0_rtx);
19574 if (CONST_INT_P (val))
19576 HOST_WIDE_INT v = INTVAL (val) & 255;
19580 if (mode == DImode)
19581 v |= (v << 16) << 16;
19582 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19585 if (valmode == VOIDmode)
19587 if (valmode != QImode)
19588 val = gen_lowpart (QImode, val);
19589 if (mode == QImode)
19591 if (!TARGET_PARTIAL_REG_STALL)
19593 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19594 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19595 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19596 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19598 rtx reg = convert_modes (mode, QImode, val, true);
19599 tmp = promote_duplicated_reg (mode, const1_rtx);
19600 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19605 rtx reg = convert_modes (mode, QImode, val, true);
19607 if (!TARGET_PARTIAL_REG_STALL)
19608 if (mode == SImode)
19609 emit_insn (gen_movsi_insv_1 (reg, reg));
19611 emit_insn (gen_movdi_insv_1 (reg, reg));
19614 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19615 NULL, 1, OPTAB_DIRECT);
19617 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19619 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19620 NULL, 1, OPTAB_DIRECT);
19621 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
  if (mode == SImode)
    return reg;
19624 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19625 NULL, 1, OPTAB_DIRECT);
      reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
  return reg;
}
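/* Illustrative sketch, not part of the compiler: the shift/or unwinding
   above computes, for VAL == 0x41 and SImode,

       reg = 0x41;
       reg |= reg << 8;		-> 0x4141
       reg |= reg << 16;	-> 0x41414141

   which equals 0x41 * 0x01010101.  As plain C:  */
#if 0
static unsigned int
broadcast_byte (unsigned char val)
{
  unsigned int reg = val;
  reg |= reg << 8;
  reg |= reg << 16;
  return reg;
}
#endif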
/* Duplicate the value VAL using promote_duplicated_reg into the maximal
   size that will be needed by the main loop copying SIZE_NEEDED chunks
   and by the prologue getting the alignment from ALIGN to DESIRED_ALIGN.  */
19635 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19640 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19641 promoted_val = promote_duplicated_reg (DImode, val);
19642 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19643 promoted_val = promote_duplicated_reg (SImode, val);
19644 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19645 promoted_val = promote_duplicated_reg (HImode, val);
19647 promoted_val = val;
19649 return promoted_val;
/* Expand string set operation (memset/bzero).  Use i386 string operations
   when profitable.  See the expand_movmem comment for an explanation of
   the individual steps performed.  */
19656 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19657 rtx expected_align_exp, rtx expected_size_exp)
19662 rtx jump_around_label = NULL;
19663 HOST_WIDE_INT align = 1;
19664 unsigned HOST_WIDE_INT count = 0;
19665 HOST_WIDE_INT expected_size = -1;
19666 int size_needed = 0, epilogue_size_needed;
19667 int desired_align = 0, align_bytes = 0;
19668 enum stringop_alg alg;
19669 rtx promoted_val = NULL;
19670 bool force_loopy_epilogue = false;
19672 bool need_zero_guard = false;
19674 if (CONST_INT_P (align_exp))
19675 align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
19677 if (CONST_INT_P (expected_align_exp)
19678 && INTVAL (expected_align_exp) > align)
19679 align = INTVAL (expected_align_exp);
19680 if (CONST_INT_P (count_exp))
19681 count = expected_size = INTVAL (count_exp);
19682 if (CONST_INT_P (expected_size_exp) && count == 0)
19683 expected_size = INTVAL (expected_size_exp);
19685 /* Make sure we don't need to care about overflow later on. */
19686 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19689 /* Step 0: Decide on preferred algorithm, desired alignment and
19690 size of chunks to be copied by main loop. */
19692 alg = decide_alg (count, expected_size, true, &dynamic_check);
19693 desired_align = decide_alignment (align, alg, expected_size);
19695 if (!TARGET_ALIGN_STRINGOPS)
19696 align = desired_align;
19698 if (alg == libcall)
19700 gcc_assert (alg != no_stringop);
19702 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19703 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19708 gcc_unreachable ();
19710 need_zero_guard = true;
19711 size_needed = GET_MODE_SIZE (Pmode);
19713 case unrolled_loop:
19714 need_zero_guard = true;
19715 size_needed = GET_MODE_SIZE (Pmode) * 4;
19717 case rep_prefix_8_byte:
19720 case rep_prefix_4_byte:
19723 case rep_prefix_1_byte:
19727 need_zero_guard = true;
19731 epilogue_size_needed = size_needed;
19733 /* Step 1: Prologue guard. */
19735 /* Alignment code needs count to be in register. */
19736 if (CONST_INT_P (count_exp) && desired_align > align)
19738 if (INTVAL (count_exp) > desired_align
19739 && INTVAL (count_exp) > size_needed)
19742 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19743 if (align_bytes <= 0)
19746 align_bytes = desired_align - align_bytes;
19748 if (align_bytes == 0)
19750 enum machine_mode mode = SImode;
19751 if (TARGET_64BIT && (count & ~0xffffffff))
19753 count_exp = force_reg (mode, count_exp);
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all the code).  */
19759 if (CONST_INT_P (val_exp))
19760 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19761 desired_align, align);
19762 /* Ensure that alignment prologue won't copy past end of block. */
19763 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19765 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19766 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19767 Make sure it is power of 2. */
19768 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
      /* To improve performance of small blocks, we jump around the VAL
	 promoting code.  This means that if the promoted VAL is not
	 constant, we might not use it in the epilogue and have to use
	 the byte loop variant instead.  */
19774 if (epilogue_size_needed > 2 && !promoted_val)
19775 force_loopy_epilogue = true;
19778 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19780 /* If main algorithm works on QImode, no epilogue is needed.
19781 For small sizes just don't align anything. */
19782 if (size_needed == 1)
19783 desired_align = align;
19790 label = gen_label_rtx ();
19791 emit_cmp_and_jump_insns (count_exp,
19792 GEN_INT (epilogue_size_needed),
19793 LTU, 0, counter_mode (count_exp), 1, label);
19794 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19795 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19797 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19800 if (dynamic_check != -1)
19802 rtx hot_label = gen_label_rtx ();
19803 jump_around_label = gen_label_rtx ();
19804 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19805 LEU, 0, counter_mode (count_exp), 1, hot_label);
19806 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19807 set_storage_via_libcall (dst, count_exp, val_exp, false);
19808 emit_jump (jump_around_label);
19809 emit_label (hot_label);
19812 /* Step 2: Alignment prologue. */
  /* Do the expensive promotion once we have branched off the small blocks.  */
19816 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19817 desired_align, align);
19818 gcc_assert (desired_align >= 1 && align >= 1);
19820 if (desired_align > align)
19822 if (align_bytes == 0)
	  /* Except for the first move in the epilogue, we no longer know
	     the constant offset in aliasing info.  It doesn't seem worth
	     the pain to maintain it for the first move, so throw away
	     the info early.  */
19828 dst = change_address (dst, BLKmode, destreg);
19829 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19834 /* If we know how many bytes need to be stored before dst is
19835 sufficiently aligned, maintain aliasing info accurately. */
19836 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19837 desired_align, align_bytes);
19838 count_exp = plus_constant (count_exp, -align_bytes);
19839 count -= align_bytes;
19841 if (need_zero_guard
19842 && (count < (unsigned HOST_WIDE_INT) size_needed
19843 || (align_bytes == 0
19844 && count < ((unsigned HOST_WIDE_INT) size_needed
19845 + desired_align - align))))
	  /* It is possible that we copied enough so that the main loop
	     will not execute.  */
19849 gcc_assert (size_needed > 1);
19850 if (label == NULL_RTX)
19851 label = gen_label_rtx ();
19852 emit_cmp_and_jump_insns (count_exp,
19853 GEN_INT (size_needed),
19854 LTU, 0, counter_mode (count_exp), 1, label);
19855 if (expected_size == -1
19856 || expected_size < (desired_align - align) / 2 + size_needed)
19857 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19858 else
19859 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19862 if (label && size_needed == 1)
19864 emit_label (label);
19865 LABEL_NUSES (label) = 1;
19866 label = NULL;
19867 promoted_val = val_exp;
19868 epilogue_size_needed = 1;
19870 else if (label == NULL_RTX)
19871 epilogue_size_needed = size_needed;
19873 /* Step 3: Main loop. */
19876 switch (alg)
19877 case libcall:
19878 case no_stringop:
19879 gcc_unreachable ();
19880 case loop_1_byte:
19881 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19882 count_exp, QImode, 1, expected_size);
19883 break;
19884 case loop:
19885 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19886 count_exp, Pmode, 1, expected_size);
19887 break;
19888 case unrolled_loop:
19889 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19890 count_exp, Pmode, 4, expected_size);
19891 break;
19892 case rep_prefix_8_byte:
19893 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19894 DImode);
19895 break;
19896 case rep_prefix_4_byte:
19897 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19898 SImode);
19899 break;
19900 case rep_prefix_1_byte:
19901 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19902 QImode);
19903 break;
19905 /* Properly adjust the offset of src and dest memory for aliasing. */
19906 if (CONST_INT_P (count_exp))
19907 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19908 (count / size_needed) * size_needed);
19909 else
19910 dst = change_address (dst, BLKmode, destreg);
19912 /* Step 4: Epilogue to copy the remaining bytes. */
19916 /* When the main loop is done, COUNT_EXP might hold the original count,
19917 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19918 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19919 bytes. Compensate if needed. */
19921 if (size_needed < epilogue_size_needed)
19923 tmp =
19924 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19925 GEN_INT (size_needed - 1), count_exp, 1,
19926 OPTAB_DIRECT);
19927 if (tmp != count_exp)
19928 emit_move_insn (count_exp, tmp);
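/* E.g. for COUNT == 13 and SIZE_NEEDED == 4 the main loop has stored
   12 bytes, and the AND above reduces COUNT_EXP to 13 & 3 == 1 for
   the epilogue. */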
19930 emit_label (label);
19931 LABEL_NUSES (label) = 1;
19933 epilogue:
19934 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19936 if (force_loopy_epilogue)
19937 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19938 epilogue_size_needed);
19939 else
19940 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19941 epilogue_size_needed);
19943 if (jump_around_label)
19944 emit_label (jump_around_label);
19948 /* Expand the appropriate insns for doing strlen if not just doing
19949 repnz; scasb
19951 out = result, initialized with the start address
19952 align_rtx = alignment of the address.
19953 scratch = scratch register, initialized with the start address when
19954 not aligned, otherwise undefined
19956 This is just the body. It needs the initializations mentioned above and
19957 some address computing at the end. These things are done in i386.md. */
19960 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19964 rtx align_2_label = NULL_RTX;
19965 rtx align_3_label = NULL_RTX;
19966 rtx align_4_label = gen_label_rtx ();
19967 rtx end_0_label = gen_label_rtx ();
19969 rtx tmpreg = gen_reg_rtx (SImode);
19970 rtx scratch = gen_reg_rtx (SImode);
19974 if (CONST_INT_P (align_rtx))
19975 align = INTVAL (align_rtx);
19977 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19979 /* Is there a known alignment and is it less than 4? */
19980 if (align < 4)
19982 rtx scratch1 = gen_reg_rtx (Pmode);
19983 emit_move_insn (scratch1, out);
19984 /* Is there a known alignment and is it not 2? */
19985 if (align != 2)
19987 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19988 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19990 /* Leave just the 3 lower bits. */
19991 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19992 NULL_RTX, 0, OPTAB_WIDEN);
19994 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19995 Pmode, 1, align_4_label);
19996 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19997 Pmode, 1, align_2_label);
19998 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19999 Pmode, 1, align_3_label);
20003 /* Since the alignment is 2, we have to check 2 or 0 bytes;
20004 check whether it is aligned to a 4-byte boundary. */
20006 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
20007 NULL_RTX, 0, OPTAB_WIDEN);
20009 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
20010 Pmode, 1, align_4_label);
20013 mem = change_address (src, QImode, out);
20015 /* Now compare the bytes. */
20017 /* Compare the first n unaligned bytes on a byte-per-byte basis. */
20018 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
20019 QImode, 1, end_0_label);
20021 /* Increment the address. */
20022 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20024 /* Not needed with an alignment of 2. */
20025 if (align != 2)
20027 emit_label (align_2_label);
20029 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20030 end_0_label);
20032 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20034 emit_label (align_3_label);
20037 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
20038 end_0_label);
20040 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
20043 /* Generate a loop to check 4 bytes at a time. It is not a good idea
20044 to align this loop; that only makes the program larger and does not
20045 help to speed it up. */
20046 emit_label (align_4_label);
20048 mem = change_address (src, SImode, out);
20049 emit_move_insn (scratch, mem);
20050 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
20052 /* This formula yields a nonzero result iff one of the bytes is zero.
20053 This saves three branches inside the loop and many cycles. */
20055 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
20056 emit_insn (gen_one_cmplsi2 (scratch, scratch));
20057 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
20058 emit_insn (gen_andsi3 (tmpreg, tmpreg,
20059 gen_int_mode (0x80808080, SImode)));
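/* This is the classic zero-byte test: (X - 0x01010101) & ~X & 0x80808080
   is nonzero iff some byte of X is zero. E.g. for X == 0x12340078 the
   subtraction borrows through the zero byte, ~X keeps that byte's high
   bit, and the final mask leaves 0x00008000 -- byte 1 is zero. */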
20060 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
20061 align_4_label);
20063 if (TARGET_CMOVE)
20065 rtx reg = gen_reg_rtx (SImode);
20066 rtx reg2 = gen_reg_rtx (Pmode);
20067 emit_move_insn (reg, tmpreg);
20068 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
20070 /* If zero is not in the first two bytes, move two bytes forward. */
20071 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20072 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20073 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20074 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
20075 gen_rtx_IF_THEN_ELSE (SImode, tmp,
20076 reg,
20077 tmpreg)));
20078 /* Emit lea manually to avoid clobbering of flags. */
20079 emit_insn (gen_rtx_SET (SImode, reg2,
20080 gen_rtx_PLUS (Pmode, out, const2_rtx)));
20082 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20083 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
20084 emit_insn (gen_rtx_SET (VOIDmode, out,
20085 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
20086 reg2,
20087 out)));
20089 else
20091 rtx end_2_label = gen_label_rtx ();
20092 /* Is zero in the first two bytes? */
20094 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
20095 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20096 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
20097 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20098 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
20099 pc_rtx);
20100 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20101 JUMP_LABEL (tmp) = end_2_label;
20103 /* Not in the first two. Move two bytes forward. */
20104 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
20105 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
20107 emit_label (end_2_label);
20111 /* Avoid branch in fixing the byte. */
20112 tmpreg = gen_lowpart (QImode, tmpreg);
20113 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
20114 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
20115 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
20116 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
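/* At this point the zero byte is at OUT - 4 (TMPREG bit 7 set) or at
   OUT - 3. Doubling TMPREG's low byte moves bit 7 into the carry flag,
   so the carry-aware subtraction takes 4 or 3 off OUT without a branch,
   leaving OUT pointing at the terminating zero byte. */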
20118 emit_label (end_0_label);
20121 /* Expand strlen. */
20124 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
20126 rtx addr, scratch1, scratch2, scratch3, scratch4;
20128 /* The generic case of the strlen expander is long. Avoid expanding
20129 it unless TARGET_INLINE_ALL_STRINGOPS. */
20131 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20132 && !TARGET_INLINE_ALL_STRINGOPS
20133 && !optimize_insn_for_size_p ()
20134 && (!CONST_INT_P (align) || INTVAL (align) < 4))
20135 return 0;
20137 addr = force_reg (Pmode, XEXP (src, 0));
20138 scratch1 = gen_reg_rtx (Pmode);
20140 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
20141 && !optimize_insn_for_size_p ())
20143 /* Well, it seems that some optimizer does not combine a call like
20144 foo (strlen (bar), strlen (bar));
20145 when the move and the subtraction are done here. It does calculate
20146 the length just once when these instructions are done inside of
20147 output_strlen_unroll (). But I think that since &bar[strlen (bar)] is
20148 often used, and since I use one fewer register for the lifetime of
20149 output_strlen_unroll (), this is better. */
20151 emit_move_insn (out, addr);
20153 ix86_expand_strlensi_unroll_1 (out, src, align);
20155 /* strlensi_unroll_1 returns the address of the zero at the end of
20156 the string, like memchr(), so compute the length by subtracting
20157 the start address. */
20158 emit_insn (ix86_gen_sub3 (out, out, addr));
20160 else
20162 rtx unspec;
20164 /* Can't use this if the user has appropriated eax, ecx, or edi. */
20165 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
20166 return 0;
20168 scratch2 = gen_reg_rtx (Pmode);
20169 scratch3 = gen_reg_rtx (Pmode);
20170 scratch4 = force_reg (Pmode, constm1_rtx);
20172 emit_move_insn (scratch3, addr);
20173 eoschar = force_reg (QImode, eoschar);
20175 src = replace_equiv_address_nv (src, scratch3);
20177 /* If .md starts supporting :P, this can be done in .md. */
20178 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
20179 scratch4), UNSPEC_SCAS);
20180 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
20181 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
20182 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
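/* SCRATCH4 preloads the count register with -1, and repnz scasb then
   decrements it once per byte scanned, terminator included, leaving
   -(len + 2). The complement gives len + 1, and the final add of -1
   yields len. */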
20187 /* For a given symbol (function) construct code to compute the address
20188 of its PLT entry in the large x86-64 PIC model. */
20190 construct_plt_address (rtx symbol)
20192 rtx tmp = gen_reg_rtx (Pmode);
20193 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
20195 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
20196 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
20198 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
20199 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
20200 return tmp;
20204 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
20206 rtx pop, int sibcall)
20208 rtx use = NULL, call;
20210 if (pop == const0_rtx)
20211 pop = NULL;
20212 gcc_assert (!TARGET_64BIT || !pop);
20214 if (TARGET_MACHO && !TARGET_64BIT)
20217 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20218 fnaddr = machopic_indirect_call_target (fnaddr);
20223 /* Static functions and indirect calls don't need the pic register. */
20224 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20225 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20226 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20227 use_reg (&use, pic_offset_table_rtx);
20230 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20232 rtx al = gen_rtx_REG (QImode, AX_REG);
20233 emit_move_insn (al, callarg2);
20234 use_reg (&use, al);
20237 if (ix86_cmodel == CM_LARGE_PIC
20238 && MEM_P (fnaddr)
20239 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20240 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20241 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20242 else if (sibcall
20243 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20244 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20246 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20247 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20250 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20251 if (retval)
20252 call = gen_rtx_SET (VOIDmode, retval, call);
20254 if (pop)
20255 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20256 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20257 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20259 if (TARGET_64BIT
20260 && ix86_cfun_abi () == MS_ABI
20261 && (!callarg2 || INTVAL (callarg2) != -2))
20263 /* We need to represent that SI and DI registers are clobbered
20264 by the call. */
20265 static int clobbered_registers[] = {
20266 XMM6_REG, XMM7_REG, XMM8_REG,
20267 XMM9_REG, XMM10_REG, XMM11_REG,
20268 XMM12_REG, XMM13_REG, XMM14_REG,
20269 XMM15_REG, SI_REG, DI_REG
20272 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20273 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20274 UNSPEC_MS_TO_SYSV_CALL);
20275 unsigned int i;
20276 vec[0] = call;
20277 vec[1] = unspec;
20278 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20279 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20280 ? TImode : DImode,
20281 gen_rtx_REG
20282 (SSE_REGNO_P (clobbered_registers[i])
20283 ? TImode : DImode,
20284 clobbered_registers[i]));
20286 call = gen_rtx_PARALLEL (VOIDmode,
20287 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20288 + 2, vec));
20291 call = emit_call_insn (call);
20292 if (use)
20293 CALL_INSN_FUNCTION_USAGE (call) = use;
20297 /* Clear stack slot assignments remembered from previous functions.
20298 This is called from INIT_EXPANDERS once before RTL is emitted for each
20299 function. */
20301 static struct machine_function *
20302 ix86_init_machine_status (void)
20304 struct machine_function *f;
20306 f = ggc_alloc_cleared_machine_function ();
20307 f->use_fast_prologue_epilogue_nregs = -1;
20308 f->tls_descriptor_call_expanded_p = 0;
20309 f->call_abi = ix86_abi;
20311 return f;
20314 /* Return a MEM corresponding to a stack slot with mode MODE.
20315 Allocate a new slot if necessary.
20317 The RTL for a function can have several slots available: N is
20318 which slot to use. */
20321 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20323 struct stack_local_entry *s;
20325 gcc_assert (n < MAX_386_STACK_LOCALS);
20327 /* Virtual slot is valid only before vregs are instantiated. */
20328 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20330 for (s = ix86_stack_locals; s; s = s->next)
20331 if (s->mode == mode && s->n == n)
20332 return copy_rtx (s->rtl);
20334 s = ggc_alloc_stack_local_entry ();
20335 s->n = n;
20336 s->mode = mode;
20337 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20339 s->next = ix86_stack_locals;
20340 ix86_stack_locals = s;
20342 return s->rtl;
20344 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20346 static GTY(()) rtx ix86_tls_symbol;
20348 ix86_tls_get_addr (void)
20351 if (!ix86_tls_symbol)
20353 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20354 (TARGET_ANY_GNU_TLS
20355 && !TARGET_64BIT)
20356 ? "___tls_get_addr"
20357 : "__tls_get_addr");
20360 return ix86_tls_symbol;
20363 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20365 static GTY(()) rtx ix86_tls_module_base_symbol;
20367 ix86_tls_module_base (void)
20370 if (!ix86_tls_module_base_symbol)
20372 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20373 "_TLS_MODULE_BASE_");
20374 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20375 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20378 return ix86_tls_module_base_symbol;
20381 /* Calculate the length of the memory address in the instruction
20382 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20385 memory_address_length (rtx addr)
20387 struct ix86_address parts;
20388 rtx base, index, disp;
20392 if (GET_CODE (addr) == PRE_DEC
20393 || GET_CODE (addr) == POST_INC
20394 || GET_CODE (addr) == PRE_MODIFY
20395 || GET_CODE (addr) == POST_MODIFY)
20398 ok = ix86_decompose_address (addr, &parts);
20399 gcc_assert (ok);
20401 if (parts.base && GET_CODE (parts.base) == SUBREG)
20402 parts.base = SUBREG_REG (parts.base);
20403 if (parts.index && GET_CODE (parts.index) == SUBREG)
20404 parts.index = SUBREG_REG (parts.index);
20406 base = parts.base;
20407 index = parts.index;
20408 disp = parts.disp;
20411 /* Rule of thumb:
20412 - esp as the base always wants an index,
20413 - ebp as the base always wants a displacement,
20414 - r12 as the base always wants an index,
20415 - r13 as the base always wants a displacement. */
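/* E.g. "(%esp)" needs a SIB byte in addition to the modrm byte, and
   "(%ebp)" must be encoded as "0(%ebp)" with a disp8, so each costs
   one byte more than plain "(%eax)". */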
20417 /* Register Indirect. */
20418 if (base && !index && !disp)
20420 /* esp (for its index) and ebp (for its displacement) need
20421 the two-byte modrm form. Similarly for r12 and r13 in 64-bit
20422 code. */
20423 if (REG_P (addr)
20424 && (addr == arg_pointer_rtx
20425 || addr == frame_pointer_rtx
20426 || REGNO (addr) == SP_REG
20427 || REGNO (addr) == BP_REG
20428 || REGNO (addr) == R12_REG
20429 || REGNO (addr) == R13_REG))
20430 len = 1;
20433 /* Direct Addressing. In 64-bit mode mod 00 r/m 5
20434 is not disp32, but disp32(%rip), so for disp32
20435 SIB byte is needed, unless print_operand_address
20436 optimizes it into disp32(%rip) or (%rip) is implied
20437 by UNSPEC. */
20438 else if (disp && !base && !index)
20445 if (GET_CODE (disp) == CONST)
20446 symbol = XEXP (disp, 0);
20447 if (GET_CODE (symbol) == PLUS
20448 && CONST_INT_P (XEXP (symbol, 1)))
20449 symbol = XEXP (symbol, 0);
20451 if (GET_CODE (symbol) != LABEL_REF
20452 && (GET_CODE (symbol) != SYMBOL_REF
20453 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20454 && (GET_CODE (symbol) != UNSPEC
20455 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20456 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20457 len += 1;
20463 /* Find the length of the displacement constant. */
20465 if (disp)
20466 if (base && satisfies_constraint_K (disp))
20467 len = 1;
20468 else
20469 len = 4;
20471 /* ebp always wants a displacement. Similarly r13. */
20472 else if (base && REG_P (base)
20473 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20474 len = 1;
20476 /* An index requires the two-byte modrm form.... */
20477 if (index
20478 /* ...like esp (or r12), which always wants an index. */
20479 || base == arg_pointer_rtx
20480 || base == frame_pointer_rtx
20481 || (base && REG_P (base)
20482 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
20483 len += 1;
20486 return len;
20499 /* Compute default value for "length_immediate" attribute. When SHORTFORM
20500 is set, expect that the insn has an 8-bit immediate alternative. */
20502 ix86_attr_length_immediate_default (rtx insn, int shortform)
20506 extract_insn_cached (insn);
20507 for (i = recog_data.n_operands - 1; i >= 0; --i)
20508 if (CONSTANT_P (recog_data.operand[i]))
20510 enum attr_mode mode = get_attr_mode (insn);
20513 if (shortform && CONST_INT_P (recog_data.operand[i]))
20515 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20522 ival = trunc_int_for_mode (ival, HImode);
20525 ival = trunc_int_for_mode (ival, SImode);
20530 if (IN_RANGE (ival, -128, 127))
20531 return 1;
20547 /* Immediates for DImode instructions are encoded as 32-bit
20548 sign-extended values. */
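/* E.g. "addq $0x7fffffff, %rax" encodes directly, while an immediate
   of 0x80000000 does not sign-extend from 32 bits and has to be
   loaded into a register with movabs first. */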
20552 fatal_insn ("unknown insn mode", insn);
20557 /* Compute default value for "length_address" attribute. */
20559 ix86_attr_length_address_default (rtx insn)
20563 if (get_attr_type (insn) == TYPE_LEA)
20565 rtx set = PATTERN (insn), addr;
20567 if (GET_CODE (set) == PARALLEL)
20568 set = XVECEXP (set, 0, 0);
20570 gcc_assert (GET_CODE (set) == SET);
20572 addr = SET_SRC (set);
20573 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20575 if (GET_CODE (addr) == ZERO_EXTEND)
20576 addr = XEXP (addr, 0);
20577 if (GET_CODE (addr) == SUBREG)
20578 addr = SUBREG_REG (addr);
20581 return memory_address_length (addr);
20584 extract_insn_cached (insn);
20585 for (i = recog_data.n_operands - 1; i >= 0; --i)
20586 if (MEM_P (recog_data.operand[i]))
20588 constrain_operands_cached (reload_completed);
20589 if (which_alternative != -1)
20591 const char *constraints = recog_data.constraints[i];
20592 int alt = which_alternative;
20594 while (*constraints == '=' || *constraints == '+')
20595 constraints++;
20596 while (alt-- > 0)
20597 while (*constraints++ != ',')
20598 ;
20599 /* Skip ignored operands. */
20600 if (*constraints == 'X')
20601 continue;
20603 return memory_address_length (XEXP (recog_data.operand[i], 0));
20608 /* Compute default value for "length_vex" attribute. It includes the
20609 2- or 3-byte VEX prefix and 1 opcode byte. */
20612 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
20617 /* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX W
20618 bit needs the 3-byte VEX prefix. */
20619 if (!has_0f_opcode || has_vex_w)
20620 return 3 + 1;
20622 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
20623 if (!TARGET_64BIT)
20624 return 2 + 1;
20626 extract_insn_cached (insn);
20628 for (i = recog_data.n_operands - 1; i >= 0; --i)
20629 if (REG_P (recog_data.operand[i]))
20631 /* The REX.W bit needs the 3-byte VEX prefix. */
20632 if (GET_MODE (recog_data.operand[i]) == DImode
20633 && GENERAL_REG_P (recog_data.operand[i]))
20634 return 3 + 1;
20638 /* REX.X or REX.B bits need the 3-byte VEX prefix. */
20639 if (MEM_P (recog_data.operand[i])
20640 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
20641 return 3 + 1;
20644 return 2 + 1;
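/* For example, "vaddps %xmm1, %xmm2, %xmm3" fits the 2-byte (0xc5)
   prefix, while a DImode general register operand or an r8-r15
   base/index register inside a memory operand forces the 3-byte
   (0xc4) prefix counted above. */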
20647 /* Return the maximum number of instructions a cpu can issue. */
20650 ix86_issue_rate (void)
20654 case PROCESSOR_PENTIUM:
20655 case PROCESSOR_ATOM:
20657 return 2;
20659 case PROCESSOR_PENTIUMPRO:
20660 case PROCESSOR_PENTIUM4:
20661 case PROCESSOR_ATHLON:
20663 case PROCESSOR_AMDFAM10:
20664 case PROCESSOR_NOCONA:
20665 case PROCESSOR_GENERIC32:
20666 case PROCESSOR_GENERIC64:
20667 case PROCESSOR_BDVER1:
20668 return 3;
20670 case PROCESSOR_CORE2:
20671 return 4;
20673 default:
20674 return 1;
20678 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the
20679 flags set by DEP_INSN and nothing else that DEP_INSN sets. */
20682 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20686 /* Simplify the test for uninteresting insns. */
20687 if (insn_type != TYPE_SETCC
20688 && insn_type != TYPE_ICMOV
20689 && insn_type != TYPE_FCMOV
20690 && insn_type != TYPE_IBR)
20691 return false;
20693 if ((set = single_set (dep_insn)) != 0)
20695 set = SET_DEST (set);
20698 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20699 && XVECLEN (PATTERN (dep_insn), 0) == 2
20700 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20701 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20703 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
20704 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20709 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20710 return false;
20712 /* This test is true if the dependent insn reads the flags but
20713 not any other potentially set register. */
20714 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20715 return false;
20717 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
20718 return false;
20720 return true;
20723 /* Return true iff USE_INSN has a memory address with operands set by
20724 SET_INSN. */
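/* For example, on Pentium "mov $buf, %eax" immediately followed by
   "mov (%eax), %edx" trips the address-generation interlock, because
   the second insn's address operand is set by the first. */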
20727 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20730 extract_insn_cached (use_insn);
20731 for (i = recog_data.n_operands - 1; i >= 0; --i)
20732 if (MEM_P (recog_data.operand[i]))
20734 rtx addr = XEXP (recog_data.operand[i], 0);
20735 return modified_in_p (addr, set_insn) != 0;
20738 return false;
20741 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20743 enum attr_type insn_type, dep_insn_type;
20744 enum attr_memory memory;
20746 int dep_insn_code_number;
20748 /* Anti and output dependencies have zero cost on all CPUs. */
20749 if (REG_NOTE_KIND (link) != 0)
20750 return 0;
20752 dep_insn_code_number = recog_memoized (dep_insn);
20754 /* If we can't recognize the insns, we can't really do anything. */
20755 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20756 return cost;
20758 insn_type = get_attr_type (insn);
20759 dep_insn_type = get_attr_type (dep_insn);
20763 case PROCESSOR_PENTIUM:
20764 /* Address Generation Interlock adds a cycle of latency. */
20765 if (insn_type == TYPE_LEA)
20767 rtx addr = PATTERN (insn);
20769 if (GET_CODE (addr) == PARALLEL)
20770 addr = XVECEXP (addr, 0, 0);
20772 gcc_assert (GET_CODE (addr) == SET);
20774 addr = SET_SRC (addr);
20775 if (modified_in_p (addr, dep_insn))
20776 cost += 1;
20778 else if (ix86_agi_dependent (dep_insn, insn))
20779 cost += 1;
20781 /* ??? Compares pair with jump/setcc. */
20782 if (ix86_flags_dependent (insn, dep_insn, insn_type))
20783 cost = 0;
20785 /* Floating point stores require value to be ready one cycle earlier. */
20786 if (insn_type == TYPE_FMOV
20787 && get_attr_memory (insn) == MEMORY_STORE
20788 && !ix86_agi_dependent (dep_insn, insn))
20789 cost += 1;
20790 break;
20792 case PROCESSOR_PENTIUMPRO:
20793 memory = get_attr_memory (insn);
20795 /* INT->FP conversion is expensive. */
20796 if (get_attr_fp_int_src (dep_insn))
20797 cost += 5;
20799 /* There is one cycle extra latency between an FP op and a store. */
20800 if (insn_type == TYPE_FMOV
20801 && (set = single_set (dep_insn)) != NULL_RTX
20802 && (set2 = single_set (insn)) != NULL_RTX
20803 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20804 && MEM_P (SET_DEST (set2)))
20805 cost += 1;
20807 /* Show the ability of the reorder buffer to hide the latency of a load
20808 by executing it in parallel with the previous instruction when the
20809 previous instruction is not needed to compute the address. */
20810 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20811 && !ix86_agi_dependent (dep_insn, insn))
20813 /* Claim moves to take one cycle, as the core can issue one load
20814 at a time and the next load can start a cycle later. */
20815 if (dep_insn_type == TYPE_IMOV
20816 || dep_insn_type == TYPE_FMOV)
20817 cost = 1;
20824 memory = get_attr_memory (insn);
20826 /* The esp dependency is resolved before the instruction is really
20827 finished. */
20828 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20829 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20830 return 1;
20832 /* INT->FP conversion is expensive. */
20833 if (get_attr_fp_int_src (dep_insn))
20834 cost += 5;
20836 /* Show the ability of the reorder buffer to hide the latency of a load
20837 by executing it in parallel with the previous instruction when the
20838 previous instruction is not needed to compute the address. */
20839 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20840 && !ix86_agi_dependent (dep_insn, insn))
20842 /* Claim moves to take one cycle, as the core can issue one load
20843 at a time and the next load can start a cycle later. */
20844 if (dep_insn_type == TYPE_IMOV
20845 || dep_insn_type == TYPE_FMOV)
20846 cost = 1;
20854 case PROCESSOR_ATHLON:
20856 case PROCESSOR_AMDFAM10:
20857 case PROCESSOR_BDVER1:
20858 case PROCESSOR_ATOM:
20859 case PROCESSOR_GENERIC32:
20860 case PROCESSOR_GENERIC64:
20861 memory = get_attr_memory (insn);
20863 /* Show the ability of the reorder buffer to hide the latency of a load
20864 by executing it in parallel with the previous instruction when the
20865 previous instruction is not needed to compute the address. */
20866 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20867 && !ix86_agi_dependent (dep_insn, insn))
20869 enum attr_unit unit = get_attr_unit (insn);
20872 /* Because of the difference between the length of the integer and
20873 floating point unit pipeline preparation stages, the memory operands
20874 for floating point are cheaper.
20876 ??? For Athlon the difference is most probably 2. */
20877 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20878 loadcost = 3;
20879 else
20880 loadcost = TARGET_ATHLON ? 2 : 0;
20882 if (cost >= loadcost)
20883 cost -= loadcost;
20884 else
20885 cost = 0;
20895 /* How many alternative schedules to try. This should be as wide as the
20896 scheduling freedom in the DFA, but no wider. Making this value too
20897 large results in extra work for the scheduler. */
20900 ia32_multipass_dfa_lookahead (void)
20904 case PROCESSOR_PENTIUM:
20905 return 2;
20907 case PROCESSOR_PENTIUMPRO:
20909 return 1;
20911 default:
20912 return 0;
20917 /* Compute the alignment given to a constant that is being placed in memory.
20918 EXP is the constant and ALIGN is the alignment that the object would
20919 ordinarily have.
20920 The value of this function is used instead of that alignment to align
20921 the object. */
20924 ix86_constant_alignment (tree exp, int align)
20926 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20927 || TREE_CODE (exp) == INTEGER_CST)
20929 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20931 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20934 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20935 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20936 return BITS_PER_WORD;
20941 /* Compute the alignment for a static variable.
20942 TYPE is the data type, and ALIGN is the alignment that
20943 the object would ordinarily have. The value of this function is used
20944 instead of that alignment to align the object. */
20947 ix86_data_alignment (tree type, int align)
20949 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20951 if (AGGREGATE_TYPE_P (type)
20952 && TYPE_SIZE (type)
20953 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20954 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20955 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20956 && align < max_align)
20957 align = max_align;
20959 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
20960 to a 16-byte boundary. */
20963 if (AGGREGATE_TYPE_P (type)
20964 && TYPE_SIZE (type)
20965 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20966 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20967 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20968 return 128;
20971 if (TREE_CODE (type) == ARRAY_TYPE)
20973 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20975 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20978 else if (TREE_CODE (type) == COMPLEX_TYPE)
20981 if (TYPE_MODE (type) == DCmode && align < 64)
20983 if ((TYPE_MODE (type) == XCmode
20984 || TYPE_MODE (type) == TCmode) && align < 128)
20987 else if ((TREE_CODE (type) == RECORD_TYPE
20988 || TREE_CODE (type) == UNION_TYPE
20989 || TREE_CODE (type) == QUAL_UNION_TYPE)
20990 && TYPE_FIELDS (type))
20992 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20994 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20997 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20998 || TREE_CODE (type) == INTEGER_TYPE)
21000 if (TYPE_MODE (type) == DFmode && align < 64)
21002 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21009 /* Compute the alignment for a local variable or a stack slot. EXP is
21010 the data type or decl itself, MODE is the widest mode available and
21011 ALIGN is the alignment that the object would ordinarily have. The
21012 value of this macro is used instead of that alignment to align the
21016 ix86_local_alignment (tree exp, enum machine_mode mode,
21017 unsigned int align)
21021 if (exp && DECL_P (exp))
21023 type = TREE_TYPE (exp);
21032 /* Don't do dynamic stack realignment for long long objects with
21033 -mpreferred-stack-boundary=2. */
21036 && ix86_preferred_stack_boundary < 64
21037 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
21038 && (!type || !TYPE_USER_ALIGN (type))
21039 && (!decl || !DECL_USER_ALIGN (decl)))
21040 align = 32;
21042 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
21043 register in MODE. We will return the largest alignment of XF
21044 and DF. */
21045 if (!type)
21047 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
21048 align = GET_MODE_ALIGNMENT (DFmode);
21052 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
21053 to a 16-byte boundary. The exact wording is:
21055 An array uses the same alignment as its elements, except that a local or
21056 global array variable of length at least 16 bytes or
21057 a C99 variable-length array variable always has alignment of at least 16 bytes.
21059 This was added to allow use of aligned SSE instructions on arrays. The
21060 rule is meant for static storage (where the compiler cannot do the analysis
21061 by itself). We follow it for automatic variables only when convenient.
21062 We fully control everything in the function being compiled, and functions
21063 from other units cannot rely on the alignment.
21065 Exclude the va_list type. It is the common case of a local array where
21066 we cannot benefit from the alignment. */
21067 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
21070 if (AGGREGATE_TYPE_P (type)
21071 && (TYPE_MAIN_VARIANT (type)
21072 != TYPE_MAIN_VARIANT (va_list_type_node))
21073 && TYPE_SIZE (type)
21074 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
21075 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
21076 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
21077 return 128;
21079 if (TREE_CODE (type) == ARRAY_TYPE)
21081 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
21083 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
21086 else if (TREE_CODE (type) == COMPLEX_TYPE)
21088 if (TYPE_MODE (type) == DCmode && align < 64)
21090 if ((TYPE_MODE (type) == XCmode
21091 || TYPE_MODE (type) == TCmode) && align < 128)
21094 else if ((TREE_CODE (type) == RECORD_TYPE
21095 || TREE_CODE (type) == UNION_TYPE
21096 || TREE_CODE (type) == QUAL_UNION_TYPE)
21097 && TYPE_FIELDS (type))
21099 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
21101 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
21104 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
21105 || TREE_CODE (type) == INTEGER_TYPE)
21108 if (TYPE_MODE (type) == DFmode && align < 64)
21110 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
21116 /* Compute the minimum required alignment for dynamic stack realignment
21117 purposes for a local variable, parameter or a stack slot. EXP is
21118 the data type or decl itself, MODE is its mode and ALIGN is the
21119 alignment that the object would ordinarily have. */
21122 ix86_minimum_alignment (tree exp, enum machine_mode mode,
21123 unsigned int align)
21127 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
21130 if (exp && DECL_P (exp))
21132 type = TREE_TYPE (exp);
21141 /* Don't do dynamic stack realignment for long long objects with
21142 -mpreferred-stack-boundary=2. */
21143 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
21144 && (!type || !TYPE_USER_ALIGN (type))
21145 && (!decl || !DECL_USER_ALIGN (decl)))
21146 return 64;
21148 return align;
21151 /* Find a location for the static chain incoming to a nested function.
21152 This is a register, unless all free registers are used by arguments. */
21155 ix86_static_chain (const_tree fndecl, bool incoming_p)
21159 if (!DECL_STATIC_CHAIN (fndecl))
21160 return NULL;
21163 /* We always use R10 in 64-bit mode. */
21164 if (TARGET_64BIT)
21165 regno = R10_REG;
21166 else
21170 /* By default in 32-bit mode we use ECX to pass the static chain. */
21171 regno = CX_REG;
21173 fntype = TREE_TYPE (fndecl);
21174 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
21176 /* Fastcall functions use ecx/edx for arguments, which leaves
21177 us with EAX for the static chain. */
21178 regno = AX_REG;
21180 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
21182 /* Thiscall functions use ecx for arguments, which leaves
21183 us with EAX for the static chain. */
21184 regno = AX_REG;
21186 else if (ix86_function_regparm (fntype, fndecl) == 3)
21188 /* For regparm 3, we have no free call-clobbered registers in
21189 which to store the static chain. In order to implement this,
21190 we have the trampoline push the static chain to the stack.
21191 However, we can't push a value below the return address when
21192 we call the nested function directly, so we have to use an
21193 alternate entry point. For this we use ESI, and have the
21194 alternate entry point push ESI, so that things appear the
21195 same once we're executing the nested function. */
21198 if (fndecl == current_function_decl)
21199 ix86_static_chain_on_stack = true;
21200 return gen_frame_mem (SImode,
21201 plus_constant (arg_pointer_rtx, -8));
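/* The slot at arg_pointer - 8 sits just below the return address:
   either the trampoline pushed the chain value there directly, or a
   direct call went through the alternate entry point, which pushed
   ESI holding the chain. */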
21207 return gen_rtx_REG (Pmode, regno);
21210 /* Emit RTL insns to initialize the variable parts of a trampoline.
21211 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21212 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21213 to be passed to the target function. */
21216 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21220 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21227 /* Depending on the static chain location, either load a register
21228 with a constant, or push the constant to the stack. All of the
21229 instructions are the same size. */
21230 chain = ix86_static_chain (fndecl, true);
21232 if (REG_P (chain))
21233 if (REGNO (chain) == CX_REG)
21234 opcode = 0xb9;
21235 else if (REGNO (chain) == AX_REG)
21236 opcode = 0xb8;
21237 else
21238 gcc_unreachable ();
21240 else
21241 opcode = 0x68;
21243 mem = adjust_address (m_tramp, QImode, 0);
21244 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21246 mem = adjust_address (m_tramp, SImode, 1);
21247 emit_move_insn (mem, chain_value);
21249 /* Compute offset from the end of the jmp to the target function.
21250 In the case in which the trampoline stores the static chain on
21251 the stack, we need to skip the first insn which pushes the
21252 (call-saved) register static chain; this push is 1 byte. */
21253 disp = expand_binop (SImode, sub_optab, fnaddr,
21254 plus_constant (XEXP (m_tramp, 0),
21255 MEM_P (chain) ? 9 : 10),
21256 NULL_RTX, 1, OPTAB_DIRECT);
21258 mem = adjust_address (m_tramp, QImode, 5);
21259 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21261 mem = adjust_address (m_tramp, SImode, 6);
21262 emit_move_insn (mem, disp);
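/* The resulting 10-byte trampoline is <opcode> <chain_value:4> 0xe9
   <rel32:4>: a "mov $chain, %reg" or "push $chain" followed by a
   relative jmp. The displacement is taken from the end of the jmp at
   offset 10; in the push case it is biased by 1 so the jump lands past
   the 1-byte push at the function's alternate entry point. */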
21268 /* Load the function address to r11. Try to load address using
21269 the shorter movl instead of movabs. We may want to support
21270 movq for kernel mode, but the kernel does not use trampolines at
21271 the moment. */
21272 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21274 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21276 mem = adjust_address (m_tramp, HImode, offset);
21277 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21279 mem = adjust_address (m_tramp, SImode, offset + 2);
21280 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21285 mem = adjust_address (m_tramp, HImode, offset);
21286 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21288 mem = adjust_address (m_tramp, DImode, offset + 2);
21289 emit_move_insn (mem, fnaddr);
21293 /* Load static chain using movabs to r10. */
21294 mem = adjust_address (m_tramp, HImode, offset);
21295 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21297 mem = adjust_address (m_tramp, DImode, offset + 2);
21298 emit_move_insn (mem, chain_value);
21301 /* Jump to r11; the last (unused) byte is a nop, only there to
21302 pad the write out to a single 32-bit store. */
21303 mem = adjust_address (m_tramp, SImode, offset);
21304 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
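/* Stored little-endian, 0x90e3ff49 is the byte sequence 49 ff e3 90:
   "rex.wb jmp *%r11" followed by a nop. Likewise 0xbb41/0xbb49 above
   encode "movl $imm32, %r11d" / "movabs $imm64, %r11", and 0xba49
   encodes "movabs $imm64, %r10". */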
21307 gcc_assert (offset <= TRAMPOLINE_SIZE);
21310 #ifdef ENABLE_EXECUTE_STACK
21311 #ifdef CHECK_EXECUTE_STACK_ENABLED
21312 if (CHECK_EXECUTE_STACK_ENABLED)
21314 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21315 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21319 /* The following file contains several enumerations and data structures
21320 built from the definitions in i386-builtin-types.def. */
21322 #include "i386-builtin-types.inc"
21324 /* Table for the ix86 builtin non-function types. */
21325 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21327 /* Retrieve an element from the above table, building some of
21328 the types lazily. */
21331 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21333 unsigned int index;
21336 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21338 type = ix86_builtin_type_tab[(int) tcode];
21339 if (type)
21340 return type;
21342 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21343 if (tcode <= IX86_BT_LAST_VECT)
21345 enum machine_mode mode;
21347 index = tcode - IX86_BT_LAST_PRIM - 1;
21348 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21349 mode = ix86_builtin_type_vect_mode[index];
21351 type = build_vector_type_for_mode (itype, mode);
21355 else
21357 index = tcode - IX86_BT_LAST_VECT - 1;
21358 if (tcode <= IX86_BT_LAST_PTR)
21359 quals = TYPE_UNQUALIFIED;
21361 quals = TYPE_QUAL_CONST;
21363 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21364 if (quals != TYPE_UNQUALIFIED)
21365 itype = build_qualified_type (itype, quals);
21367 type = build_pointer_type (itype);
21370 ix86_builtin_type_tab[(int) tcode] = type;
21371 return type;
21374 /* Table for the ix86 builtin function types. */
21375 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21377 /* Retrieve an element from the above table, building some of
21378 the types lazily. */
21381 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21385 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21387 type = ix86_builtin_func_type_tab[(int) tcode];
21388 if (type)
21389 return type;
21391 if (tcode <= IX86_BT_LAST_FUNC)
21393 unsigned start = ix86_builtin_func_start[(int) tcode];
21394 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21395 tree rtype, atype, args = void_list_node;
21398 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21399 for (i = after - 1; i > start; --i)
21401 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21402 args = tree_cons (NULL, atype, args);
21405 type = build_function_type (rtype, args);
21407 else
21409 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21410 enum ix86_builtin_func_type icode;
21412 icode = ix86_builtin_func_alias_base[index];
21413 type = ix86_get_builtin_func_type (icode);
21416 ix86_builtin_func_type_tab[(int) tcode] = type;
21418 return type;
21421 /* Codes for all the SSE/MMX builtins. */
21422 enum ix86_builtins
21423 {
21424 IX86_BUILTIN_ADDPS,
21425 IX86_BUILTIN_ADDSS,
21426 IX86_BUILTIN_DIVPS,
21427 IX86_BUILTIN_DIVSS,
21428 IX86_BUILTIN_MULPS,
21429 IX86_BUILTIN_MULSS,
21430 IX86_BUILTIN_SUBPS,
21431 IX86_BUILTIN_SUBSS,
21433 IX86_BUILTIN_CMPEQPS,
21434 IX86_BUILTIN_CMPLTPS,
21435 IX86_BUILTIN_CMPLEPS,
21436 IX86_BUILTIN_CMPGTPS,
21437 IX86_BUILTIN_CMPGEPS,
21438 IX86_BUILTIN_CMPNEQPS,
21439 IX86_BUILTIN_CMPNLTPS,
21440 IX86_BUILTIN_CMPNLEPS,
21441 IX86_BUILTIN_CMPNGTPS,
21442 IX86_BUILTIN_CMPNGEPS,
21443 IX86_BUILTIN_CMPORDPS,
21444 IX86_BUILTIN_CMPUNORDPS,
21445 IX86_BUILTIN_CMPEQSS,
21446 IX86_BUILTIN_CMPLTSS,
21447 IX86_BUILTIN_CMPLESS,
21448 IX86_BUILTIN_CMPNEQSS,
21449 IX86_BUILTIN_CMPNLTSS,
21450 IX86_BUILTIN_CMPNLESS,
21451 IX86_BUILTIN_CMPNGTSS,
21452 IX86_BUILTIN_CMPNGESS,
21453 IX86_BUILTIN_CMPORDSS,
21454 IX86_BUILTIN_CMPUNORDSS,
21456 IX86_BUILTIN_COMIEQSS,
21457 IX86_BUILTIN_COMILTSS,
21458 IX86_BUILTIN_COMILESS,
21459 IX86_BUILTIN_COMIGTSS,
21460 IX86_BUILTIN_COMIGESS,
21461 IX86_BUILTIN_COMINEQSS,
21462 IX86_BUILTIN_UCOMIEQSS,
21463 IX86_BUILTIN_UCOMILTSS,
21464 IX86_BUILTIN_UCOMILESS,
21465 IX86_BUILTIN_UCOMIGTSS,
21466 IX86_BUILTIN_UCOMIGESS,
21467 IX86_BUILTIN_UCOMINEQSS,
21469 IX86_BUILTIN_CVTPI2PS,
21470 IX86_BUILTIN_CVTPS2PI,
21471 IX86_BUILTIN_CVTSI2SS,
21472 IX86_BUILTIN_CVTSI642SS,
21473 IX86_BUILTIN_CVTSS2SI,
21474 IX86_BUILTIN_CVTSS2SI64,
21475 IX86_BUILTIN_CVTTPS2PI,
21476 IX86_BUILTIN_CVTTSS2SI,
21477 IX86_BUILTIN_CVTTSS2SI64,
21479 IX86_BUILTIN_MAXPS,
21480 IX86_BUILTIN_MAXSS,
21481 IX86_BUILTIN_MINPS,
21482 IX86_BUILTIN_MINSS,
21484 IX86_BUILTIN_LOADUPS,
21485 IX86_BUILTIN_STOREUPS,
21486 IX86_BUILTIN_MOVSS,
21488 IX86_BUILTIN_MOVHLPS,
21489 IX86_BUILTIN_MOVLHPS,
21490 IX86_BUILTIN_LOADHPS,
21491 IX86_BUILTIN_LOADLPS,
21492 IX86_BUILTIN_STOREHPS,
21493 IX86_BUILTIN_STORELPS,
21495 IX86_BUILTIN_MASKMOVQ,
21496 IX86_BUILTIN_MOVMSKPS,
21497 IX86_BUILTIN_PMOVMSKB,
21499 IX86_BUILTIN_MOVNTPS,
21500 IX86_BUILTIN_MOVNTQ,
21502 IX86_BUILTIN_LOADDQU,
21503 IX86_BUILTIN_STOREDQU,
21505 IX86_BUILTIN_PACKSSWB,
21506 IX86_BUILTIN_PACKSSDW,
21507 IX86_BUILTIN_PACKUSWB,
21509 IX86_BUILTIN_PADDB,
21510 IX86_BUILTIN_PADDW,
21511 IX86_BUILTIN_PADDD,
21512 IX86_BUILTIN_PADDQ,
21513 IX86_BUILTIN_PADDSB,
21514 IX86_BUILTIN_PADDSW,
21515 IX86_BUILTIN_PADDUSB,
21516 IX86_BUILTIN_PADDUSW,
21517 IX86_BUILTIN_PSUBB,
21518 IX86_BUILTIN_PSUBW,
21519 IX86_BUILTIN_PSUBD,
21520 IX86_BUILTIN_PSUBQ,
21521 IX86_BUILTIN_PSUBSB,
21522 IX86_BUILTIN_PSUBSW,
21523 IX86_BUILTIN_PSUBUSB,
21524 IX86_BUILTIN_PSUBUSW,
21527 IX86_BUILTIN_PANDN,
21531 IX86_BUILTIN_PAVGB,
21532 IX86_BUILTIN_PAVGW,
21534 IX86_BUILTIN_PCMPEQB,
21535 IX86_BUILTIN_PCMPEQW,
21536 IX86_BUILTIN_PCMPEQD,
21537 IX86_BUILTIN_PCMPGTB,
21538 IX86_BUILTIN_PCMPGTW,
21539 IX86_BUILTIN_PCMPGTD,
21541 IX86_BUILTIN_PMADDWD,
21543 IX86_BUILTIN_PMAXSW,
21544 IX86_BUILTIN_PMAXUB,
21545 IX86_BUILTIN_PMINSW,
21546 IX86_BUILTIN_PMINUB,
21548 IX86_BUILTIN_PMULHUW,
21549 IX86_BUILTIN_PMULHW,
21550 IX86_BUILTIN_PMULLW,
21552 IX86_BUILTIN_PSADBW,
21553 IX86_BUILTIN_PSHUFW,
21555 IX86_BUILTIN_PSLLW,
21556 IX86_BUILTIN_PSLLD,
21557 IX86_BUILTIN_PSLLQ,
21558 IX86_BUILTIN_PSRAW,
21559 IX86_BUILTIN_PSRAD,
21560 IX86_BUILTIN_PSRLW,
21561 IX86_BUILTIN_PSRLD,
21562 IX86_BUILTIN_PSRLQ,
21563 IX86_BUILTIN_PSLLWI,
21564 IX86_BUILTIN_PSLLDI,
21565 IX86_BUILTIN_PSLLQI,
21566 IX86_BUILTIN_PSRAWI,
21567 IX86_BUILTIN_PSRADI,
21568 IX86_BUILTIN_PSRLWI,
21569 IX86_BUILTIN_PSRLDI,
21570 IX86_BUILTIN_PSRLQI,
21572 IX86_BUILTIN_PUNPCKHBW,
21573 IX86_BUILTIN_PUNPCKHWD,
21574 IX86_BUILTIN_PUNPCKHDQ,
21575 IX86_BUILTIN_PUNPCKLBW,
21576 IX86_BUILTIN_PUNPCKLWD,
21577 IX86_BUILTIN_PUNPCKLDQ,
21579 IX86_BUILTIN_SHUFPS,
21581 IX86_BUILTIN_RCPPS,
21582 IX86_BUILTIN_RCPSS,
21583 IX86_BUILTIN_RSQRTPS,
21584 IX86_BUILTIN_RSQRTPS_NR,
21585 IX86_BUILTIN_RSQRTSS,
21586 IX86_BUILTIN_RSQRTF,
21587 IX86_BUILTIN_SQRTPS,
21588 IX86_BUILTIN_SQRTPS_NR,
21589 IX86_BUILTIN_SQRTSS,
21591 IX86_BUILTIN_UNPCKHPS,
21592 IX86_BUILTIN_UNPCKLPS,
21594 IX86_BUILTIN_ANDPS,
21595 IX86_BUILTIN_ANDNPS,
21597 IX86_BUILTIN_XORPS,
21600 IX86_BUILTIN_LDMXCSR,
21601 IX86_BUILTIN_STMXCSR,
21602 IX86_BUILTIN_SFENCE,
21604 /* 3DNow! Original */
21605 IX86_BUILTIN_FEMMS,
21606 IX86_BUILTIN_PAVGUSB,
21607 IX86_BUILTIN_PF2ID,
21608 IX86_BUILTIN_PFACC,
21609 IX86_BUILTIN_PFADD,
21610 IX86_BUILTIN_PFCMPEQ,
21611 IX86_BUILTIN_PFCMPGE,
21612 IX86_BUILTIN_PFCMPGT,
21613 IX86_BUILTIN_PFMAX,
21614 IX86_BUILTIN_PFMIN,
21615 IX86_BUILTIN_PFMUL,
21616 IX86_BUILTIN_PFRCP,
21617 IX86_BUILTIN_PFRCPIT1,
21618 IX86_BUILTIN_PFRCPIT2,
21619 IX86_BUILTIN_PFRSQIT1,
21620 IX86_BUILTIN_PFRSQRT,
21621 IX86_BUILTIN_PFSUB,
21622 IX86_BUILTIN_PFSUBR,
21623 IX86_BUILTIN_PI2FD,
21624 IX86_BUILTIN_PMULHRW,
21626 /* 3DNow! Athlon Extensions */
21627 IX86_BUILTIN_PF2IW,
21628 IX86_BUILTIN_PFNACC,
21629 IX86_BUILTIN_PFPNACC,
21630 IX86_BUILTIN_PI2FW,
21631 IX86_BUILTIN_PSWAPDSI,
21632 IX86_BUILTIN_PSWAPDSF,
21635 IX86_BUILTIN_ADDPD,
21636 IX86_BUILTIN_ADDSD,
21637 IX86_BUILTIN_DIVPD,
21638 IX86_BUILTIN_DIVSD,
21639 IX86_BUILTIN_MULPD,
21640 IX86_BUILTIN_MULSD,
21641 IX86_BUILTIN_SUBPD,
21642 IX86_BUILTIN_SUBSD,
21644 IX86_BUILTIN_CMPEQPD,
21645 IX86_BUILTIN_CMPLTPD,
21646 IX86_BUILTIN_CMPLEPD,
21647 IX86_BUILTIN_CMPGTPD,
21648 IX86_BUILTIN_CMPGEPD,
21649 IX86_BUILTIN_CMPNEQPD,
21650 IX86_BUILTIN_CMPNLTPD,
21651 IX86_BUILTIN_CMPNLEPD,
21652 IX86_BUILTIN_CMPNGTPD,
21653 IX86_BUILTIN_CMPNGEPD,
21654 IX86_BUILTIN_CMPORDPD,
21655 IX86_BUILTIN_CMPUNORDPD,
21656 IX86_BUILTIN_CMPEQSD,
21657 IX86_BUILTIN_CMPLTSD,
21658 IX86_BUILTIN_CMPLESD,
21659 IX86_BUILTIN_CMPNEQSD,
21660 IX86_BUILTIN_CMPNLTSD,
21661 IX86_BUILTIN_CMPNLESD,
21662 IX86_BUILTIN_CMPORDSD,
21663 IX86_BUILTIN_CMPUNORDSD,
21665 IX86_BUILTIN_COMIEQSD,
21666 IX86_BUILTIN_COMILTSD,
21667 IX86_BUILTIN_COMILESD,
21668 IX86_BUILTIN_COMIGTSD,
21669 IX86_BUILTIN_COMIGESD,
21670 IX86_BUILTIN_COMINEQSD,
21671 IX86_BUILTIN_UCOMIEQSD,
21672 IX86_BUILTIN_UCOMILTSD,
21673 IX86_BUILTIN_UCOMILESD,
21674 IX86_BUILTIN_UCOMIGTSD,
21675 IX86_BUILTIN_UCOMIGESD,
21676 IX86_BUILTIN_UCOMINEQSD,
21678 IX86_BUILTIN_MAXPD,
21679 IX86_BUILTIN_MAXSD,
21680 IX86_BUILTIN_MINPD,
21681 IX86_BUILTIN_MINSD,
21683 IX86_BUILTIN_ANDPD,
21684 IX86_BUILTIN_ANDNPD,
21686 IX86_BUILTIN_XORPD,
21688 IX86_BUILTIN_SQRTPD,
21689 IX86_BUILTIN_SQRTSD,
21691 IX86_BUILTIN_UNPCKHPD,
21692 IX86_BUILTIN_UNPCKLPD,
21694 IX86_BUILTIN_SHUFPD,
21696 IX86_BUILTIN_LOADUPD,
21697 IX86_BUILTIN_STOREUPD,
21698 IX86_BUILTIN_MOVSD,
21700 IX86_BUILTIN_LOADHPD,
21701 IX86_BUILTIN_LOADLPD,
21703 IX86_BUILTIN_CVTDQ2PD,
21704 IX86_BUILTIN_CVTDQ2PS,
21706 IX86_BUILTIN_CVTPD2DQ,
21707 IX86_BUILTIN_CVTPD2PI,
21708 IX86_BUILTIN_CVTPD2PS,
21709 IX86_BUILTIN_CVTTPD2DQ,
21710 IX86_BUILTIN_CVTTPD2PI,
21712 IX86_BUILTIN_CVTPI2PD,
21713 IX86_BUILTIN_CVTSI2SD,
21714 IX86_BUILTIN_CVTSI642SD,
21716 IX86_BUILTIN_CVTSD2SI,
21717 IX86_BUILTIN_CVTSD2SI64,
21718 IX86_BUILTIN_CVTSD2SS,
21719 IX86_BUILTIN_CVTSS2SD,
21720 IX86_BUILTIN_CVTTSD2SI,
21721 IX86_BUILTIN_CVTTSD2SI64,
21723 IX86_BUILTIN_CVTPS2DQ,
21724 IX86_BUILTIN_CVTPS2PD,
21725 IX86_BUILTIN_CVTTPS2DQ,
21727 IX86_BUILTIN_MOVNTI,
21728 IX86_BUILTIN_MOVNTPD,
21729 IX86_BUILTIN_MOVNTDQ,
21731 IX86_BUILTIN_MOVQ128,
21734 IX86_BUILTIN_MASKMOVDQU,
21735 IX86_BUILTIN_MOVMSKPD,
21736 IX86_BUILTIN_PMOVMSKB128,
21738 IX86_BUILTIN_PACKSSWB128,
21739 IX86_BUILTIN_PACKSSDW128,
21740 IX86_BUILTIN_PACKUSWB128,
21742 IX86_BUILTIN_PADDB128,
21743 IX86_BUILTIN_PADDW128,
21744 IX86_BUILTIN_PADDD128,
21745 IX86_BUILTIN_PADDQ128,
21746 IX86_BUILTIN_PADDSB128,
21747 IX86_BUILTIN_PADDSW128,
21748 IX86_BUILTIN_PADDUSB128,
21749 IX86_BUILTIN_PADDUSW128,
21750 IX86_BUILTIN_PSUBB128,
21751 IX86_BUILTIN_PSUBW128,
21752 IX86_BUILTIN_PSUBD128,
21753 IX86_BUILTIN_PSUBQ128,
21754 IX86_BUILTIN_PSUBSB128,
21755 IX86_BUILTIN_PSUBSW128,
21756 IX86_BUILTIN_PSUBUSB128,
21757 IX86_BUILTIN_PSUBUSW128,
21759 IX86_BUILTIN_PAND128,
21760 IX86_BUILTIN_PANDN128,
21761 IX86_BUILTIN_POR128,
21762 IX86_BUILTIN_PXOR128,
21764 IX86_BUILTIN_PAVGB128,
21765 IX86_BUILTIN_PAVGW128,
21767 IX86_BUILTIN_PCMPEQB128,
21768 IX86_BUILTIN_PCMPEQW128,
21769 IX86_BUILTIN_PCMPEQD128,
21770 IX86_BUILTIN_PCMPGTB128,
21771 IX86_BUILTIN_PCMPGTW128,
21772 IX86_BUILTIN_PCMPGTD128,
21774 IX86_BUILTIN_PMADDWD128,
21776 IX86_BUILTIN_PMAXSW128,
21777 IX86_BUILTIN_PMAXUB128,
21778 IX86_BUILTIN_PMINSW128,
21779 IX86_BUILTIN_PMINUB128,
21781 IX86_BUILTIN_PMULUDQ,
21782 IX86_BUILTIN_PMULUDQ128,
21783 IX86_BUILTIN_PMULHUW128,
21784 IX86_BUILTIN_PMULHW128,
21785 IX86_BUILTIN_PMULLW128,
21787 IX86_BUILTIN_PSADBW128,
21788 IX86_BUILTIN_PSHUFHW,
21789 IX86_BUILTIN_PSHUFLW,
21790 IX86_BUILTIN_PSHUFD,
21792 IX86_BUILTIN_PSLLDQI128,
21793 IX86_BUILTIN_PSLLWI128,
21794 IX86_BUILTIN_PSLLDI128,
21795 IX86_BUILTIN_PSLLQI128,
21796 IX86_BUILTIN_PSRAWI128,
21797 IX86_BUILTIN_PSRADI128,
21798 IX86_BUILTIN_PSRLDQI128,
21799 IX86_BUILTIN_PSRLWI128,
21800 IX86_BUILTIN_PSRLDI128,
21801 IX86_BUILTIN_PSRLQI128,
21803 IX86_BUILTIN_PSLLDQ128,
21804 IX86_BUILTIN_PSLLW128,
21805 IX86_BUILTIN_PSLLD128,
21806 IX86_BUILTIN_PSLLQ128,
21807 IX86_BUILTIN_PSRAW128,
21808 IX86_BUILTIN_PSRAD128,
21809 IX86_BUILTIN_PSRLW128,
21810 IX86_BUILTIN_PSRLD128,
21811 IX86_BUILTIN_PSRLQ128,
21813 IX86_BUILTIN_PUNPCKHBW128,
21814 IX86_BUILTIN_PUNPCKHWD128,
21815 IX86_BUILTIN_PUNPCKHDQ128,
21816 IX86_BUILTIN_PUNPCKHQDQ128,
21817 IX86_BUILTIN_PUNPCKLBW128,
21818 IX86_BUILTIN_PUNPCKLWD128,
21819 IX86_BUILTIN_PUNPCKLDQ128,
21820 IX86_BUILTIN_PUNPCKLQDQ128,
21822 IX86_BUILTIN_CLFLUSH,
21823 IX86_BUILTIN_MFENCE,
21824 IX86_BUILTIN_LFENCE,
21826 IX86_BUILTIN_BSRSI,
21827 IX86_BUILTIN_BSRDI,
21828 IX86_BUILTIN_RDPMC,
21829 IX86_BUILTIN_RDTSC,
21830 IX86_BUILTIN_RDTSCP,
21831 IX86_BUILTIN_ROLQI,
21832 IX86_BUILTIN_ROLHI,
21833 IX86_BUILTIN_RORQI,
21834 IX86_BUILTIN_RORHI,
21837 IX86_BUILTIN_ADDSUBPS,
21838 IX86_BUILTIN_HADDPS,
21839 IX86_BUILTIN_HSUBPS,
21840 IX86_BUILTIN_MOVSHDUP,
21841 IX86_BUILTIN_MOVSLDUP,
21842 IX86_BUILTIN_ADDSUBPD,
21843 IX86_BUILTIN_HADDPD,
21844 IX86_BUILTIN_HSUBPD,
21845 IX86_BUILTIN_LDDQU,
21847 IX86_BUILTIN_MONITOR,
21848 IX86_BUILTIN_MWAIT,
21851 IX86_BUILTIN_PHADDW,
21852 IX86_BUILTIN_PHADDD,
21853 IX86_BUILTIN_PHADDSW,
21854 IX86_BUILTIN_PHSUBW,
21855 IX86_BUILTIN_PHSUBD,
21856 IX86_BUILTIN_PHSUBSW,
21857 IX86_BUILTIN_PMADDUBSW,
21858 IX86_BUILTIN_PMULHRSW,
21859 IX86_BUILTIN_PSHUFB,
21860 IX86_BUILTIN_PSIGNB,
21861 IX86_BUILTIN_PSIGNW,
21862 IX86_BUILTIN_PSIGND,
21863 IX86_BUILTIN_PALIGNR,
21864 IX86_BUILTIN_PABSB,
21865 IX86_BUILTIN_PABSW,
21866 IX86_BUILTIN_PABSD,
21868 IX86_BUILTIN_PHADDW128,
21869 IX86_BUILTIN_PHADDD128,
21870 IX86_BUILTIN_PHADDSW128,
21871 IX86_BUILTIN_PHSUBW128,
21872 IX86_BUILTIN_PHSUBD128,
21873 IX86_BUILTIN_PHSUBSW128,
21874 IX86_BUILTIN_PMADDUBSW128,
21875 IX86_BUILTIN_PMULHRSW128,
21876 IX86_BUILTIN_PSHUFB128,
21877 IX86_BUILTIN_PSIGNB128,
21878 IX86_BUILTIN_PSIGNW128,
21879 IX86_BUILTIN_PSIGND128,
21880 IX86_BUILTIN_PALIGNR128,
21881 IX86_BUILTIN_PABSB128,
21882 IX86_BUILTIN_PABSW128,
21883 IX86_BUILTIN_PABSD128,
21885 /* AMDFAM10 - SSE4A New Instructions. */
21886 IX86_BUILTIN_MOVNTSD,
21887 IX86_BUILTIN_MOVNTSS,
21888 IX86_BUILTIN_EXTRQI,
21889 IX86_BUILTIN_EXTRQ,
21890 IX86_BUILTIN_INSERTQI,
21891 IX86_BUILTIN_INSERTQ,
21894 IX86_BUILTIN_BLENDPD,
21895 IX86_BUILTIN_BLENDPS,
21896 IX86_BUILTIN_BLENDVPD,
21897 IX86_BUILTIN_BLENDVPS,
21898 IX86_BUILTIN_PBLENDVB128,
21899 IX86_BUILTIN_PBLENDW128,
21904 IX86_BUILTIN_INSERTPS128,
21906 IX86_BUILTIN_MOVNTDQA,
21907 IX86_BUILTIN_MPSADBW128,
21908 IX86_BUILTIN_PACKUSDW128,
21909 IX86_BUILTIN_PCMPEQQ,
21910 IX86_BUILTIN_PHMINPOSUW128,
21912 IX86_BUILTIN_PMAXSB128,
21913 IX86_BUILTIN_PMAXSD128,
21914 IX86_BUILTIN_PMAXUD128,
21915 IX86_BUILTIN_PMAXUW128,
21917 IX86_BUILTIN_PMINSB128,
21918 IX86_BUILTIN_PMINSD128,
21919 IX86_BUILTIN_PMINUD128,
21920 IX86_BUILTIN_PMINUW128,
21922 IX86_BUILTIN_PMOVSXBW128,
21923 IX86_BUILTIN_PMOVSXBD128,
21924 IX86_BUILTIN_PMOVSXBQ128,
21925 IX86_BUILTIN_PMOVSXWD128,
21926 IX86_BUILTIN_PMOVSXWQ128,
21927 IX86_BUILTIN_PMOVSXDQ128,
21929 IX86_BUILTIN_PMOVZXBW128,
21930 IX86_BUILTIN_PMOVZXBD128,
21931 IX86_BUILTIN_PMOVZXBQ128,
21932 IX86_BUILTIN_PMOVZXWD128,
21933 IX86_BUILTIN_PMOVZXWQ128,
21934 IX86_BUILTIN_PMOVZXDQ128,
21936 IX86_BUILTIN_PMULDQ128,
21937 IX86_BUILTIN_PMULLD128,
21939 IX86_BUILTIN_ROUNDPD,
21940 IX86_BUILTIN_ROUNDPS,
21941 IX86_BUILTIN_ROUNDSD,
21942 IX86_BUILTIN_ROUNDSS,
21944 IX86_BUILTIN_PTESTZ,
21945 IX86_BUILTIN_PTESTC,
21946 IX86_BUILTIN_PTESTNZC,
21948 IX86_BUILTIN_VEC_INIT_V2SI,
21949 IX86_BUILTIN_VEC_INIT_V4HI,
21950 IX86_BUILTIN_VEC_INIT_V8QI,
21951 IX86_BUILTIN_VEC_EXT_V2DF,
21952 IX86_BUILTIN_VEC_EXT_V2DI,
21953 IX86_BUILTIN_VEC_EXT_V4SF,
21954 IX86_BUILTIN_VEC_EXT_V4SI,
21955 IX86_BUILTIN_VEC_EXT_V8HI,
21956 IX86_BUILTIN_VEC_EXT_V2SI,
21957 IX86_BUILTIN_VEC_EXT_V4HI,
21958 IX86_BUILTIN_VEC_EXT_V16QI,
21959 IX86_BUILTIN_VEC_SET_V2DI,
21960 IX86_BUILTIN_VEC_SET_V4SF,
21961 IX86_BUILTIN_VEC_SET_V4SI,
21962 IX86_BUILTIN_VEC_SET_V8HI,
21963 IX86_BUILTIN_VEC_SET_V4HI,
21964 IX86_BUILTIN_VEC_SET_V16QI,
21966 IX86_BUILTIN_VEC_PACK_SFIX,
21969 IX86_BUILTIN_CRC32QI,
21970 IX86_BUILTIN_CRC32HI,
21971 IX86_BUILTIN_CRC32SI,
21972 IX86_BUILTIN_CRC32DI,
21974 IX86_BUILTIN_PCMPESTRI128,
21975 IX86_BUILTIN_PCMPESTRM128,
21976 IX86_BUILTIN_PCMPESTRA128,
21977 IX86_BUILTIN_PCMPESTRC128,
21978 IX86_BUILTIN_PCMPESTRO128,
21979 IX86_BUILTIN_PCMPESTRS128,
21980 IX86_BUILTIN_PCMPESTRZ128,
21981 IX86_BUILTIN_PCMPISTRI128,
21982 IX86_BUILTIN_PCMPISTRM128,
21983 IX86_BUILTIN_PCMPISTRA128,
21984 IX86_BUILTIN_PCMPISTRC128,
21985 IX86_BUILTIN_PCMPISTRO128,
21986 IX86_BUILTIN_PCMPISTRS128,
21987 IX86_BUILTIN_PCMPISTRZ128,
21989 IX86_BUILTIN_PCMPGTQ,
21991 /* AES instructions */
21992 IX86_BUILTIN_AESENC128,
21993 IX86_BUILTIN_AESENCLAST128,
21994 IX86_BUILTIN_AESDEC128,
21995 IX86_BUILTIN_AESDECLAST128,
21996 IX86_BUILTIN_AESIMC128,
21997 IX86_BUILTIN_AESKEYGENASSIST128,
21999 /* PCLMUL instruction */
22000 IX86_BUILTIN_PCLMULQDQ128,
22003 IX86_BUILTIN_ADDPD256,
  IX86_BUILTIN_ADDPS256,
  IX86_BUILTIN_ADDSUBPD256,
  IX86_BUILTIN_ADDSUBPS256,
  IX86_BUILTIN_ANDPD256,
  IX86_BUILTIN_ANDPS256,
  IX86_BUILTIN_ANDNPD256,
  IX86_BUILTIN_ANDNPS256,
  IX86_BUILTIN_BLENDPD256,
  IX86_BUILTIN_BLENDPS256,
  IX86_BUILTIN_BLENDVPD256,
  IX86_BUILTIN_BLENDVPS256,
  IX86_BUILTIN_DIVPD256,
  IX86_BUILTIN_DIVPS256,
  IX86_BUILTIN_DPPS256,
  IX86_BUILTIN_HADDPD256,
  IX86_BUILTIN_HADDPS256,
  IX86_BUILTIN_HSUBPD256,
  IX86_BUILTIN_HSUBPS256,
  IX86_BUILTIN_MAXPD256,
  IX86_BUILTIN_MAXPS256,
  IX86_BUILTIN_MINPD256,
  IX86_BUILTIN_MINPS256,
  IX86_BUILTIN_MULPD256,
  IX86_BUILTIN_MULPS256,
  IX86_BUILTIN_ORPD256,
  IX86_BUILTIN_ORPS256,
  IX86_BUILTIN_SHUFPD256,
  IX86_BUILTIN_SHUFPS256,
  IX86_BUILTIN_SUBPD256,
  IX86_BUILTIN_SUBPS256,
  IX86_BUILTIN_XORPD256,
  IX86_BUILTIN_XORPS256,
  IX86_BUILTIN_CMPSD,
  IX86_BUILTIN_CMPSS,
  IX86_BUILTIN_CMPPD,
  IX86_BUILTIN_CMPPS,
  IX86_BUILTIN_CMPPD256,
  IX86_BUILTIN_CMPPS256,
  IX86_BUILTIN_CVTDQ2PD256,
  IX86_BUILTIN_CVTDQ2PS256,
  IX86_BUILTIN_CVTPD2PS256,
  IX86_BUILTIN_CVTPS2DQ256,
  IX86_BUILTIN_CVTPS2PD256,
  IX86_BUILTIN_CVTTPD2DQ256,
  IX86_BUILTIN_CVTPD2DQ256,
  IX86_BUILTIN_CVTTPS2DQ256,
  IX86_BUILTIN_EXTRACTF128PD256,
  IX86_BUILTIN_EXTRACTF128PS256,
  IX86_BUILTIN_EXTRACTF128SI256,
  IX86_BUILTIN_VZEROALL,
  IX86_BUILTIN_VZEROUPPER,
  IX86_BUILTIN_VPERMILVARPD,
  IX86_BUILTIN_VPERMILVARPS,
  IX86_BUILTIN_VPERMILVARPD256,
  IX86_BUILTIN_VPERMILVARPS256,
  IX86_BUILTIN_VPERMILPD,
  IX86_BUILTIN_VPERMILPS,
  IX86_BUILTIN_VPERMILPD256,
  IX86_BUILTIN_VPERMILPS256,
  IX86_BUILTIN_VPERMIL2PD,
  IX86_BUILTIN_VPERMIL2PS,
  IX86_BUILTIN_VPERMIL2PD256,
  IX86_BUILTIN_VPERMIL2PS256,
  IX86_BUILTIN_VPERM2F128PD256,
  IX86_BUILTIN_VPERM2F128PS256,
  IX86_BUILTIN_VPERM2F128SI256,
  IX86_BUILTIN_VBROADCASTSS,
  IX86_BUILTIN_VBROADCASTSD256,
  IX86_BUILTIN_VBROADCASTSS256,
  IX86_BUILTIN_VBROADCASTPD256,
  IX86_BUILTIN_VBROADCASTPS256,
  IX86_BUILTIN_VINSERTF128PD256,
  IX86_BUILTIN_VINSERTF128PS256,
  IX86_BUILTIN_VINSERTF128SI256,
  IX86_BUILTIN_LOADUPD256,
  IX86_BUILTIN_LOADUPS256,
  IX86_BUILTIN_STOREUPD256,
  IX86_BUILTIN_STOREUPS256,
  IX86_BUILTIN_LDDQU256,
  IX86_BUILTIN_MOVNTDQ256,
  IX86_BUILTIN_MOVNTPD256,
  IX86_BUILTIN_MOVNTPS256,
  IX86_BUILTIN_LOADDQU256,
  IX86_BUILTIN_STOREDQU256,
  IX86_BUILTIN_MASKLOADPD,
  IX86_BUILTIN_MASKLOADPS,
  IX86_BUILTIN_MASKSTOREPD,
  IX86_BUILTIN_MASKSTOREPS,
  IX86_BUILTIN_MASKLOADPD256,
  IX86_BUILTIN_MASKLOADPS256,
  IX86_BUILTIN_MASKSTOREPD256,
  IX86_BUILTIN_MASKSTOREPS256,
  IX86_BUILTIN_MOVSHDUP256,
  IX86_BUILTIN_MOVSLDUP256,
  IX86_BUILTIN_MOVDDUP256,

  IX86_BUILTIN_SQRTPD256,
  IX86_BUILTIN_SQRTPS256,
  IX86_BUILTIN_SQRTPS_NR256,
  IX86_BUILTIN_RSQRTPS256,
  IX86_BUILTIN_RSQRTPS_NR256,

  IX86_BUILTIN_RCPPS256,

  IX86_BUILTIN_ROUNDPD256,
  IX86_BUILTIN_ROUNDPS256,

  IX86_BUILTIN_UNPCKHPD256,
  IX86_BUILTIN_UNPCKLPD256,
  IX86_BUILTIN_UNPCKHPS256,
  IX86_BUILTIN_UNPCKLPS256,

  IX86_BUILTIN_SI256_SI,
  IX86_BUILTIN_PS256_PS,
  IX86_BUILTIN_PD256_PD,
  IX86_BUILTIN_SI_SI256,
  IX86_BUILTIN_PS_PS256,
  IX86_BUILTIN_PD_PD256,

  IX86_BUILTIN_VTESTZPD,
  IX86_BUILTIN_VTESTCPD,
  IX86_BUILTIN_VTESTNZCPD,
  IX86_BUILTIN_VTESTZPS,
  IX86_BUILTIN_VTESTCPS,
  IX86_BUILTIN_VTESTNZCPS,
  IX86_BUILTIN_VTESTZPD256,
  IX86_BUILTIN_VTESTCPD256,
  IX86_BUILTIN_VTESTNZCPD256,
  IX86_BUILTIN_VTESTZPS256,
  IX86_BUILTIN_VTESTCPS256,
  IX86_BUILTIN_VTESTNZCPS256,
  IX86_BUILTIN_PTESTZ256,
  IX86_BUILTIN_PTESTC256,
  IX86_BUILTIN_PTESTNZC256,

  IX86_BUILTIN_MOVMSKPD256,
  IX86_BUILTIN_MOVMSKPS256,

  /* TFmode support builtins.  */
  IX86_BUILTIN_HUGE_VALQ,
  IX86_BUILTIN_FABSQ,
  IX86_BUILTIN_COPYSIGNQ,

  /* Vectorizer support builtins.  */
  IX86_BUILTIN_CPYSGNPS,
  IX86_BUILTIN_CPYSGNPD,

  IX86_BUILTIN_CVTUDQ2PS,

  IX86_BUILTIN_VEC_PERM_V2DF,
  IX86_BUILTIN_VEC_PERM_V4SF,
  IX86_BUILTIN_VEC_PERM_V2DI,
  IX86_BUILTIN_VEC_PERM_V4SI,
  IX86_BUILTIN_VEC_PERM_V8HI,
  IX86_BUILTIN_VEC_PERM_V16QI,
  IX86_BUILTIN_VEC_PERM_V2DI_U,
  IX86_BUILTIN_VEC_PERM_V4SI_U,
  IX86_BUILTIN_VEC_PERM_V8HI_U,
  IX86_BUILTIN_VEC_PERM_V16QI_U,
  IX86_BUILTIN_VEC_PERM_V4DF,
  IX86_BUILTIN_VEC_PERM_V8SF,

  /* FMA4 and XOP instructions.  */
  IX86_BUILTIN_VFMADDSS,
  IX86_BUILTIN_VFMADDSD,
  IX86_BUILTIN_VFMADDPS,
  IX86_BUILTIN_VFMADDPD,
  IX86_BUILTIN_VFMSUBSS,
  IX86_BUILTIN_VFMSUBSD,
  IX86_BUILTIN_VFMSUBPS,
  IX86_BUILTIN_VFMSUBPD,
  IX86_BUILTIN_VFMADDSUBPS,
  IX86_BUILTIN_VFMADDSUBPD,
  IX86_BUILTIN_VFMSUBADDPS,
  IX86_BUILTIN_VFMSUBADDPD,
  IX86_BUILTIN_VFNMADDSS,
  IX86_BUILTIN_VFNMADDSD,
  IX86_BUILTIN_VFNMADDPS,
  IX86_BUILTIN_VFNMADDPD,
  IX86_BUILTIN_VFNMSUBSS,
  IX86_BUILTIN_VFNMSUBSD,
  IX86_BUILTIN_VFNMSUBPS,
  IX86_BUILTIN_VFNMSUBPD,
  IX86_BUILTIN_VFMADDPS256,
  IX86_BUILTIN_VFMADDPD256,
  IX86_BUILTIN_VFMSUBPS256,
  IX86_BUILTIN_VFMSUBPD256,
  IX86_BUILTIN_VFMADDSUBPS256,
  IX86_BUILTIN_VFMADDSUBPD256,
  IX86_BUILTIN_VFMSUBADDPS256,
  IX86_BUILTIN_VFMSUBADDPD256,
  IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,

  IX86_BUILTIN_MAX
};
/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		     /* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			     /* isa_flags this builtin is defined for */
  bool const_p;			     /* true if the declaration is constant */
  bool set_and_not_built_p;	     /* recorded, but decl not yet built */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the
   MASK of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   it isn't an instruction set in the current ISA in case the user uses
   function specific options for a different ISA, so that we don't get scope
   errors if a builtin is added in the middle of a function scope.  */
static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  /* The builtin is not in the current ISA; record a stub so the
	     decl can be built later by ix86_add_new_builtins.  */
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
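
/* Illustrative usage sketch, not part of the original file: this is the
   pattern the builtin-initialization code follows when registering one
   entry.  The mask, name, type code and builtin code below are example
   values drawn from the bdesc tables later in this file.  */
#if 0
static void
example_def_builtin_usage (void)
{
  /* Register __builtin_ia32_movntps for SSE.  If SSE is absent from
     ix86_isa_flags and the front end has its own builtin_function hook,
     def_builtin records only a stub in ix86_builtins_isa; the decl itself
     is built later by ix86_add_new_builtins once the ISA is enabled.  */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps",
	       VOID_FTYPE_PFLOAT_V4SF, IX86_BUILTIN_MOVNTPS);
}
#endif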
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
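
/* Illustrative sketch, not part of the original file: side-effect-free
   arithmetic builtins go through def_builtin_const so the decl is marked
   TREE_READONLY and calls to it can be treated as constant functions by the
   optimizers.  The argument values are examples taken from the tables
   below.  */
#if 0
static void
example_def_builtin_const_usage (void)
{
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_addpd",
		     V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_ADDPD);
}
#endif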
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
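
/* Illustrative sketch, not part of the original file: the deferred decls
   above are typically materialized when an ISA becomes live per-function.
   Assuming a translation unit compiled without -mavx, a function such as the
   one below would lead the target-option handling to call
   ix86_add_new_builtins with the AVX flag set, building the AVX builtin
   decls at extension scope.  */
#if 0
__attribute__((target ("avx")))
void
example_avx_user (float *dst, const float *src)
{
  /* These AVX builtins are usable here even though the rest of the
     translation unit is compiled without AVX.  */
  __builtin_ia32_storeups256 (dst, __builtin_ia32_loadups256 (src));
}
#endif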
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
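
/* Illustrative sketch, not part of the original file: what
   BUILTIN_DESC_SWAP_OPERANDS means to a consumer of these descriptors.
   When a row's flag carries this bit, the expander exchanges the two
   comparison operands so that, e.g., a GT test can be emitted with the
   native LT pattern.  The helper name and signature are hypothetical.  */
#if 0
static void
example_maybe_swap_operands (const struct builtin_description *d,
			     rtx *op0, rtx *op1)
{
  if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
    {
      /* Swap the operands before emitting insn d->icode.  */
      rtx tmp = *op0;
      *op0 = *op1;
      *op1 = tmp;
    }
}
#endif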
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
22665 /* Builtins with variable number of arguments. */
22666 static const struct builtin_description bdesc_args[] =
22668 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
22669 { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
22670 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
22671 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22672 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22673 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
22674 { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
22677 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22678 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22679 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22680 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22681 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22682 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22684 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22685 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22686 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22687 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22688 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22689 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22690 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22691 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22693 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22694 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22696 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22697 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22698 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22699 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22701 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22702 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22703 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22704 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22705 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22706 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22708 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22709 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22710 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
22711 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22712 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
22713 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
22715 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22716 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
22717 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
22719 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
22721 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22722 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22723 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22724 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22725 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22726 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22728 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22729 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22730 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
22731 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22732 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22733 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
22735 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
22736 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
22737 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
22738 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
22741 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22742 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22743 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22744 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22746 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22747 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22748 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22749 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22750 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22751 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
22752 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22753 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22754 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22755 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22756 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22757 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22758 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22759 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22760 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22763 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
22764 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
22765 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
22766 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
22767 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22768 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
22771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
22772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22773 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22775 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22776 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
22777 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22779 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
22781 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
22782 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
22784 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
22786 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22787 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22788 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22789 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22790 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22791 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22792 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22793 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22795 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22797 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22800 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22801 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22802 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22803 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22804 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22805 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
22806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
22808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
22809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
22810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22811 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
22812 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
22813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
22814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
22816 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
22818 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22819 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22820 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22821 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22823 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22824 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22825 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22826 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22828 { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22830 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22831 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22832 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22833 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22834 { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
22836 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
22837 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
22838 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },
22840 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
22842 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22843 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22844 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
22846 /* SSE MMX or 3Dnow!A */
22847 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22848 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22849 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22851 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22852 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22853 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
22854 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
22856 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
22857 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
22859 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
22862 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
22864 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
22865 { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
22866 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
22867 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
22868 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
22869 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
22870 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
22871 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
22872 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
22873 { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
22874 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
22875 { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },
22877 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
22878 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
22879 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
22880 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
22881 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22882 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
22884 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22885 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22886 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
22887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
22888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
22890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
22892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22893 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
22894 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
22895 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
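
  /* SSE2 MMX */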
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
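
  /* SSE3 */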
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
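
  /* SSSE3 */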
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
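
  /* SSE4.1 */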
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_sign_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
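
  /* The rounding and ptest entries below are gated on
     OPTION_MASK_ISA_ROUND rather than OPTION_MASK_ISA_SSE4_1 directly;
     as the sse4_1_* insn names suggest, they correspond to the SSE4.1
     ROUND and PTEST instructions (see the OPTION_MASK_ISA_ROUND
     definition for exactly which ISAs it covers).  */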
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
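
  /* Note that all three ptest builtins expand to the same
     CODE_FOR_sse4_1_ptest insn; the RTX comparison code selects which
     PTEST flag the integer result is derived from: EQ reads ZF (testz),
     LTU reads CF (testc), and GTU is the "neither flag set" case
     (testnzc).  */

  /* SSE4.2 */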
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
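
  /* Illustrative use of the CRC32 builtins above (the variable names
     are hypothetical, not from this file): each call folds one more
     unit of input into a CRC-32C accumulator, matching the
     UINT_FTYPE_UINT_UCHAR signature recorded here, e.g.

       unsigned int crc = ~0U;
       for (size_t i = 0; i < len; i++)
         crc = __builtin_ia32_crc32qi (crc, buf[i]);  */

  /* SSE4A */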
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
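
  /* AES */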
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
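
  /* PCLMUL */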
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
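
  /* AVX */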
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
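
  /* Unlike the SSE/SSE2 compare entries earlier in this table, the AVX
     cmp builtins just above carry the comparison as an immediate operand
     (the trailing _INT in their signatures) instead of encoding it in
     the RTX comparison-code field, so their code field stays UNKNOWN.  */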
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
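
  /* The six entries just above back the 128/256-bit cast intrinsics:
     widening through avx_si256_si, avx_ps256_ps and avx_pd256_pd leaves
     the upper 128 bits undefined, while narrowing through the
     vec_extract_lo_* patterns simply takes the low 128 bits.  */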
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },

  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
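
  /* F16C */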
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP. */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
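
/* In the MULTI_ARG_* names above, the first number is the operand count,
   the mode letters give the element type, a trailing 2 marks the 256-bit
   (doubled) vector variants, and suffixes such as _IMM, _CMP and _TF mark
   an extra immediate, comparison-code or condition operand encoded in the
   corresponding *_FTYPE_* enumerator.  */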

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
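
  /* The FMA4 entries map to fused multiply-add forms with a single
     rounding step: vfmadd is a*b+c, vfmsub is a*b-c, vfnmadd is
     -(a*b)+c, vfnmsub is -(a*b)-c, and the fmaddsub/fmsubadd variants
     alternate between add and subtract across even and odd elements.  */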
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
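
  /* In the vpmacs names above, a doubled "s" before the width letters
     marks the saturating multiply-accumulate (vpmacssww) as opposed to
     the wrapping form (vpmacsww); dql and dqh select the low or high
     dword pairs feeding the 64-bit accumulate, and the vpmadc entries
     are the horizontal multiply-add-accumulate forms.  */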

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },
23439 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
23440 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
23441 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
23442 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
23443 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
23444 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },
23446 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23447 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23448 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23449 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23450 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23451 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23452 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23453 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
23454 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
23455 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23456 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
23457 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23458 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
23459 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
23460 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
23462 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
23463 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23464 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
23465 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
23466 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
23467 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
23468 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
23470 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
23471 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23472 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
23473 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
23474 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
23475 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
23476 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
23478 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
23479 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23480 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
23481 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
23482 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
23483 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
23484 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
23486 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23487 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23488 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
23489 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
23490 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
23491 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
23492 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
23494 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
23495 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23496 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3,"__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
23497 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
23498 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
23499 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
23500 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
23502 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
23503 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23504 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
23505 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
23506 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
23507 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
23508 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
23510 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23511 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23512 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23513 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23514 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23515 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23516 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23518 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23519 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23520 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23521 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23522 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23523 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23524 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23526 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23527 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23528 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23529 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23530 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub",IX86_BUILTIN_VPCOMFALSEUB,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23531 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw",IX86_BUILTIN_VPCOMFALSEUW,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23532 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud",IX86_BUILTIN_VPCOMFALSEUD,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23533 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq",IX86_BUILTIN_VPCOMFALSEUQ,(enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23535 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23536 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23537 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23538 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23539 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23540 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23541 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23542 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23544 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23545 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23546 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23547 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
23551 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
23552 in the current target ISA to allow the user to compile particular modules
23553 with different target specific options that differ from the command line options.  */
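/* An illustrative sketch, not part of this file: a hypothetical user
   translation unit compiled without -msse4.1 on the command line can
   still reach an SSE4.1 builtin through a per-function target
   attribute, which is why everything is registered up front.  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

__attribute__ ((target ("sse4.1")))
static v4sf
round_down (v4sf x)
{
  return __builtin_ia32_roundps (x, 0x01);	/* 0x01 = round toward -inf */
}
#endif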
23556 ix86_init_mmx_sse_builtins (void)
23558 const struct builtin_description * d;
23559 enum ix86_builtin_func_type ftype;
23562 /* Add all special builtins with a variable number of operands.  */
23563 for (i = 0, d = bdesc_special_args;
23564 i < ARRAY_SIZE (bdesc_special_args);
23570 ftype = (enum ix86_builtin_func_type) d->flag;
23571 def_builtin (d->mask, d->name, ftype, d->code);
23574 /* Add all builtins with a variable number of operands.  */
23575 for (i = 0, d = bdesc_args;
23576 i < ARRAY_SIZE (bdesc_args);
23582 ftype = (enum ix86_builtin_func_type) d->flag;
23583 def_builtin_const (d->mask, d->name, ftype, d->code);
23586 /* pcmpestr[im] insns. */
23587 for (i = 0, d = bdesc_pcmpestr;
23588 i < ARRAY_SIZE (bdesc_pcmpestr);
23591 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23592 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
23594 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
23595 def_builtin_const (d->mask, d->name, ftype, d->code);
23598 /* pcmpistr[im] insns. */
23599 for (i = 0, d = bdesc_pcmpistr;
23600 i < ARRAY_SIZE (bdesc_pcmpistr);
23603 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23604 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
23606 ftype = INT_FTYPE_V16QI_V16QI_INT;
23607 def_builtin_const (d->mask, d->name, ftype, d->code);
23610 /* comi/ucomi insns. */
23611 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23613 if (d->mask == OPTION_MASK_ISA_SSE2)
23614 ftype = INT_FTYPE_V2DF_V2DF;
23616 ftype = INT_FTYPE_V4SF_V4SF;
23617 def_builtin_const (d->mask, d->name, ftype, d->code);
23621 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
23622 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
23623 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
23624 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
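/* An illustrative sketch, not part of this file: user code (or the
   _mm_getcsr/_mm_setcsr wrappers in xmmintrin.h) reaches the MXCSR
   control/status register through the two builtins registered above.  */
#if 0
static void
enable_ftz_daz (void)
{
  unsigned int csr = __builtin_ia32_stmxcsr ();	/* read MXCSR */
  __builtin_ia32_ldmxcsr (csr | 0x8040);	/* FTZ (bit 15) | DAZ (bit 6) */
}
#endif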
23626 /* SSE or 3DNow!A */
23627 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23628 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
23629 IX86_BUILTIN_MASKMOVQ);
23632 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
23633 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
23635 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
23636 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
23637 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
23638 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
23641 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
23642 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
23643 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
23644 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
23647 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
23648 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
23649 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
23650 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
23651 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
23652 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
23653 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
23654 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
23655 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
23656 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
23657 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
23658 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
23661 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
23662 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
23664 /* MMX access to the vec_init patterns. */
23665 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
23666 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
23668 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
23669 V4HI_FTYPE_HI_HI_HI_HI,
23670 IX86_BUILTIN_VEC_INIT_V4HI);
23672 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
23673 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
23674 IX86_BUILTIN_VEC_INIT_V8QI);
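/* An illustrative sketch, not part of this file: mmintrin.h builds MMX
   vectors through these vec_init builtins; e.g. _mm_set_pi32 is a thin
   wrapper around __builtin_ia32_vec_init_v2si.  The helper name below
   is hypothetical.  */
#if 0
typedef int v2si __attribute__ ((vector_size (8)));

static v2si
make_v2si (int lo, int hi)
{
  return __builtin_ia32_vec_init_v2si (lo, hi);
}
#endif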
23676 /* Access to the vec_extract patterns. */
23677 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
23678 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
23679 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
23680 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
23681 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
23682 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
23683 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
23684 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
23685 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
23686 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
23688 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23689 "__builtin_ia32_vec_ext_v4hi",
23690 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
23692 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
23693 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
23695 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
23696 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
23698 /* Access to the vec_set patterns. */
23699 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
23700 "__builtin_ia32_vec_set_v2di",
23701 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
23703 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
23704 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
23706 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
23707 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
23709 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
23710 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
23712 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23713 "__builtin_ia32_vec_set_v4hi",
23714 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
23716 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
23717 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
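/* An illustrative sketch, not part of this file: the vec_ext/vec_set
   builtins registered above take a constant selector as their last
   argument (checked by get_element_number further down).  */
#if 0
typedef float v4sf __attribute__ ((vector_size (16)));

static float
third_element (v4sf x)
{
  return __builtin_ia32_vec_ext_v4sf (x, 2);
}

static v4sf
replace_third_element (v4sf x, float f)	/* needs -msse4.1 */
{
  return __builtin_ia32_vec_set_v4sf (x, f, 2);
}
#endif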
23719 /* Add FMA4 multi-arg instructions.  */
23720 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23725 ftype = (enum ix86_builtin_func_type) d->flag;
23726 def_builtin_const (d->mask, d->name, ftype, d->code);
23730 /* Internal method for ix86_init_builtins. */
23733 ix86_init_builtins_va_builtins_abi (void)
23735 tree ms_va_ref, sysv_va_ref;
23736 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23737 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23738 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23739 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23743 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23744 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23745 ms_va_ref = build_reference_type (ms_va_list_type_node);
23747 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23750 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23751 fnvoid_va_start_ms =
23752 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23753 fnvoid_va_end_sysv =
23754 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23755 fnvoid_va_start_sysv =
23756 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23758 fnvoid_va_copy_ms =
23759 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23761 fnvoid_va_copy_sysv =
23762 build_function_type_list (void_type_node, sysv_va_ref,
23763 sysv_va_ref, NULL_TREE);
23765 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23766 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23767 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23768 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23769 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23770 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23771 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23772 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23773 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23774 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23775 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23776 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
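/* An illustrative sketch, not part of this file: on x86-64 the builtins
   registered above let code walk an ms_abi variadic argument list
   regardless of the translation unit's default ABI.  */
#if 0
__attribute__ ((ms_abi))
static int
sum_ints (int count, ...)
{
  __builtin_ms_va_list ap;
  int i, sum = 0;

  __builtin_ms_va_start (ap, count);
  for (i = 0; i < count; i++)
    sum += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return sum;
}
#endif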
23780 ix86_init_builtin_types (void)
23782 tree float128_type_node, float80_type_node;
23784 /* The __float80 type. */
23785 float80_type_node = long_double_type_node;
23786 if (TYPE_MODE (float80_type_node) != XFmode)
23788 /* The __float80 type. */
23789 float80_type_node = make_node (REAL_TYPE);
23791 TYPE_PRECISION (float80_type_node) = 80;
23792 layout_type (float80_type_node);
23794 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
23796 /* The __float128 type. */
23797 float128_type_node = make_node (REAL_TYPE);
23798 TYPE_PRECISION (float128_type_node) = 128;
23799 layout_type (float128_type_node);
23800 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
23802 /* This macro is built by i386-builtin-types.awk. */
23803 DEFINE_BUILTIN_PRIMITIVE_TYPES;
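/* An illustrative sketch, not part of this file: the two types
   registered above are usable directly from C; __float80 constants
   take the w/W suffix and __float128 constants take q/Q.  */
#if 0
__float80 extended = 1.5w;	/* XFmode, 80-bit extended precision */
__float128 quad = 1.5q;		/* TFmode, 128-bit quad precision */
#endif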
23807 ix86_init_builtins (void)
23811 ix86_init_builtin_types ();
23813 /* TFmode support builtins. */
23814 def_builtin_const (0, "__builtin_infq",
23815 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
23816 def_builtin_const (0, "__builtin_huge_valq",
23817 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
23819 /* We will expand them to a normal call if SSE2 isn't available, since
23820 they are used by libgcc.  */
23821 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
23822 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
23823 BUILT_IN_MD, "__fabstf2", NULL_TREE);
23824 TREE_READONLY (t) = 1;
23825 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
23827 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
23828 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
23829 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
23830 TREE_READONLY (t) = 1;
23831 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
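/* An illustrative sketch, not part of this file: both builtins operate
   on __float128 values; without SSE2 the calls fall back to the libgcc
   routines named above (__fabstf2, __copysigntf3).  */
#if 0
static __float128
negative_magnitude (__float128 x)
{
  return __builtin_copysignq (__builtin_fabsq (x), -1.0q);	/* -|x| */
}
#endif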
23833 ix86_init_mmx_sse_builtins ();
23836 ix86_init_builtins_va_builtins_abi ();
23839 /* Return the ix86 builtin for CODE. */
23842 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23844 if (code >= IX86_BUILTIN_MAX)
23845 return error_mark_node;
23847 return ix86_builtins[code];
23850 /* Errors in the source file can cause expand_expr to return const0_rtx
23851 where we expect a vector. To avoid crashing, use one of the vector
23852 clear instructions. */
23854 safe_vector_operand (rtx x, enum machine_mode mode)
23856 if (x == const0_rtx)
23857 x = CONST0_RTX (mode);
23861 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23864 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23867 tree arg0 = CALL_EXPR_ARG (exp, 0);
23868 tree arg1 = CALL_EXPR_ARG (exp, 1);
23869 rtx op0 = expand_normal (arg0);
23870 rtx op1 = expand_normal (arg1);
23871 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23872 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23873 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23875 if (VECTOR_MODE_P (mode0))
23876 op0 = safe_vector_operand (op0, mode0);
23877 if (VECTOR_MODE_P (mode1))
23878 op1 = safe_vector_operand (op1, mode1);
23880 if (optimize || !target
23881 || GET_MODE (target) != tmode
23882 || !insn_data[icode].operand[0].predicate (target, tmode))
23883 target = gen_reg_rtx (tmode);
23885 if (GET_MODE (op1) == SImode && mode1 == TImode)
23887 rtx x = gen_reg_rtx (V4SImode);
23888 emit_insn (gen_sse2_loadd (x, op1));
23889 op1 = gen_lowpart (TImode, x);
23892 if (!insn_data[icode].operand[1].predicate (op0, mode0))
23893 op0 = copy_to_mode_reg (mode0, op0);
23894 if (!insn_data[icode].operand[2].predicate (op1, mode1))
23895 op1 = copy_to_mode_reg (mode1, op1);
23897 pat = GEN_FCN (icode) (target, op0, op1);
23906 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23909 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23910 enum ix86_builtin_func_type m_type,
23911 enum rtx_code sub_code)
23916 bool comparison_p = false;
23918 bool last_arg_constant = false;
23919 int num_memory = 0;
23922 enum machine_mode mode;
23925 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23929 case MULTI_ARG_4_DF2_DI_I:
23930 case MULTI_ARG_4_DF2_DI_I1:
23931 case MULTI_ARG_4_SF2_SI_I:
23932 case MULTI_ARG_4_SF2_SI_I1:
23934 last_arg_constant = true;
23937 case MULTI_ARG_3_SF:
23938 case MULTI_ARG_3_DF:
23939 case MULTI_ARG_3_SF2:
23940 case MULTI_ARG_3_DF2:
23941 case MULTI_ARG_3_DI:
23942 case MULTI_ARG_3_SI:
23943 case MULTI_ARG_3_SI_DI:
23944 case MULTI_ARG_3_HI:
23945 case MULTI_ARG_3_HI_SI:
23946 case MULTI_ARG_3_QI:
23947 case MULTI_ARG_3_DI2:
23948 case MULTI_ARG_3_SI2:
23949 case MULTI_ARG_3_HI2:
23950 case MULTI_ARG_3_QI2:
23954 case MULTI_ARG_2_SF:
23955 case MULTI_ARG_2_DF:
23956 case MULTI_ARG_2_DI:
23957 case MULTI_ARG_2_SI:
23958 case MULTI_ARG_2_HI:
23959 case MULTI_ARG_2_QI:
23963 case MULTI_ARG_2_DI_IMM:
23964 case MULTI_ARG_2_SI_IMM:
23965 case MULTI_ARG_2_HI_IMM:
23966 case MULTI_ARG_2_QI_IMM:
23968 last_arg_constant = true;
23971 case MULTI_ARG_1_SF:
23972 case MULTI_ARG_1_DF:
23973 case MULTI_ARG_1_SF2:
23974 case MULTI_ARG_1_DF2:
23975 case MULTI_ARG_1_DI:
23976 case MULTI_ARG_1_SI:
23977 case MULTI_ARG_1_HI:
23978 case MULTI_ARG_1_QI:
23979 case MULTI_ARG_1_SI_DI:
23980 case MULTI_ARG_1_HI_DI:
23981 case MULTI_ARG_1_HI_SI:
23982 case MULTI_ARG_1_QI_DI:
23983 case MULTI_ARG_1_QI_SI:
23984 case MULTI_ARG_1_QI_HI:
23988 case MULTI_ARG_2_DI_CMP:
23989 case MULTI_ARG_2_SI_CMP:
23990 case MULTI_ARG_2_HI_CMP:
23991 case MULTI_ARG_2_QI_CMP:
23993 comparison_p = true;
23996 case MULTI_ARG_2_SF_TF:
23997 case MULTI_ARG_2_DF_TF:
23998 case MULTI_ARG_2_DI_TF:
23999 case MULTI_ARG_2_SI_TF:
24000 case MULTI_ARG_2_HI_TF:
24001 case MULTI_ARG_2_QI_TF:
24007 gcc_unreachable ();
24010 if (optimize || !target
24011 || GET_MODE (target) != tmode
24012 || !insn_data[icode].operand[0].predicate (target, tmode))
24013 target = gen_reg_rtx (tmode);
24015 gcc_assert (nargs <= 4);
24017 for (i = 0; i < nargs; i++)
24019 tree arg = CALL_EXPR_ARG (exp, i);
24020 rtx op = expand_normal (arg);
24021 int adjust = (comparison_p) ? 1 : 0;
24022 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
24024 if (last_arg_constant && i == nargs-1)
24026 if (!CONST_INT_P (op))
24028 error ("last argument must be an immediate");
24029 return gen_reg_rtx (tmode);
24034 if (VECTOR_MODE_P (mode))
24035 op = safe_vector_operand (op, mode);
24037 /* If we aren't optimizing, only allow one memory operand to be generated.  */
24039 if (memory_operand (op, mode))
24042 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
24045 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
24047 op = force_reg (mode, op);
24051 args[i].mode = mode;
24057 pat = GEN_FCN (icode) (target, args[0].op);
24062 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
24063 GEN_INT ((int)sub_code));
24064 else if (! comparison_p)
24065 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24068 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
24072 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
24077 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24081 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
24085 gcc_unreachable ();
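/* An illustrative sketch, not part of this file: a 3-operand XOP
   builtin (MULTI_ARG_3_SI from the table above) as expanded by this
   function; requires -mxop.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
mul_add (v4si a, v4si b, v4si c)
{
  return __builtin_ia32_vpmacsdd (a, b, c);	/* a * b + c per 32-bit lane */
}
#endif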
24095 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
24096 insns with vec_merge. */
24099 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
24103 tree arg0 = CALL_EXPR_ARG (exp, 0);
24104 rtx op1, op0 = expand_normal (arg0);
24105 enum machine_mode tmode = insn_data[icode].operand[0].mode;
24106 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
24108 if (optimize || !target
24109 || GET_MODE (target) != tmode
24110 || !insn_data[icode].operand[0].predicate (target, tmode))
24111 target = gen_reg_rtx (tmode);
24113 if (VECTOR_MODE_P (mode0))
24114 op0 = safe_vector_operand (op0, mode0);
24116 if ((optimize && !register_operand (op0, mode0))
24117 || !insn_data[icode].operand[1].predicate (op0, mode0))
24118 op0 = copy_to_mode_reg (mode0, op0);
24121 if (!insn_data[icode].operand[2].predicate (op1, mode0))
24122 op1 = copy_to_mode_reg (mode0, op1);
24124 pat = GEN_FCN (icode) (target, op0, op1);
24131 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
24134 ix86_expand_sse_compare (const struct builtin_description *d,
24135 tree exp, rtx target, bool swap)
24138 tree arg0 = CALL_EXPR_ARG (exp, 0);
24139 tree arg1 = CALL_EXPR_ARG (exp, 1);
24140 rtx op0 = expand_normal (arg0);
24141 rtx op1 = expand_normal (arg1);
24143 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
24144 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
24145 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
24146 enum rtx_code comparison = d->comparison;
24148 if (VECTOR_MODE_P (mode0))
24149 op0 = safe_vector_operand (op0, mode0);
24150 if (VECTOR_MODE_P (mode1))
24151 op1 = safe_vector_operand (op1, mode1);
24153 /* Swap operands if we have a comparison that isn't available in SSE.  */
24157 rtx tmp = gen_reg_rtx (mode1);
24158 emit_move_insn (tmp, op1);
24163 if (optimize || !target
24164 || GET_MODE (target) != tmode
24165 || !insn_data[d->icode].operand[0].predicate (target, tmode))
24166 target = gen_reg_rtx (tmode);
24168 if ((optimize && !register_operand (op0, mode0))
24169 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
24170 op0 = copy_to_mode_reg (mode0, op0);
24171 if ((optimize && !register_operand (op1, mode1))
24172 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
24173 op1 = copy_to_mode_reg (mode1, op1);
24175 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
24176 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
24183 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
24186 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
24190 tree arg0 = CALL_EXPR_ARG (exp, 0);
24191 tree arg1 = CALL_EXPR_ARG (exp, 1);
24192 rtx op0 = expand_normal (arg0);
24193 rtx op1 = expand_normal (arg1);
24194 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24195 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24196 enum rtx_code comparison = d->comparison;
24198 if (VECTOR_MODE_P (mode0))
24199 op0 = safe_vector_operand (op0, mode0);
24200 if (VECTOR_MODE_P (mode1))
24201 op1 = safe_vector_operand (op1, mode1);
24203 /* Swap operands if we have a comparison that isn't available in SSE.  */
24205 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24212 target = gen_reg_rtx (SImode);
24213 emit_move_insn (target, const0_rtx);
24214 target = gen_rtx_SUBREG (QImode, target, 0);
24216 if ((optimize && !register_operand (op0, mode0))
24217 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24218 op0 = copy_to_mode_reg (mode0, op0);
24219 if ((optimize && !register_operand (op1, mode1))
24220 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24221 op1 = copy_to_mode_reg (mode1, op1);
24223 pat = GEN_FCN (d->icode) (op0, op1);
24227 emit_insn (gen_rtx_SET (VOIDmode,
24228 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24229 gen_rtx_fmt_ee (comparison, QImode,
24233 return SUBREG_REG (target);
24236 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24239 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24243 tree arg0 = CALL_EXPR_ARG (exp, 0);
24244 tree arg1 = CALL_EXPR_ARG (exp, 1);
24245 rtx op0 = expand_normal (arg0);
24246 rtx op1 = expand_normal (arg1);
24247 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24248 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24249 enum rtx_code comparison = d->comparison;
24251 if (VECTOR_MODE_P (mode0))
24252 op0 = safe_vector_operand (op0, mode0);
24253 if (VECTOR_MODE_P (mode1))
24254 op1 = safe_vector_operand (op1, mode1);
24256 target = gen_reg_rtx (SImode);
24257 emit_move_insn (target, const0_rtx);
24258 target = gen_rtx_SUBREG (QImode, target, 0);
24260 if ((optimize && !register_operand (op0, mode0))
24261 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24262 op0 = copy_to_mode_reg (mode0, op0);
24263 if ((optimize && !register_operand (op1, mode1))
24264 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24265 op1 = copy_to_mode_reg (mode1, op1);
24267 pat = GEN_FCN (d->icode) (op0, op1);
24271 emit_insn (gen_rtx_SET (VOIDmode,
24272 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24273 gen_rtx_fmt_ee (comparison, QImode,
24277 return SUBREG_REG (target);
24280 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24283 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24284 tree exp, rtx target)
24287 tree arg0 = CALL_EXPR_ARG (exp, 0);
24288 tree arg1 = CALL_EXPR_ARG (exp, 1);
24289 tree arg2 = CALL_EXPR_ARG (exp, 2);
24290 tree arg3 = CALL_EXPR_ARG (exp, 3);
24291 tree arg4 = CALL_EXPR_ARG (exp, 4);
24292 rtx scratch0, scratch1;
24293 rtx op0 = expand_normal (arg0);
24294 rtx op1 = expand_normal (arg1);
24295 rtx op2 = expand_normal (arg2);
24296 rtx op3 = expand_normal (arg3);
24297 rtx op4 = expand_normal (arg4);
24298 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24300 tmode0 = insn_data[d->icode].operand[0].mode;
24301 tmode1 = insn_data[d->icode].operand[1].mode;
24302 modev2 = insn_data[d->icode].operand[2].mode;
24303 modei3 = insn_data[d->icode].operand[3].mode;
24304 modev4 = insn_data[d->icode].operand[4].mode;
24305 modei5 = insn_data[d->icode].operand[5].mode;
24306 modeimm = insn_data[d->icode].operand[6].mode;
24308 if (VECTOR_MODE_P (modev2))
24309 op0 = safe_vector_operand (op0, modev2);
24310 if (VECTOR_MODE_P (modev4))
24311 op2 = safe_vector_operand (op2, modev4);
24313 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24314 op0 = copy_to_mode_reg (modev2, op0);
24315 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24316 op1 = copy_to_mode_reg (modei3, op1);
24317 if ((optimize && !register_operand (op2, modev4))
24318 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24319 op2 = copy_to_mode_reg (modev4, op2);
24320 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24321 op3 = copy_to_mode_reg (modei5, op3);
24323 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24325 error ("the fifth argument must be an 8-bit immediate");
24329 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24331 if (optimize || !target
24332 || GET_MODE (target) != tmode0
24333 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24334 target = gen_reg_rtx (tmode0);
24336 scratch1 = gen_reg_rtx (tmode1);
24338 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24340 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24342 if (optimize || !target
24343 || GET_MODE (target) != tmode1
24344 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24345 target = gen_reg_rtx (tmode1);
24347 scratch0 = gen_reg_rtx (tmode0);
24349 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24353 gcc_assert (d->flag);
24355 scratch0 = gen_reg_rtx (tmode0);
24356 scratch1 = gen_reg_rtx (tmode1);
24358 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24368 target = gen_reg_rtx (SImode);
24369 emit_move_insn (target, const0_rtx);
24370 target = gen_rtx_SUBREG (QImode, target, 0);
24373 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24374 gen_rtx_fmt_ee (EQ, QImode,
24375 gen_rtx_REG ((enum machine_mode) d->flag,
24378 return SUBREG_REG (target);
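/* An illustrative sketch, not part of this file: _mm_cmpestri in
   smmintrin.h maps onto __builtin_ia32_pcmpestri128, which the code
   above expands with a scratch register for the unused vector result.  */
#if 0
typedef char v16qi __attribute__ ((vector_size (16)));

static int
first_match (v16qi a, int la, v16qi b, int lb)	/* requires -msse4.2 */
{
  /* 0x0c = unsigned bytes, equal-ordered comparison */
  return __builtin_ia32_pcmpestri128 (a, la, b, lb, 0x0c);
}
#endif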
24385 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24388 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24389 tree exp, rtx target)
24392 tree arg0 = CALL_EXPR_ARG (exp, 0);
24393 tree arg1 = CALL_EXPR_ARG (exp, 1);
24394 tree arg2 = CALL_EXPR_ARG (exp, 2);
24395 rtx scratch0, scratch1;
24396 rtx op0 = expand_normal (arg0);
24397 rtx op1 = expand_normal (arg1);
24398 rtx op2 = expand_normal (arg2);
24399 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24401 tmode0 = insn_data[d->icode].operand[0].mode;
24402 tmode1 = insn_data[d->icode].operand[1].mode;
24403 modev2 = insn_data[d->icode].operand[2].mode;
24404 modev3 = insn_data[d->icode].operand[3].mode;
24405 modeimm = insn_data[d->icode].operand[4].mode;
24407 if (VECTOR_MODE_P (modev2))
24408 op0 = safe_vector_operand (op0, modev2);
24409 if (VECTOR_MODE_P (modev3))
24410 op1 = safe_vector_operand (op1, modev3);
24412 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24413 op0 = copy_to_mode_reg (modev2, op0);
24414 if ((optimize && !register_operand (op1, modev3))
24415 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24416 op1 = copy_to_mode_reg (modev3, op1);
24418 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24420 error ("the third argument must be an 8-bit immediate");
24424 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24426 if (optimize || !target
24427 || GET_MODE (target) != tmode0
24428 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24429 target = gen_reg_rtx (tmode0);
24431 scratch1 = gen_reg_rtx (tmode1);
24433 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24435 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24437 if (optimize || !target
24438 || GET_MODE (target) != tmode1
24439 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24440 target = gen_reg_rtx (tmode1);
24442 scratch0 = gen_reg_rtx (tmode0);
24444 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24448 gcc_assert (d->flag);
24450 scratch0 = gen_reg_rtx (tmode0);
24451 scratch1 = gen_reg_rtx (tmode1);
24453 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24463 target = gen_reg_rtx (SImode);
24464 emit_move_insn (target, const0_rtx);
24465 target = gen_rtx_SUBREG (QImode, target, 0);
24468 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24469 gen_rtx_fmt_ee (EQ, QImode,
24470 gen_rtx_REG ((enum machine_mode) d->flag,
24473 return SUBREG_REG (target);
24479 /* Subroutine of ix86_expand_builtin to take care of insns with
24480 a variable number of operands.  */
24483 ix86_expand_args_builtin (const struct builtin_description *d,
24484 tree exp, rtx target)
24486 rtx pat, real_target;
24487 unsigned int i, nargs;
24488 unsigned int nargs_constant = 0;
24489 int num_memory = 0;
24493 enum machine_mode mode;
24495 bool last_arg_count = false;
24496 enum insn_code icode = d->icode;
24497 const struct insn_data_d *insn_p = &insn_data[icode];
24498 enum machine_mode tmode = insn_p->operand[0].mode;
24499 enum machine_mode rmode = VOIDmode;
24501 enum rtx_code comparison = d->comparison;
24503 switch ((enum ix86_builtin_func_type) d->flag)
24505 case INT_FTYPE_V8SF_V8SF_PTEST:
24506 case INT_FTYPE_V4DI_V4DI_PTEST:
24507 case INT_FTYPE_V4DF_V4DF_PTEST:
24508 case INT_FTYPE_V4SF_V4SF_PTEST:
24509 case INT_FTYPE_V2DI_V2DI_PTEST:
24510 case INT_FTYPE_V2DF_V2DF_PTEST:
24511 return ix86_expand_sse_ptest (d, exp, target);
24512 case FLOAT128_FTYPE_FLOAT128:
24513 case FLOAT_FTYPE_FLOAT:
24514 case INT_FTYPE_INT:
24515 case UINT64_FTYPE_INT:
24516 case UINT16_FTYPE_UINT16:
24517 case INT64_FTYPE_INT64:
24518 case INT64_FTYPE_V4SF:
24519 case INT64_FTYPE_V2DF:
24520 case INT_FTYPE_V16QI:
24521 case INT_FTYPE_V8QI:
24522 case INT_FTYPE_V8SF:
24523 case INT_FTYPE_V4DF:
24524 case INT_FTYPE_V4SF:
24525 case INT_FTYPE_V2DF:
24526 case V16QI_FTYPE_V16QI:
24527 case V8SI_FTYPE_V8SF:
24528 case V8SI_FTYPE_V4SI:
24529 case V8HI_FTYPE_V8HI:
24530 case V8HI_FTYPE_V16QI:
24531 case V8QI_FTYPE_V8QI:
24532 case V8SF_FTYPE_V8SF:
24533 case V8SF_FTYPE_V8SI:
24534 case V8SF_FTYPE_V4SF:
24535 case V8SF_FTYPE_V8HI:
24536 case V4SI_FTYPE_V4SI:
24537 case V4SI_FTYPE_V16QI:
24538 case V4SI_FTYPE_V4SF:
24539 case V4SI_FTYPE_V8SI:
24540 case V4SI_FTYPE_V8HI:
24541 case V4SI_FTYPE_V4DF:
24542 case V4SI_FTYPE_V2DF:
24543 case V4HI_FTYPE_V4HI:
24544 case V4DF_FTYPE_V4DF:
24545 case V4DF_FTYPE_V4SI:
24546 case V4DF_FTYPE_V4SF:
24547 case V4DF_FTYPE_V2DF:
24548 case V4SF_FTYPE_V4SF:
24549 case V4SF_FTYPE_V4SI:
24550 case V4SF_FTYPE_V8SF:
24551 case V4SF_FTYPE_V4DF:
24552 case V4SF_FTYPE_V8HI:
24553 case V4SF_FTYPE_V2DF:
24554 case V2DI_FTYPE_V2DI:
24555 case V2DI_FTYPE_V16QI:
24556 case V2DI_FTYPE_V8HI:
24557 case V2DI_FTYPE_V4SI:
24558 case V2DF_FTYPE_V2DF:
24559 case V2DF_FTYPE_V4SI:
24560 case V2DF_FTYPE_V4DF:
24561 case V2DF_FTYPE_V4SF:
24562 case V2DF_FTYPE_V2SI:
24563 case V2SI_FTYPE_V2SI:
24564 case V2SI_FTYPE_V4SF:
24565 case V2SI_FTYPE_V2SF:
24566 case V2SI_FTYPE_V2DF:
24567 case V2SF_FTYPE_V2SF:
24568 case V2SF_FTYPE_V2SI:
24571 case V4SF_FTYPE_V4SF_VEC_MERGE:
24572 case V2DF_FTYPE_V2DF_VEC_MERGE:
24573 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24574 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24575 case V16QI_FTYPE_V16QI_V16QI:
24576 case V16QI_FTYPE_V8HI_V8HI:
24577 case V8QI_FTYPE_V8QI_V8QI:
24578 case V8QI_FTYPE_V4HI_V4HI:
24579 case V8HI_FTYPE_V8HI_V8HI:
24580 case V8HI_FTYPE_V16QI_V16QI:
24581 case V8HI_FTYPE_V4SI_V4SI:
24582 case V8SF_FTYPE_V8SF_V8SF:
24583 case V8SF_FTYPE_V8SF_V8SI:
24584 case V4SI_FTYPE_V4SI_V4SI:
24585 case V4SI_FTYPE_V8HI_V8HI:
24586 case V4SI_FTYPE_V4SF_V4SF:
24587 case V4SI_FTYPE_V2DF_V2DF:
24588 case V4HI_FTYPE_V4HI_V4HI:
24589 case V4HI_FTYPE_V8QI_V8QI:
24590 case V4HI_FTYPE_V2SI_V2SI:
24591 case V4DF_FTYPE_V4DF_V4DF:
24592 case V4DF_FTYPE_V4DF_V4DI:
24593 case V4SF_FTYPE_V4SF_V4SF:
24594 case V4SF_FTYPE_V4SF_V4SI:
24595 case V4SF_FTYPE_V4SF_V2SI:
24596 case V4SF_FTYPE_V4SF_V2DF:
24597 case V4SF_FTYPE_V4SF_DI:
24598 case V4SF_FTYPE_V4SF_SI:
24599 case V2DI_FTYPE_V2DI_V2DI:
24600 case V2DI_FTYPE_V16QI_V16QI:
24601 case V2DI_FTYPE_V4SI_V4SI:
24602 case V2DI_FTYPE_V2DI_V16QI:
24603 case V2DI_FTYPE_V2DF_V2DF:
24604 case V2SI_FTYPE_V2SI_V2SI:
24605 case V2SI_FTYPE_V4HI_V4HI:
24606 case V2SI_FTYPE_V2SF_V2SF:
24607 case V2DF_FTYPE_V2DF_V2DF:
24608 case V2DF_FTYPE_V2DF_V4SF:
24609 case V2DF_FTYPE_V2DF_V2DI:
24610 case V2DF_FTYPE_V2DF_DI:
24611 case V2DF_FTYPE_V2DF_SI:
24612 case V2SF_FTYPE_V2SF_V2SF:
24613 case V1DI_FTYPE_V1DI_V1DI:
24614 case V1DI_FTYPE_V8QI_V8QI:
24615 case V1DI_FTYPE_V2SI_V2SI:
24616 if (comparison == UNKNOWN)
24617 return ix86_expand_binop_builtin (icode, exp, target);
24620 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24621 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24622 gcc_assert (comparison != UNKNOWN);
24626 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24627 case V8HI_FTYPE_V8HI_SI_COUNT:
24628 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24629 case V4SI_FTYPE_V4SI_SI_COUNT:
24630 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24631 case V4HI_FTYPE_V4HI_SI_COUNT:
24632 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24633 case V2DI_FTYPE_V2DI_SI_COUNT:
24634 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24635 case V2SI_FTYPE_V2SI_SI_COUNT:
24636 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24637 case V1DI_FTYPE_V1DI_SI_COUNT:
24639 last_arg_count = true;
24641 case UINT64_FTYPE_UINT64_UINT64:
24642 case UINT_FTYPE_UINT_UINT:
24643 case UINT_FTYPE_UINT_USHORT:
24644 case UINT_FTYPE_UINT_UCHAR:
24645 case UINT16_FTYPE_UINT16_INT:
24646 case UINT8_FTYPE_UINT8_INT:
24649 case V2DI_FTYPE_V2DI_INT_CONVERT:
24652 nargs_constant = 1;
24654 case V8HI_FTYPE_V8HI_INT:
24655 case V8HI_FTYPE_V8SF_INT:
24656 case V8HI_FTYPE_V4SF_INT:
24657 case V8SF_FTYPE_V8SF_INT:
24658 case V4SI_FTYPE_V4SI_INT:
24659 case V4SI_FTYPE_V8SI_INT:
24660 case V4HI_FTYPE_V4HI_INT:
24661 case V4DF_FTYPE_V4DF_INT:
24662 case V4SF_FTYPE_V4SF_INT:
24663 case V4SF_FTYPE_V8SF_INT:
24664 case V2DI_FTYPE_V2DI_INT:
24665 case V2DF_FTYPE_V2DF_INT:
24666 case V2DF_FTYPE_V4DF_INT:
24668 nargs_constant = 1;
24670 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24671 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24672 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24673 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24674 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24677 case V16QI_FTYPE_V16QI_V16QI_INT:
24678 case V8HI_FTYPE_V8HI_V8HI_INT:
24679 case V8SI_FTYPE_V8SI_V8SI_INT:
24680 case V8SI_FTYPE_V8SI_V4SI_INT:
24681 case V8SF_FTYPE_V8SF_V8SF_INT:
24682 case V8SF_FTYPE_V8SF_V4SF_INT:
24683 case V4SI_FTYPE_V4SI_V4SI_INT:
24684 case V4DF_FTYPE_V4DF_V4DF_INT:
24685 case V4DF_FTYPE_V4DF_V2DF_INT:
24686 case V4SF_FTYPE_V4SF_V4SF_INT:
24687 case V2DI_FTYPE_V2DI_V2DI_INT:
24688 case V2DF_FTYPE_V2DF_V2DF_INT:
24690 nargs_constant = 1;
24692 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
24695 nargs_constant = 1;
24697 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
24700 nargs_constant = 1;
24702 case V2DI_FTYPE_V2DI_UINT_UINT:
24704 nargs_constant = 2;
24706 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
24707 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
24708 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
24709 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
24711 nargs_constant = 1;
24713 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24715 nargs_constant = 2;
24718 gcc_unreachable ();
24721 gcc_assert (nargs <= ARRAY_SIZE (args));
24723 if (comparison != UNKNOWN)
24725 gcc_assert (nargs == 2);
24726 return ix86_expand_sse_compare (d, exp, target, swap);
24729 if (rmode == VOIDmode || rmode == tmode)
24733 || GET_MODE (target) != tmode
24734 || !insn_p->operand[0].predicate (target, tmode))
24735 target = gen_reg_rtx (tmode);
24736 real_target = target;
24740 target = gen_reg_rtx (rmode);
24741 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24744 for (i = 0; i < nargs; i++)
24746 tree arg = CALL_EXPR_ARG (exp, i);
24747 rtx op = expand_normal (arg);
24748 enum machine_mode mode = insn_p->operand[i + 1].mode;
24749 bool match = insn_p->operand[i + 1].predicate (op, mode);
24751 if (last_arg_count && (i + 1) == nargs)
24753 /* SIMD shift insns take either an 8-bit immediate or a
24754 register as count.  But builtin functions take int as
24755 count.  If the count doesn't match, we put it in a register.  */
24758 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24759 if (!insn_p->operand[i + 1].predicate (op, mode))
24760 op = copy_to_reg (op);
24763 else if ((nargs - i) <= nargs_constant)
24768 case CODE_FOR_sse4_1_roundpd:
24769 case CODE_FOR_sse4_1_roundps:
24770 case CODE_FOR_sse4_1_roundsd:
24771 case CODE_FOR_sse4_1_roundss:
24772 case CODE_FOR_sse4_1_blendps:
24773 case CODE_FOR_avx_blendpd256:
24774 case CODE_FOR_avx_vpermilv4df:
24775 case CODE_FOR_avx_roundpd256:
24776 case CODE_FOR_avx_roundps256:
24777 error ("the last argument must be a 4-bit immediate");
24780 case CODE_FOR_sse4_1_blendpd:
24781 case CODE_FOR_avx_vpermilv2df:
24782 case CODE_FOR_xop_vpermil2v2df3:
24783 case CODE_FOR_xop_vpermil2v4sf3:
24784 case CODE_FOR_xop_vpermil2v4df3:
24785 case CODE_FOR_xop_vpermil2v8sf3:
24786 error ("the last argument must be a 2-bit immediate");
24789 case CODE_FOR_avx_vextractf128v4df:
24790 case CODE_FOR_avx_vextractf128v8sf:
24791 case CODE_FOR_avx_vextractf128v8si:
24792 case CODE_FOR_avx_vinsertf128v4df:
24793 case CODE_FOR_avx_vinsertf128v8sf:
24794 case CODE_FOR_avx_vinsertf128v8si:
24795 error ("the last argument must be a 1-bit immediate");
24798 case CODE_FOR_avx_cmpsdv2df3:
24799 case CODE_FOR_avx_cmpssv4sf3:
24800 case CODE_FOR_avx_cmppdv2df3:
24801 case CODE_FOR_avx_cmppsv4sf3:
24802 case CODE_FOR_avx_cmppdv4df3:
24803 case CODE_FOR_avx_cmppsv8sf3:
24804 error ("the last argument must be a 5-bit immediate");
24808 switch (nargs_constant)
24811 if ((nargs - i) == nargs_constant)
24813 error ("the next to last argument must be an 8-bit immediate");
24817 error ("the last argument must be an 8-bit immediate");
24820 gcc_unreachable ();
24827 if (VECTOR_MODE_P (mode))
24828 op = safe_vector_operand (op, mode);
24830 /* If we aren't optimizing, only allow one memory operand to be generated.  */
24832 if (memory_operand (op, mode))
24835 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24837 if (optimize || !match || num_memory > 1)
24838 op = copy_to_mode_reg (mode, op);
24842 op = copy_to_reg (op);
24843 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24848 args[i].mode = mode;
24854 pat = GEN_FCN (icode) (real_target, args[0].op);
24857 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24860 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24864 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24865 args[2].op, args[3].op);
24868 gcc_unreachable ();
24878 /* Subroutine of ix86_expand_builtin to take care of special insns
24879 with a variable number of operands.  */
24882 ix86_expand_special_args_builtin (const struct builtin_description *d,
24883 tree exp, rtx target)
24887 unsigned int i, nargs, arg_adjust, memory;
24891 enum machine_mode mode;
24893 enum insn_code icode = d->icode;
24894 bool last_arg_constant = false;
24895 const struct insn_data_d *insn_p = &insn_data[icode];
24896 enum machine_mode tmode = insn_p->operand[0].mode;
24897 enum { load, store } klass;
24899 switch ((enum ix86_builtin_func_type) d->flag)
24901 case VOID_FTYPE_VOID:
24902 emit_insn (GEN_FCN (icode) (target));
24904 case VOID_FTYPE_UINT64:
24905 case VOID_FTYPE_UNSIGNED:
24911 case UINT64_FTYPE_VOID:
24912 case UNSIGNED_FTYPE_VOID:
24913 case UINT16_FTYPE_VOID:
24918 case UINT64_FTYPE_PUNSIGNED:
24919 case V2DI_FTYPE_PV2DI:
24920 case V32QI_FTYPE_PCCHAR:
24921 case V16QI_FTYPE_PCCHAR:
24922 case V8SF_FTYPE_PCV4SF:
24923 case V8SF_FTYPE_PCFLOAT:
24924 case V4SF_FTYPE_PCFLOAT:
24925 case V4DF_FTYPE_PCV2DF:
24926 case V4DF_FTYPE_PCDOUBLE:
24927 case V2DF_FTYPE_PCDOUBLE:
24928 case VOID_FTYPE_PVOID:
24933 case VOID_FTYPE_PV2SF_V4SF:
24934 case VOID_FTYPE_PV4DI_V4DI:
24935 case VOID_FTYPE_PV2DI_V2DI:
24936 case VOID_FTYPE_PCHAR_V32QI:
24937 case VOID_FTYPE_PCHAR_V16QI:
24938 case VOID_FTYPE_PFLOAT_V8SF:
24939 case VOID_FTYPE_PFLOAT_V4SF:
24940 case VOID_FTYPE_PDOUBLE_V4DF:
24941 case VOID_FTYPE_PDOUBLE_V2DF:
24942 case VOID_FTYPE_PULONGLONG_ULONGLONG:
24943 case VOID_FTYPE_PINT_INT:
24946 /* Reserve memory operand for target. */
24947 memory = ARRAY_SIZE (args);
24949 case V4SF_FTYPE_V4SF_PCV2SF:
24950 case V2DF_FTYPE_V2DF_PCDOUBLE:
24955 case V8SF_FTYPE_PCV8SF_V8SF:
24956 case V4DF_FTYPE_PCV4DF_V4DF:
24957 case V4SF_FTYPE_PCV4SF_V4SF:
24958 case V2DF_FTYPE_PCV2DF_V2DF:
24963 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24964 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24965 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24966 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24969 /* Reserve memory operand for target. */
24970 memory = ARRAY_SIZE (args);
24972 case VOID_FTYPE_UINT_UINT_UINT:
24973 case VOID_FTYPE_UINT64_UINT_UINT:
24974 case UCHAR_FTYPE_UINT_UINT_UINT:
24975 case UCHAR_FTYPE_UINT64_UINT_UINT:
24978 memory = ARRAY_SIZE (args);
24979 last_arg_constant = true;
24982 gcc_unreachable ();
24985 gcc_assert (nargs <= ARRAY_SIZE (args));
24987 if (klass == store)
24989 arg = CALL_EXPR_ARG (exp, 0);
24990 op = expand_normal (arg);
24991 gcc_assert (target == 0);
24993 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24995 target = force_reg (tmode, op);
25003 || GET_MODE (target) != tmode
25004 || !insn_p->operand[0].predicate (target, tmode))
25005 target = gen_reg_rtx (tmode);
25008 for (i = 0; i < nargs; i++)
25010 enum machine_mode mode = insn_p->operand[i + 1].mode;
25013 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
25014 op = expand_normal (arg);
25015 match = insn_p->operand[i + 1].predicate (op, mode);
25017 if (last_arg_constant && (i + 1) == nargs)
25021 if (icode == CODE_FOR_lwp_lwpvalsi3
25022 || icode == CODE_FOR_lwp_lwpinssi3
25023 || icode == CODE_FOR_lwp_lwpvaldi3
25024 || icode == CODE_FOR_lwp_lwpinsdi3)
25025 error ("the last argument must be a 32-bit immediate");
25027 error ("the last argument must be an 8-bit immediate");
25035 /* This must be the memory operand. */
25036 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
25037 gcc_assert (GET_MODE (op) == mode
25038 || GET_MODE (op) == VOIDmode);
25042 /* This must be a register.  */
25043 if (VECTOR_MODE_P (mode))
25044 op = safe_vector_operand (op, mode);
25046 gcc_assert (GET_MODE (op) == mode
25047 || GET_MODE (op) == VOIDmode);
25048 op = copy_to_mode_reg (mode, op);
25053 args[i].mode = mode;
25059 pat = GEN_FCN (icode) (target);
25062 pat = GEN_FCN (icode) (target, args[0].op);
25065 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
25068 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
25071 gcc_unreachable ();
25077 return klass == store ? 0 : target;
25080 /* Return the integer constant in ARG. Constrain it to be in the range
25081 of the subparts of VEC_TYPE; issue an error if not. */
25084 get_element_number (tree vec_type, tree arg)
25086 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
25088 if (!host_integerp (arg, 1)
25089 || (elt = tree_low_cst (arg, 1), elt > max))
25091 error ("selector must be an integer constant in the range 0..%wi", max);
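/* An illustrative sketch, not part of this file, of the constraint
   enforced above.  */
#if 0
typedef int v4si __attribute__ ((vector_size (16)));

static int
ok (v4si x)
{
  return __builtin_ia32_vec_ext_v4si (x, 3);	/* constant selector: fine */
}

static int
bad (v4si x, int i)
{
  /* error: selector must be an integer constant in the range 0..3 */
  return __builtin_ia32_vec_ext_v4si (x, i);
}
#endif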
25098 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25099 ix86_expand_vector_init. We DO have language-level syntax for this, in
25100 the form of (type){ init-list }. Except that since we can't place emms
25101 instructions from inside the compiler, we can't allow the use of MMX
25102 registers unless the user explicitly asks for it. So we do *not* define
25103 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
25104 we have builtins invoked by mmintrin.h that give us license to emit
25105 these sorts of instructions. */
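/* For instance, mmintrin.h wraps the V2SI variant roughly like this
   (a sketch of the wrapper, not the verbatim header):

   extern __inline __m64
   _mm_set_pi32 (int __i1, int __i0)
   {
     return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
   }  */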
25108 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
25110 enum machine_mode tmode = TYPE_MODE (type);
25111 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
25112 int i, n_elt = GET_MODE_NUNITS (tmode);
25113 rtvec v = rtvec_alloc (n_elt);
25115 gcc_assert (VECTOR_MODE_P (tmode));
25116 gcc_assert (call_expr_nargs (exp) == n_elt);
25118 for (i = 0; i < n_elt; ++i)
25120 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
25121 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
25124 if (!target || !register_operand (target, tmode))
25125 target = gen_reg_rtx (tmode);
25127 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
25131 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25132 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
25133 had a language-level syntax for referencing vector elements. */
25136 ix86_expand_vec_ext_builtin (tree exp, rtx target)
25138 enum machine_mode tmode, mode0;
25143 arg0 = CALL_EXPR_ARG (exp, 0);
25144 arg1 = CALL_EXPR_ARG (exp, 1);
25146 op0 = expand_normal (arg0);
25147 elt = get_element_number (TREE_TYPE (arg0), arg1);
25149 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25150 mode0 = TYPE_MODE (TREE_TYPE (arg0));
25151 gcc_assert (VECTOR_MODE_P (mode0));
25153 op0 = force_reg (mode0, op0);
25155 if (optimize || !target || !register_operand (target, tmode))
25156 target = gen_reg_rtx (tmode);
25158 ix86_expand_vector_extract (true, target, op0, elt);
25163 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
25164 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
25165 a language-level syntax for referencing vector elements. */
25168 ix86_expand_vec_set_builtin (tree exp)
25170 enum machine_mode tmode, mode1;
25171 tree arg0, arg1, arg2;
25173 rtx op0, op1, target;
25175 arg0 = CALL_EXPR_ARG (exp, 0);
25176 arg1 = CALL_EXPR_ARG (exp, 1);
25177 arg2 = CALL_EXPR_ARG (exp, 2);
25179 tmode = TYPE_MODE (TREE_TYPE (arg0));
25180 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
25181 gcc_assert (VECTOR_MODE_P (tmode));
25183 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
25184 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
25185 elt = get_element_number (TREE_TYPE (arg0), arg2);
25187 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
25188 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
25190 op0 = force_reg (tmode, op0);
25191 op1 = force_reg (mode1, op1);
25193 /* OP0 is the source of these builtin functions and shouldn't be
25194 modified. Create a copy, use it and return it as target. */
25195 target = gen_reg_rtx (tmode);
25196 emit_move_insn (target, op0);
25197 ix86_expand_vector_set (true, target, op1, elt);
25202 /* Expand an expression EXP that calls a built-in function,
25203 with result going to TARGET if that's convenient
25204 (and in mode MODE if that's convenient).
25205 SUBTARGET may be used as the target for computing one of EXP's operands.
25206 IGNORE is nonzero if the value is to be ignored. */
25209 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25210 enum machine_mode mode ATTRIBUTE_UNUSED,
25211 int ignore ATTRIBUTE_UNUSED)
25213 const struct builtin_description *d;
25215 enum insn_code icode;
25216 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25217 tree arg0, arg1, arg2;
25218 rtx op0, op1, op2, pat;
25219 enum machine_mode mode0, mode1, mode2;
25220 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25222 /* Determine whether the builtin function is available under the current ISA.
25223 Originally the builtin was not created if it wasn't applicable to the
25224 current ISA based on the command line switches. With function specific
25225 options, we need to check in the context of the function making the call
25226 whether it is supported. */
25227 if (ix86_builtins_isa[fcode].isa
25228 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25230 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25231 NULL, NULL, false);
25234 error ("%qE needs unknown isa option", fndecl);
25237 gcc_assert (opts != NULL);
25238 error ("%qE needs isa option %s", fndecl, opts);
25246 case IX86_BUILTIN_MASKMOVQ:
25247 case IX86_BUILTIN_MASKMOVDQU:
25248 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25249 ? CODE_FOR_mmx_maskmovq
25250 : CODE_FOR_sse2_maskmovdqu);
25251 /* Note the arg order is different from the operand order. */
25252 arg1 = CALL_EXPR_ARG (exp, 0);
25253 arg2 = CALL_EXPR_ARG (exp, 1);
25254 arg0 = CALL_EXPR_ARG (exp, 2);
25255 op0 = expand_normal (arg0);
25256 op1 = expand_normal (arg1);
25257 op2 = expand_normal (arg2);
25258 mode0 = insn_data[icode].operand[0].mode;
25259 mode1 = insn_data[icode].operand[1].mode;
25260 mode2 = insn_data[icode].operand[2].mode;
25262 op0 = force_reg (Pmode, op0);
25263 op0 = gen_rtx_MEM (mode1, op0);
25265 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25266 op0 = copy_to_mode_reg (mode0, op0);
25267 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25268 op1 = copy_to_mode_reg (mode1, op1);
25269 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25270 op2 = copy_to_mode_reg (mode2, op2);
25271 pat = GEN_FCN (icode) (op0, op1, op2);
25277 case IX86_BUILTIN_LDMXCSR:
25278 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25279 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25280 emit_move_insn (target, op0);
25281 emit_insn (gen_sse_ldmxcsr (target));
25284 case IX86_BUILTIN_STMXCSR:
25285 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25286 emit_insn (gen_sse_stmxcsr (target));
25287 return copy_to_mode_reg (SImode, target);
25289 case IX86_BUILTIN_CLFLUSH:
25290 arg0 = CALL_EXPR_ARG (exp, 0);
25291 op0 = expand_normal (arg0);
25292 icode = CODE_FOR_sse2_clflush;
25293 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25294 op0 = copy_to_mode_reg (Pmode, op0);
25296 emit_insn (gen_sse2_clflush (op0));
25299 case IX86_BUILTIN_MONITOR:
25300 arg0 = CALL_EXPR_ARG (exp, 0);
25301 arg1 = CALL_EXPR_ARG (exp, 1);
25302 arg2 = CALL_EXPR_ARG (exp, 2);
25303 op0 = expand_normal (arg0);
25304 op1 = expand_normal (arg1);
25305 op2 = expand_normal (arg2);
25307 op0 = copy_to_mode_reg (Pmode, op0);
25309 op1 = copy_to_mode_reg (SImode, op1);
25311 op2 = copy_to_mode_reg (SImode, op2);
25312 emit_insn (ix86_gen_monitor (op0, op1, op2));
25315 case IX86_BUILTIN_MWAIT:
25316 arg0 = CALL_EXPR_ARG (exp, 0);
25317 arg1 = CALL_EXPR_ARG (exp, 1);
25318 op0 = expand_normal (arg0);
25319 op1 = expand_normal (arg1);
25321 op0 = copy_to_mode_reg (SImode, op0);
25323 op1 = copy_to_mode_reg (SImode, op1);
25324 emit_insn (gen_sse3_mwait (op0, op1));
25327 case IX86_BUILTIN_VEC_INIT_V2SI:
25328 case IX86_BUILTIN_VEC_INIT_V4HI:
25329 case IX86_BUILTIN_VEC_INIT_V8QI:
25330 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25332 case IX86_BUILTIN_VEC_EXT_V2DF:
25333 case IX86_BUILTIN_VEC_EXT_V2DI:
25334 case IX86_BUILTIN_VEC_EXT_V4SF:
25335 case IX86_BUILTIN_VEC_EXT_V4SI:
25336 case IX86_BUILTIN_VEC_EXT_V8HI:
25337 case IX86_BUILTIN_VEC_EXT_V2SI:
25338 case IX86_BUILTIN_VEC_EXT_V4HI:
25339 case IX86_BUILTIN_VEC_EXT_V16QI:
25340 return ix86_expand_vec_ext_builtin (exp, target);
25342 case IX86_BUILTIN_VEC_SET_V2DI:
25343 case IX86_BUILTIN_VEC_SET_V4SF:
25344 case IX86_BUILTIN_VEC_SET_V4SI:
25345 case IX86_BUILTIN_VEC_SET_V8HI:
25346 case IX86_BUILTIN_VEC_SET_V4HI:
25347 case IX86_BUILTIN_VEC_SET_V16QI:
25348 return ix86_expand_vec_set_builtin (exp);
25350 case IX86_BUILTIN_VEC_PERM_V2DF:
25351 case IX86_BUILTIN_VEC_PERM_V4SF:
25352 case IX86_BUILTIN_VEC_PERM_V2DI:
25353 case IX86_BUILTIN_VEC_PERM_V4SI:
25354 case IX86_BUILTIN_VEC_PERM_V8HI:
25355 case IX86_BUILTIN_VEC_PERM_V16QI:
25356 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25357 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25358 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25359 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25360 case IX86_BUILTIN_VEC_PERM_V4DF:
25361 case IX86_BUILTIN_VEC_PERM_V8SF:
25362 return ix86_expand_vec_perm_builtin (exp);
25364 case IX86_BUILTIN_INFQ:
25365 case IX86_BUILTIN_HUGE_VALQ:
25367 REAL_VALUE_TYPE inf;
25370 real_inf (&inf);
25371 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25373 tmp = validize_mem (force_const_mem (mode, tmp));
25376 target = gen_reg_rtx (mode);
25378 emit_move_insn (target, tmp);
25382 case IX86_BUILTIN_LLWPCB:
25383 arg0 = CALL_EXPR_ARG (exp, 0);
25384 op0 = expand_normal (arg0);
25385 icode = CODE_FOR_lwp_llwpcb;
25386 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25387 op0 = copy_to_mode_reg (Pmode, op0);
25388 emit_insn (gen_lwp_llwpcb (op0));
25391 case IX86_BUILTIN_SLWPCB:
25392 icode = CODE_FOR_lwp_slwpcb;
25393 if (!target
25394 || !insn_data[icode].operand[0].predicate (target, Pmode))
25395 target = gen_reg_rtx (Pmode);
25396 emit_insn (gen_lwp_slwpcb (target));
25403 for (i = 0, d = bdesc_special_args;
25404 i < ARRAY_SIZE (bdesc_special_args);
25406 if (d->code == fcode)
25407 return ix86_expand_special_args_builtin (d, exp, target);
25409 for (i = 0, d = bdesc_args;
25410 i < ARRAY_SIZE (bdesc_args);
25412 if (d->code == fcode)
25415 case IX86_BUILTIN_FABSQ:
25416 case IX86_BUILTIN_COPYSIGNQ:
25418 /* Emit a normal call if SSE2 isn't available. */
25419 return expand_call (exp, target, ignore);
25421 return ix86_expand_args_builtin (d, exp, target);
25424 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25425 if (d->code == fcode)
25426 return ix86_expand_sse_comi (d, exp, target);
25428 for (i = 0, d = bdesc_pcmpestr;
25429 i < ARRAY_SIZE (bdesc_pcmpestr);
25431 if (d->code == fcode)
25432 return ix86_expand_sse_pcmpestr (d, exp, target);
25434 for (i = 0, d = bdesc_pcmpistr;
25435 i < ARRAY_SIZE (bdesc_pcmpistr);
25437 if (d->code == fcode)
25438 return ix86_expand_sse_pcmpistr (d, exp, target);
25440 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25441 if (d->code == fcode)
25442 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25443 (enum ix86_builtin_func_type)
25444 d->flag, d->comparison);
25446 gcc_unreachable ();
25449 /* Returns a function decl for a vectorized version of the builtin function
25450 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25451 if it is not available. */
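/* For example, when the vectorizer asks for a V2DF-to-V2DF variant of
   BUILT_IN_SQRT, the BUILT_IN_SQRT case below hands back the decl
   recorded for IX86_BUILTIN_SQRTPD. */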
25454 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25457 enum machine_mode in_mode, out_mode;
25459 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25461 if (TREE_CODE (type_out) != VECTOR_TYPE
25462 || TREE_CODE (type_in) != VECTOR_TYPE
25463 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25466 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25467 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25468 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25469 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25473 case BUILT_IN_SQRT:
25474 if (out_mode == DFmode && out_n == 2
25475 && in_mode == DFmode && in_n == 2)
25476 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25479 case BUILT_IN_SQRTF:
25480 if (out_mode == SFmode && out_n == 4
25481 && in_mode == SFmode && in_n == 4)
25482 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25485 case BUILT_IN_LRINT:
25486 if (out_mode == SImode && out_n == 4
25487 && in_mode == DFmode && in_n == 2)
25488 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25491 case BUILT_IN_LRINTF:
25492 if (out_mode == SImode && out_n == 4
25493 && in_mode == SFmode && in_n == 4)
25494 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25497 case BUILT_IN_COPYSIGN:
25498 if (out_mode == DFmode && out_n == 2
25499 && in_mode == DFmode && in_n == 2)
25500 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25503 case BUILT_IN_COPYSIGNF:
25504 if (out_mode == SFmode && out_n == 4
25505 && in_mode == SFmode && in_n == 4)
25506 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
25513 /* Dispatch to a handler for a vectorization library. */
25514 if (ix86_veclib_handler)
25515 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25521 /* Handler for an SVML-style interface to
25522 a library with vectorized intrinsics. */
25525 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25528 tree fntype, new_fndecl, args;
25531 enum machine_mode el_mode, in_mode;
25534 /* The SVML is suitable for unsafe math only. */
25535 if (!flag_unsafe_math_optimizations)
25538 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25539 n = TYPE_VECTOR_SUBPARTS (type_out);
25540 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25541 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25542 if (el_mode != in_mode
25550 case BUILT_IN_LOG10:
25552 case BUILT_IN_TANH:
25554 case BUILT_IN_ATAN:
25555 case BUILT_IN_ATAN2:
25556 case BUILT_IN_ATANH:
25557 case BUILT_IN_CBRT:
25558 case BUILT_IN_SINH:
25560 case BUILT_IN_ASINH:
25561 case BUILT_IN_ASIN:
25562 case BUILT_IN_COSH:
25564 case BUILT_IN_ACOSH:
25565 case BUILT_IN_ACOS:
25566 if (el_mode != DFmode || n != 2)
25570 case BUILT_IN_EXPF:
25571 case BUILT_IN_LOGF:
25572 case BUILT_IN_LOG10F:
25573 case BUILT_IN_POWF:
25574 case BUILT_IN_TANHF:
25575 case BUILT_IN_TANF:
25576 case BUILT_IN_ATANF:
25577 case BUILT_IN_ATAN2F:
25578 case BUILT_IN_ATANHF:
25579 case BUILT_IN_CBRTF:
25580 case BUILT_IN_SINHF:
25581 case BUILT_IN_SINF:
25582 case BUILT_IN_ASINHF:
25583 case BUILT_IN_ASINF:
25584 case BUILT_IN_COSHF:
25585 case BUILT_IN_COSF:
25586 case BUILT_IN_ACOSHF:
25587 case BUILT_IN_ACOSF:
25588 if (el_mode != SFmode || n != 4)
25596 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25598 if (fn == BUILT_IN_LOGF)
25599 strcpy (name, "vmlsLn4");
25600 else if (fn == BUILT_IN_LOG)
25601 strcpy (name, "vmldLn2");
25602 else if (n == 4)
25603 {
25604 sprintf (name, "vmls%s", bname+10);
25605 name[strlen (name)-1] = '4';
25606 }
25607 else
25608 sprintf (name, "vmld%s2", bname+10);
25610 /* Convert to uppercase. */
25611 name[4] &= ~0x20;
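/* Illustrative examples of the resulting names (assuming the
   "__builtin_" prefix in BNAME): BUILT_IN_SINF becomes "vmlsSin4",
   BUILT_IN_SIN becomes "vmldSin2", and BUILT_IN_LOGF is special-cased
   above as "vmlsLn4". */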
25614 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25615 args = TREE_CHAIN (args))
25619 fntype = build_function_type_list (type_out, type_in, NULL);
25621 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25623 /* Build a function declaration for the vectorized function. */
25624 new_fndecl = build_decl (BUILTINS_LOCATION,
25625 FUNCTION_DECL, get_identifier (name), fntype);
25626 TREE_PUBLIC (new_fndecl) = 1;
25627 DECL_EXTERNAL (new_fndecl) = 1;
25628 DECL_IS_NOVOPS (new_fndecl) = 1;
25629 TREE_READONLY (new_fndecl) = 1;
25634 /* Handler for an ACML-style interface to
25635 a library with vectorized intrinsics. */
25638 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25640 char name[20] = "__vr.._";
25641 tree fntype, new_fndecl, args;
25644 enum machine_mode el_mode, in_mode;
25647 /* The ACML is 64-bit only and suitable for unsafe math only, as
25648 it does not correctly support parts of IEEE (such as denormals)
25649 with the required precision. */
25650 if (!TARGET_64BIT
25651 || !flag_unsafe_math_optimizations)
25652 return NULL_TREE;
25654 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25655 n = TYPE_VECTOR_SUBPARTS (type_out);
25656 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25657 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25658 if (el_mode != in_mode
25668 case BUILT_IN_LOG2:
25669 case BUILT_IN_LOG10:
25670 name[4] = 'd';
25671 name[5] = '2';
25672 if (el_mode != DFmode
25673 || n != 2)
25674 return NULL_TREE;
25675 break;
25677 case BUILT_IN_SINF:
25678 case BUILT_IN_COSF:
25679 case BUILT_IN_EXPF:
25680 case BUILT_IN_POWF:
25681 case BUILT_IN_LOGF:
25682 case BUILT_IN_LOG2F:
25683 case BUILT_IN_LOG10F:
25684 name[4] = 's';
25685 name[5] = '4';
25686 if (el_mode != SFmode
25687 || n != 4)
25688 return NULL_TREE;
25689 break;
25695 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25696 sprintf (name + 7, "%s", bname+10);
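/* For example (assuming the "__builtin_" prefix in BNAME and the
   'd'/'s' and width characters filled in above), BUILT_IN_SIN yields
   "__vrd2_sin" and BUILT_IN_SINF yields "__vrs4_sinf". */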
25699 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25700 args = TREE_CHAIN (args))
25704 fntype = build_function_type_list (type_out, type_in, NULL);
25706 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25708 /* Build a function declaration for the vectorized function. */
25709 new_fndecl = build_decl (BUILTINS_LOCATION,
25710 FUNCTION_DECL, get_identifier (name), fntype);
25711 TREE_PUBLIC (new_fndecl) = 1;
25712 DECL_EXTERNAL (new_fndecl) = 1;
25713 DECL_IS_NOVOPS (new_fndecl) = 1;
25714 TREE_READONLY (new_fndecl) = 1;
25720 /* Returns a decl of a function that implements conversion of an integer vector
25721 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25722 are the types involved when converting according to CODE.
25723 Return NULL_TREE if it is not available. */
25726 ix86_vectorize_builtin_conversion (unsigned int code,
25727 tree dest_type, tree src_type)
25735 switch (TYPE_MODE (src_type))
25738 switch (TYPE_MODE (dest_type))
25741 return (TYPE_UNSIGNED (src_type)
25742 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25743 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25745 return (TYPE_UNSIGNED (src_type)
25747 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25753 switch (TYPE_MODE (dest_type))
25756 return (TYPE_UNSIGNED (src_type)
25758 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25767 case FIX_TRUNC_EXPR:
25768 switch (TYPE_MODE (dest_type))
25771 switch (TYPE_MODE (src_type))
25774 return (TYPE_UNSIGNED (dest_type)
25776 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25778 return (TYPE_UNSIGNED (dest_type)
25780 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25787 switch (TYPE_MODE (src_type))
25790 return (TYPE_UNSIGNED (dest_type)
25792 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25809 /* Returns a code for a target-specific builtin that implements
25810 reciprocal of the function, or NULL_TREE if not available. */
25813 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25814 bool sqrt ATTRIBUTE_UNUSED)
25816 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25817 && flag_finite_math_only && !flag_trapping_math
25818 && flag_unsafe_math_optimizations))
25822 /* Machine dependent builtins. */
25825 /* Vectorized version of sqrt to rsqrt conversion. */
25826 case IX86_BUILTIN_SQRTPS_NR:
25827 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25833 /* Normal builtins. */
25836 /* Sqrt to rsqrt conversion. */
25837 case BUILT_IN_SQRTF:
25838 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25845 /* Helper for avx_vpermilps256_operand et al. This is also used by
25846 the expansion functions to turn the parallel back into a mask.
25847 The return value is 0 for no match and the imm8+1 for a match. */
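/* For instance (an illustrative example, not from the original
   sources): for V4SF, nelt == 4, so a parallel selecting elements
   [2 3 0 1] encodes imm8 = 2 | (3 << 2) | (0 << 4) | (1 << 6) = 0x4e,
   and the function would return 0x4f. */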
25850 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25852 unsigned i, nelt = GET_MODE_NUNITS (mode);
25854 unsigned char ipar[8];
25856 if (XVECLEN (par, 0) != (int) nelt)
25859 /* Validate that all of the elements are constants, and not totally
25860 out of range. Copy the data into an integral array to make the
25861 subsequent checks easier. */
25862 for (i = 0; i < nelt; ++i)
25864 rtx er = XVECEXP (par, 0, i);
25865 unsigned HOST_WIDE_INT ei;
25867 if (!CONST_INT_P (er))
25878 /* In the 256-bit DFmode case, we can only move elements within
25879 a 128-bit lane. */
25880 for (i = 0; i < 2; ++i)
25884 mask |= ipar[i] << i;
25886 for (i = 2; i < 4; ++i)
25890 mask |= (ipar[i] - 2) << i;
25895 /* In the 256-bit SFmode case, we have full freedom of movement
25896 within the low 128-bit lane, but the high 128-bit lane must
25897 mirror the exact same pattern. */
25898 for (i = 0; i < 4; ++i)
25899 if (ipar[i] + 4 != ipar[i + 4])
25906 /* In the 128-bit case, we have full freedom in the placement of
25907 the elements from the source operand. */
25908 for (i = 0; i < nelt; ++i)
25909 mask |= ipar[i] << (i * (nelt / 2));
25913 gcc_unreachable ();
25916 /* Make sure success has a non-zero value by adding one. */
25920 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25921 the expansion functions to turn the parallel back into a mask.
25922 The return value is 0 for no match and the imm8+1 for a match. */
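/* For instance (illustrative, assuming the usual imm8 encoding in
   which each 4-bit nibble selects one 128-bit half): for V4DF a
   parallel [2 3 0 1] swaps the two halves, i.e. selector 1 in the low
   nibble and selector 0 in the high nibble, so imm8 = 0x01 and the
   return value would be 0x02. */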
25925 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25927 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25929 unsigned char ipar[8];
25931 if (XVECLEN (par, 0) != (int) nelt)
25934 /* Validate that all of the elements are constants, and not totally
25935 out of range. Copy the data into an integral array to make the
25936 subsequent checks easier. */
25937 for (i = 0; i < nelt; ++i)
25939 rtx er = XVECEXP (par, 0, i);
25940 unsigned HOST_WIDE_INT ei;
25942 if (!CONST_INT_P (er))
25945 if (ei >= 2 * nelt)
25950 /* Validate that each half of the permute selects consecutive elements. */
25951 for (i = 0; i < nelt2 - 1; ++i)
25952 if (ipar[i] + 1 != ipar[i + 1])
25954 for (i = nelt2; i < nelt - 1; ++i)
25955 if (ipar[i] + 1 != ipar[i + 1])
25958 /* Reconstruct the mask. */
25959 for (i = 0; i < 2; ++i)
25961 unsigned e = ipar[i * nelt2];
25965 mask |= e << (i * 4);
25968 /* Make sure success has a non-zero value by adding one. */
25973 /* Store OPERAND to the memory after reload is completed. This means
25974 that we can't easily use assign_stack_local. */
25976 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25980 gcc_assert (reload_completed);
25981 if (ix86_using_red_zone ())
25983 result = gen_rtx_MEM (mode,
25984 gen_rtx_PLUS (Pmode,
25986 GEN_INT (-RED_ZONE_SIZE)));
25987 emit_move_insn (result, operand);
25989 else if (TARGET_64BIT)
25995 operand = gen_lowpart (DImode, operand);
25999 gen_rtx_SET (VOIDmode,
26000 gen_rtx_MEM (DImode,
26001 gen_rtx_PRE_DEC (DImode,
26002 stack_pointer_rtx)),
26006 gcc_unreachable ();
26008 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26017 split_di (&operand, 1, operands, operands + 1);
26019 gen_rtx_SET (VOIDmode,
26020 gen_rtx_MEM (SImode,
26021 gen_rtx_PRE_DEC (Pmode,
26022 stack_pointer_rtx)),
26025 gen_rtx_SET (VOIDmode,
26026 gen_rtx_MEM (SImode,
26027 gen_rtx_PRE_DEC (Pmode,
26028 stack_pointer_rtx)),
26033 /* Store HImodes as SImodes. */
26034 operand = gen_lowpart (SImode, operand);
26038 gen_rtx_SET (VOIDmode,
26039 gen_rtx_MEM (GET_MODE (operand),
26040 gen_rtx_PRE_DEC (SImode,
26041 stack_pointer_rtx)),
26045 gcc_unreachable ();
26047 result = gen_rtx_MEM (mode, stack_pointer_rtx);
26052 /* Free the stack slot allocated by ix86_force_to_memory. */
26054 ix86_free_from_memory (enum machine_mode mode)
26056 if (!ix86_using_red_zone ())
26060 if (mode == DImode || TARGET_64BIT)
26064 /* Use LEA to deallocate stack space. In peephole2 it will be converted
26065 to a pop or add instruction if registers are available. */
26066 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
26067 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
26072 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
26073 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
26074 same. */
26075 static const reg_class_t *
26076 i386_ira_cover_classes (void)
26078 static const reg_class_t sse_fpmath_classes[] = {
26079 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
26081 static const reg_class_t no_sse_fpmath_classes[] = {
26082 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
26085 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
26088 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
26089 QImode must go into class Q_REGS.
26090 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
26091 movdf to do mem-to-mem moves through integer regs. */
26093 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
26095 enum machine_mode mode = GET_MODE (x);
26097 /* We're only allowed to return a subclass of CLASS. Many of the
26098 following checks fail for NO_REGS, so eliminate that early. */
26099 if (regclass == NO_REGS)
26102 /* All classes can load zeros. */
26103 if (x == CONST0_RTX (mode))
26106 /* Force constants into memory if we are loading a (nonzero) constant into
26107 an MMX or SSE register. This is because there are no MMX/SSE instructions
26108 to load from a constant. */
26109 if (CONSTANT_P (x)
26110 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
26111 return NO_REGS;
26113 /* Prefer SSE regs only, if we can use them for math. */
26114 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
26115 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
26117 /* Floating-point constants need more complex checks. */
26118 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
26120 /* General regs can load everything. */
26121 if (reg_class_subset_p (regclass, GENERAL_REGS))
26124 /* Floats can load 0 and 1 plus some others. Note that we eliminated
26125 zero above. We only want to wind up preferring 80387 registers if
26126 we plan on doing computation with them. */
26127 if (TARGET_80387
26128 && standard_80387_constant_p (x))
26129 {
26130 /* Limit class to non-sse. */
26131 if (regclass == FLOAT_SSE_REGS)
26133 if (regclass == FP_TOP_SSE_REGS)
26135 if (regclass == FP_SECOND_SSE_REGS)
26136 return FP_SECOND_REG;
26137 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
26144 /* Generally when we see PLUS here, it's the function invariant
26145 (plus soft-fp const_int), which can only be computed into general
26146 regs. */
26147 if (GET_CODE (x) == PLUS)
26148 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
26150 /* QImode constants are easy to load, but non-constant QImode data
26151 must go into Q_REGS. */
26152 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
26154 if (reg_class_subset_p (regclass, Q_REGS))
26156 if (reg_class_subset_p (Q_REGS, regclass))
26164 /* Discourage putting floating-point values in SSE registers unless
26165 SSE math is being used, and likewise for the 387 registers. */
26167 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
26169 enum machine_mode mode = GET_MODE (x);
26171 /* Restrict the output reload class to the register bank that we are doing
26172 math on. If we would like not to return a subset of CLASS, reject this
26173 alternative: if reload cannot do this, it will still use its choice. */
26175 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
26176 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
26178 if (X87_FLOAT_MODE_P (mode))
26180 if (regclass == FP_TOP_SSE_REGS)
26182 else if (regclass == FP_SECOND_SSE_REGS)
26183 return FP_SECOND_REG;
26185 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
26192 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
26193 enum machine_mode mode,
26194 secondary_reload_info *sri ATTRIBUTE_UNUSED)
26196 /* QImode spills from non-QI registers require
26197 an intermediate register on 32-bit targets. */
26198 if (!in_p && mode == QImode && !TARGET_64BIT
26199 && (rclass == GENERAL_REGS
26200 || rclass == LEGACY_REGS
26201 || rclass == INDEX_REGS))
26210 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26211 regno = true_regnum (x);
26213 /* Return Q_REGS if the operand is in memory. */
26221 /* If we are copying between general and FP registers, we need a memory
26222 location. The same is true for SSE and MMX registers.
26224 To optimize register_move_cost performance, allow inline variant.
26226 The macro can't work reliably when one of the CLASSES is a class containing
26227 registers from multiple units (SSE, MMX, integer). We avoid this by never
26228 combining those units in a single alternative in the machine description.
26229 Ensure that this constraint holds to avoid unexpected surprises.
26231 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26232 enforce these sanity checks. */
26235 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26236 enum machine_mode mode, int strict)
26238 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26239 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26240 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26241 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26242 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26243 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26245 gcc_assert (!strict);
26249 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26252 /* ??? This is a lie. We do have moves between mmx/general, and for
26253 mmx/sse2. But by saying we need secondary memory we discourage the
26254 register allocator from using the mmx registers unless needed. */
26255 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26258 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26260 /* SSE1 doesn't have any direct moves from other classes. */
26264 /* If the target says that inter-unit moves are more expensive
26265 than moving through memory, then don't generate them. */
26266 if (!TARGET_INTER_UNIT_MOVES)
26269 /* Between SSE and general, we have moves no larger than word size. */
26270 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26278 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26279 enum machine_mode mode, int strict)
26281 return inline_secondary_memory_needed (class1, class2, mode, strict);
26284 /* Return true if the registers in CLASS cannot represent the change from
26285 modes FROM to TO. */
26288 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26289 enum reg_class regclass)
26294 /* x87 registers can't do subreg at all, as all values are reformatted
26295 to extended precision. */
26296 if (MAYBE_FLOAT_CLASS_P (regclass))
26299 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26301 /* Vector registers do not support QI or HImode loads. If we don't
26302 disallow a change to these modes, reload will assume it's ok to
26303 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26304 the vec_dupv4hi pattern. */
26305 if (GET_MODE_SIZE (from) < 4)
26308 /* Vector registers do not support subreg with nonzero offsets, which
26309 are otherwise valid for integer registers. Since we can't see
26310 whether we have a nonzero offset from here, prohibit all
26311 nonparadoxical subregs changing size. */
26312 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26319 /* Return the cost of moving data of mode M between a
26320 register and memory. A value of 2 is the default; this cost is
26321 relative to those in `REGISTER_MOVE_COST'.
26323 This function is used extensively by register_move_cost, which is used to
26324 build tables at startup. Make it inline in this case.
26325 When IN is 2, return the maximum of the in and out move costs.
26327 If moving between registers and memory is more expensive than
26328 between two registers, you should define this macro to express the
26329 relative cost.
26331 Also model the increased moving costs of QImode registers in
26332 non-Q_REGS classes. */
26335 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26339 if (FLOAT_CLASS_P (regclass))
26357 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26358 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26360 if (SSE_CLASS_P (regclass))
26363 switch (GET_MODE_SIZE (mode))
26378 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26379 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26381 if (MMX_CLASS_P (regclass))
26384 switch (GET_MODE_SIZE (mode))
26396 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26397 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26399 switch (GET_MODE_SIZE (mode))
26402 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26405 return ix86_cost->int_store[0];
26406 if (TARGET_PARTIAL_REG_DEPENDENCY
26407 && optimize_function_for_speed_p (cfun))
26408 cost = ix86_cost->movzbl_load;
26410 cost = ix86_cost->int_load[0];
26412 return MAX (cost, ix86_cost->int_store[0]);
26418 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26420 return ix86_cost->movzbl_load;
26422 return ix86_cost->int_store[0] + 4;
26427 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26428 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26430 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
26431 if (mode == TFmode)
26434 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26436 cost = ix86_cost->int_load[2];
26438 cost = ix86_cost->int_store[2];
26439 return (cost * (((int) GET_MODE_SIZE (mode)
26440 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
26445 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26448 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26452 /* Return the cost of moving data from a register in class CLASS1 to
26453 one in class CLASS2.
26455 It is not required that the cost always equal 2 when FROM is the same as TO;
26456 on some machines it is expensive to move between registers if they are not
26457 general registers. */
26460 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26461 reg_class_t class2_i)
26463 enum reg_class class1 = (enum reg_class) class1_i;
26464 enum reg_class class2 = (enum reg_class) class2_i;
26466 /* In case we require secondary memory, compute the cost of the store
26467 followed by the load. In order to avoid bad register allocation choices,
26468 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26470 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26474 cost += inline_memory_move_cost (mode, class1, 2);
26475 cost += inline_memory_move_cost (mode, class2, 2);
26477 /* In case of copying from a general purpose register we may emit multiple
26478 stores followed by a single load, causing a memory size mismatch stall.
26479 Count this as an arbitrarily high cost of 20. */
26480 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26483 /* In the case of FP/MMX moves, the registers actually overlap, and we
26484 have to switch modes in order to treat them differently. */
26485 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26486 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26492 /* Moves between SSE/MMX and integer unit are expensive. */
26493 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26494 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26496 /* ??? By keeping the returned value relatively high, we limit the number
26497 of moves between integer and MMX/SSE registers for all targets.
26498 Additionally, a high value prevents problems with x86_modes_tieable_p(),
26499 where integer modes in MMX/SSE registers are not tieable
26500 because of missing QImode and HImode moves to, from or between
26501 MMX/SSE registers. */
26502 return MAX (8, ix86_cost->mmxsse_to_integer);
26504 if (MAYBE_FLOAT_CLASS_P (class1))
26505 return ix86_cost->fp_move;
26506 if (MAYBE_SSE_CLASS_P (class1))
26507 return ix86_cost->sse_move;
26508 if (MAYBE_MMX_CLASS_P (class1))
26509 return ix86_cost->mmx_move;
26513 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26516 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26518 /* Flags, and only flags, can hold CCmode values. */
26519 if (CC_REGNO_P (regno))
26520 return GET_MODE_CLASS (mode) == MODE_CC;
26521 if (GET_MODE_CLASS (mode) == MODE_CC
26522 || GET_MODE_CLASS (mode) == MODE_RANDOM
26523 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26525 if (FP_REGNO_P (regno))
26526 return VALID_FP_MODE_P (mode);
26527 if (SSE_REGNO_P (regno))
26529 /* We implement the move patterns for all vector modes into and
26530 out of SSE registers, even when no operation instructions
26531 are available. OImode moves are available only when AVX is
26532 enabled. */
26533 return ((TARGET_AVX && mode == OImode)
26534 || VALID_AVX256_REG_MODE (mode)
26535 || VALID_SSE_REG_MODE (mode)
26536 || VALID_SSE2_REG_MODE (mode)
26537 || VALID_MMX_REG_MODE (mode)
26538 || VALID_MMX_REG_MODE_3DNOW (mode));
26540 if (MMX_REGNO_P (regno))
26542 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26543 so if the register is available at all, then we can move data of
26544 the given mode into or out of it. */
26545 return (VALID_MMX_REG_MODE (mode)
26546 || VALID_MMX_REG_MODE_3DNOW (mode));
26549 if (mode == QImode)
26551 /* Take care with QImode values - they can be in non-QI regs,
26552 but then they do cause partial register stalls. */
26553 if (regno <= BX_REG || TARGET_64BIT)
26555 if (!TARGET_PARTIAL_REG_STALL)
26557 return reload_in_progress || reload_completed;
26559 /* We handle both integer and floats in the general purpose registers. */
26560 else if (VALID_INT_MODE_P (mode))
26562 else if (VALID_FP_MODE_P (mode))
26564 else if (VALID_DFP_MODE_P (mode))
26566 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26567 on to use that value in smaller contexts, this can easily force a
26568 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26569 supporting DImode, allow it. */
26570 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26576 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26577 tieable integer mode. */
26580 ix86_tieable_integer_mode_p (enum machine_mode mode)
26589 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26592 return TARGET_64BIT;
26599 /* Return true if MODE1 is accessible in a register that can hold MODE2
26600 without copying. That is, all register classes that can hold MODE2
26601 can also hold MODE1. */
26604 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26606 if (mode1 == mode2)
26609 if (ix86_tieable_integer_mode_p (mode1)
26610 && ix86_tieable_integer_mode_p (mode2))
26613 /* MODE2 being XFmode implies fp stack or general regs, which means we
26614 can tie any smaller floating point modes to it. Note that we do not
26615 tie this with TFmode. */
26616 if (mode2 == XFmode)
26617 return mode1 == SFmode || mode1 == DFmode;
26619 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26620 that we can tie it with SFmode. */
26621 if (mode2 == DFmode)
26622 return mode1 == SFmode;
26624 /* If MODE2 is only appropriate for an SSE register, then tie with
26625 any other mode acceptable to SSE registers. */
26626 if (GET_MODE_SIZE (mode2) == 16
26627 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26628 return (GET_MODE_SIZE (mode1) == 16
26629 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26631 /* If MODE2 is appropriate for an MMX register, then tie
26632 with any other mode acceptable to MMX registers. */
26633 if (GET_MODE_SIZE (mode2) == 8
26634 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26635 return (GET_MODE_SIZE (mode1) == 8
26636 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26641 /* Compute a (partial) cost for rtx X. Return true if the complete
26642 cost has been computed, and false if subexpressions should be
26643 scanned. In either case, *TOTAL contains the cost result. */
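/* For example, (plus (mult (reg) (const_int 4)) (reg)) matches an
   addressing mode, so the PLUS case below charges only the cost of a
   single lea plus the costs of the inner operands. */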
26646 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26648 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26649 enum machine_mode mode = GET_MODE (x);
26650 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26658 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26660 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26662 else if (flag_pic && SYMBOLIC_CONST (x)
26663 && (!TARGET_64BIT
26664 || (GET_CODE (x) != LABEL_REF
26665 && (GET_CODE (x) != SYMBOL_REF
26666 || !SYMBOL_REF_LOCAL_P (x)))))
26673 if (mode == VOIDmode)
26676 switch (standard_80387_constant_p (x))
26681 default: /* Other constants */
26686 /* Start with (MEM (SYMBOL_REF)), since that's where
26687 it'll probably end up. Add a penalty for size. */
26688 *total = (COSTS_N_INSNS (1)
26689 + (flag_pic != 0 && !TARGET_64BIT)
26690 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26696 /* Zero extension is often completely free on x86_64, so make
26697 it as cheap as possible. */
26698 if (TARGET_64BIT && mode == DImode
26699 && GET_MODE (XEXP (x, 0)) == SImode)
26701 else if (TARGET_ZERO_EXTEND_WITH_AND)
26702 *total = cost->add;
26704 *total = cost->movzx;
26708 *total = cost->movsx;
26712 if (CONST_INT_P (XEXP (x, 1))
26713 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26715 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26718 *total = cost->add;
26721 if ((value == 2 || value == 3)
26722 && cost->lea <= cost->shift_const)
26724 *total = cost->lea;
26734 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26736 if (CONST_INT_P (XEXP (x, 1)))
26738 if (INTVAL (XEXP (x, 1)) > 32)
26739 *total = cost->shift_const + COSTS_N_INSNS (2);
26741 *total = cost->shift_const * 2;
26745 if (GET_CODE (XEXP (x, 1)) == AND)
26746 *total = cost->shift_var * 2;
26748 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26753 if (CONST_INT_P (XEXP (x, 1)))
26754 *total = cost->shift_const;
26756 *total = cost->shift_var;
26761 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26763 /* ??? SSE scalar cost should be used here. */
26764 *total = cost->fmul;
26767 else if (X87_FLOAT_MODE_P (mode))
26769 *total = cost->fmul;
26772 else if (FLOAT_MODE_P (mode))
26774 /* ??? SSE vector cost should be used here. */
26775 *total = cost->fmul;
26780 rtx op0 = XEXP (x, 0);
26781 rtx op1 = XEXP (x, 1);
26783 if (CONST_INT_P (XEXP (x, 1)))
26785 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26786 for (nbits = 0; value != 0; value &= value - 1)
26790 /* This is arbitrary. */
26793 /* Compute costs correctly for widening multiplication. */
26794 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26795 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26796 == GET_MODE_SIZE (mode))
26798 int is_mulwiden = 0;
26799 enum machine_mode inner_mode = GET_MODE (op0);
26801 if (GET_CODE (op0) == GET_CODE (op1))
26802 is_mulwiden = 1, op1 = XEXP (op1, 0);
26803 else if (CONST_INT_P (op1))
26805 if (GET_CODE (op0) == SIGN_EXTEND)
26806 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26809 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26813 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26816 *total = (cost->mult_init[MODE_INDEX (mode)]
26817 + nbits * cost->mult_bit
26818 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26827 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26828 /* ??? SSE cost should be used here. */
26829 *total = cost->fdiv;
26830 else if (X87_FLOAT_MODE_P (mode))
26831 *total = cost->fdiv;
26832 else if (FLOAT_MODE_P (mode))
26833 /* ??? SSE vector cost should be used here. */
26834 *total = cost->fdiv;
26836 *total = cost->divide[MODE_INDEX (mode)];
26840 if (GET_MODE_CLASS (mode) == MODE_INT
26841 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26843 if (GET_CODE (XEXP (x, 0)) == PLUS
26844 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26845 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26846 && CONSTANT_P (XEXP (x, 1)))
26848 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26849 if (val == 2 || val == 4 || val == 8)
26851 *total = cost->lea;
26852 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26853 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26854 outer_code, speed);
26855 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26859 else if (GET_CODE (XEXP (x, 0)) == MULT
26860 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26862 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26863 if (val == 2 || val == 4 || val == 8)
26865 *total = cost->lea;
26866 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26867 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26871 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26873 *total = cost->lea;
26874 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26875 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26876 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26883 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26885 /* ??? SSE cost should be used here. */
26886 *total = cost->fadd;
26889 else if (X87_FLOAT_MODE_P (mode))
26891 *total = cost->fadd;
26894 else if (FLOAT_MODE_P (mode))
26896 /* ??? SSE vector cost should be used here. */
26897 *total = cost->fadd;
26905 if (!TARGET_64BIT && mode == DImode)
26907 *total = (cost->add * 2
26908 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26909 << (GET_MODE (XEXP (x, 0)) != DImode))
26910 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26911 << (GET_MODE (XEXP (x, 1)) != DImode)));
26917 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26919 /* ??? SSE cost should be used here. */
26920 *total = cost->fchs;
26923 else if (X87_FLOAT_MODE_P (mode))
26925 *total = cost->fchs;
26928 else if (FLOAT_MODE_P (mode))
26930 /* ??? SSE vector cost should be used here. */
26931 *total = cost->fchs;
26937 if (!TARGET_64BIT && mode == DImode)
26938 *total = cost->add * 2;
26940 *total = cost->add;
26944 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26945 && XEXP (XEXP (x, 0), 1) == const1_rtx
26946 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26947 && XEXP (x, 1) == const0_rtx)
26949 /* This kind of construct is implemented using test[bwl].
26950 Treat it as if we had an AND. */
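/* E.g. (compare (zero_extract (reg) (const_int 1) (const_int 5))
   (const_int 0)) can be emitted as "testl $0x20, <reg>"
   (illustrative). */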
26951 *total = (cost->add
26952 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26953 + rtx_cost (const1_rtx, outer_code, speed));
26959 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26964 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26965 /* ??? SSE cost should be used here. */
26966 *total = cost->fabs;
26967 else if (X87_FLOAT_MODE_P (mode))
26968 *total = cost->fabs;
26969 else if (FLOAT_MODE_P (mode))
26970 /* ??? SSE vector cost should be used here. */
26971 *total = cost->fabs;
26975 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26976 /* ??? SSE cost should be used here. */
26977 *total = cost->fsqrt;
26978 else if (X87_FLOAT_MODE_P (mode))
26979 *total = cost->fsqrt;
26980 else if (FLOAT_MODE_P (mode))
26981 /* ??? SSE vector cost should be used here. */
26982 *total = cost->fsqrt;
26986 if (XINT (x, 1) == UNSPEC_TP)
26993 case VEC_DUPLICATE:
26994 /* ??? Assume all of these vector manipulation patterns are
26995 recognizable, in which case they all pretty much have the
26996 same cost. */
26997 *total = COSTS_N_INSNS (1);
27007 static int current_machopic_label_num;
27009 /* Given a symbol name and its associated stub, write out the
27010 definition of the stub. */
27013 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27015 unsigned int length;
27016 char *binder_name, *symbol_name, lazy_ptr_name[32];
27017 int label = ++current_machopic_label_num;
27019 /* For 64-bit we shouldn't get here. */
27020 gcc_assert (!TARGET_64BIT);
27022 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27023 symb = targetm.strip_name_encoding (symb);
27025 length = strlen (stub);
27026 binder_name = XALLOCAVEC (char, length + 32);
27027 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
27029 length = strlen (symb);
27030 symbol_name = XALLOCAVEC (char, length + 32);
27031 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27033 sprintf (lazy_ptr_name, "L%d$lz", label);
27036 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
27038 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
27040 fprintf (file, "%s:\n", stub);
27041 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27045 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
27046 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
27047 fprintf (file, "\tjmp\t*%%edx\n");
27050 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
27052 fprintf (file, "%s:\n", binder_name);
27056 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
27057 fputs ("\tpushl\t%eax\n", file);
27060 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
27062 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
27064 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27065 fprintf (file, "%s:\n", lazy_ptr_name);
27066 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27067 fprintf (file, ASM_LONG "%s\n", binder_name);
27069 #endif /* TARGET_MACHO */
27071 /* Order the registers for the register allocator. */
27074 x86_order_regs_for_local_alloc (void)
27079 /* First allocate the local general purpose registers. */
27080 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27081 if (GENERAL_REGNO_P (i) && call_used_regs[i])
27082 reg_alloc_order [pos++] = i;
27084 /* Global general purpose registers. */
27085 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
27086 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
27087 reg_alloc_order [pos++] = i;
27089 /* x87 registers come first in case we are doing FP math
27090 using them. */
27091 if (!TARGET_SSE_MATH)
27092 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27093 reg_alloc_order [pos++] = i;
27095 /* SSE registers. */
27096 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
27097 reg_alloc_order [pos++] = i;
27098 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
27099 reg_alloc_order [pos++] = i;
27101 /* x87 registers. */
27102 if (TARGET_SSE_MATH)
27103 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
27104 reg_alloc_order [pos++] = i;
27106 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
27107 reg_alloc_order [pos++] = i;
27109 /* Initialize the rest of the array, as we do not allocate some registers
27110 at all. */
27111 while (pos < FIRST_PSEUDO_REGISTER)
27112 reg_alloc_order [pos++] = 0;
27115 /* Handle an "ms_abi" or "sysv_abi" attribute; arguments as in
27116 struct attribute_spec.handler. */
27118 ix86_handle_abi_attribute (tree *node, tree name,
27119 tree args ATTRIBUTE_UNUSED,
27120 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27122 if (TREE_CODE (*node) != FUNCTION_TYPE
27123 && TREE_CODE (*node) != METHOD_TYPE
27124 && TREE_CODE (*node) != FIELD_DECL
27125 && TREE_CODE (*node) != TYPE_DECL)
27127 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27129 *no_add_attrs = true;
27134 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
27136 *no_add_attrs = true;
27140 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
27141 if (is_attribute_p ("ms_abi", name))
27143 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
27145 error ("ms_abi and sysv_abi attributes are not compatible");
27150 else if (is_attribute_p ("sysv_abi", name))
27152 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
27154 error ("ms_abi and sysv_abi attributes are not compatible");
27163 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
27164 struct attribute_spec.handler. */
27166 ix86_handle_struct_attribute (tree *node, tree name,
27167 tree args ATTRIBUTE_UNUSED,
27168 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27171 if (DECL_P (*node))
27173 if (TREE_CODE (*node) == TYPE_DECL)
27174 type = &TREE_TYPE (*node);
27179 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
27180 || TREE_CODE (*type) == UNION_TYPE)))
27182 warning (OPT_Wattributes, "%qE attribute ignored",
27184 *no_add_attrs = true;
27187 else if ((is_attribute_p ("ms_struct", name)
27188 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
27189 || ((is_attribute_p ("gcc_struct", name)
27190 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
27192 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
27194 *no_add_attrs = true;
27201 ix86_handle_fndecl_attribute (tree *node, tree name,
27202 tree args ATTRIBUTE_UNUSED,
27203 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
27205 if (TREE_CODE (*node) != FUNCTION_DECL)
27207 warning (OPT_Wattributes, "%qE attribute only applies to functions",
27209 *no_add_attrs = true;
27215 ix86_ms_bitfield_layout_p (const_tree record_type)
27217 return ((TARGET_MS_BITFIELD_LAYOUT
27218 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27219 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27222 /* Returns an expression indicating where the this parameter is
27223 located on entry to the FUNCTION. */
27226 x86_this_parameter (tree function)
27228 tree type = TREE_TYPE (function);
27229 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27234 const int *parm_regs;
27236 if (ix86_function_type_abi (type) == MS_ABI)
27237 parm_regs = x86_64_ms_abi_int_parameter_registers;
27239 parm_regs = x86_64_int_parameter_registers;
27240 return gen_rtx_REG (DImode, parm_regs[aggr]);
27243 nregs = ix86_function_regparm (type, function);
27245 if (nregs > 0 && !stdarg_p (type))
27249 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27250 regno = aggr ? DX_REG : CX_REG;
27251 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27255 return gen_rtx_MEM (SImode,
27256 plus_constant (stack_pointer_rtx, 4));
27265 return gen_rtx_MEM (SImode,
27266 plus_constant (stack_pointer_rtx, 4));
27269 return gen_rtx_REG (SImode, regno);
27272 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27275 /* Determine whether x86_output_mi_thunk can succeed. */
27278 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27279 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27280 HOST_WIDE_INT vcall_offset, const_tree function)
27282 /* 64-bit can handle anything. */
27286 /* For 32-bit, everything's fine if we have one free register. */
27287 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27290 /* Need a free register for vcall_offset. */
27294 /* Need a free register for GOT references. */
27295 if (flag_pic && !targetm.binds_local_p (function))
27298 /* Otherwise ok. */
27302 /* Output the assembler code for a thunk function. THUNK_DECL is the
27303 declaration for the thunk function itself, FUNCTION is the decl for
27304 the target function. DELTA is an immediate constant offset to be
27305 added to THIS. If VCALL_OFFSET is nonzero, the word at
27306 *(*this + vcall_offset) should be added to THIS. */
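/* As a rough ia32 illustration (not from the original sources): with
   DELTA == 16, no VCALL_OFFSET and a stack-passed this pointer, the
   thunk body is essentially "addl $16, 4(%esp)" followed by a jump to
   FUNCTION. */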
27309 x86_output_mi_thunk (FILE *file,
27310 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27311 HOST_WIDE_INT vcall_offset, tree function)
27314 rtx this_param = x86_this_parameter (function);
27317 /* Make sure unwind info is emitted for the thunk if needed. */
27318 final_start_function (emit_barrier (), file, 1);
27320 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27321 pull it in now and let DELTA benefit. */
27322 if (REG_P (this_param))
27323 this_reg = this_param;
27324 else if (vcall_offset)
27326 /* Put the this parameter into %eax. */
27327 xops[0] = this_param;
27328 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27329 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27332 this_reg = NULL_RTX;
27334 /* Adjust the this parameter by a fixed constant. */
27337 xops[0] = GEN_INT (delta);
27338 xops[1] = this_reg ? this_reg : this_param;
27341 if (!x86_64_general_operand (xops[0], DImode))
27343 tmp = gen_rtx_REG (DImode, R10_REG);
27345 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27347 xops[1] = this_param;
27349 if (x86_maybe_negate_const_int (&xops[0], DImode))
27350 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27352 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27354 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27355 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27357 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27360 /* Adjust the this parameter by a value stored in the vtable. */
27364 tmp = gen_rtx_REG (DImode, R10_REG);
27367 int tmp_regno = CX_REG;
27368 if (lookup_attribute ("fastcall",
27369 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27370 || lookup_attribute ("thiscall",
27371 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27372 tmp_regno = AX_REG;
27373 tmp = gen_rtx_REG (SImode, tmp_regno);
27376 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27378 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27380 /* Adjust the this parameter. */
27381 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27382 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27384 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27385 xops[0] = GEN_INT (vcall_offset);
27387 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27388 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27390 xops[1] = this_reg;
27391 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27394 /* If necessary, drop THIS back to its stack slot. */
27395 if (this_reg && this_reg != this_param)
27397 xops[0] = this_reg;
27398 xops[1] = this_param;
27399 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27402 xops[0] = XEXP (DECL_RTL (function), 0);
27405 if (!flag_pic || targetm.binds_local_p (function))
27406 output_asm_insn ("jmp\t%P0", xops);
27407 /* All thunks should be in the same object as their target,
27408 and thus binds_local_p should be true. */
27409 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27410 gcc_unreachable ();
27413 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27414 tmp = gen_rtx_CONST (Pmode, tmp);
27415 tmp = gen_rtx_MEM (QImode, tmp);
27417 output_asm_insn ("jmp\t%A0", xops);
27422 if (!flag_pic || targetm.binds_local_p (function))
27423 output_asm_insn ("jmp\t%P0", xops);
27428 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27429 if (TARGET_MACHO_BRANCH_ISLANDS)
27430 sym_ref = (gen_rtx_SYMBOL_REF
27432 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27433 tmp = gen_rtx_MEM (QImode, sym_ref);
27435 output_asm_insn ("jmp\t%0", xops);
27438 #endif /* TARGET_MACHO */
27440 tmp = gen_rtx_REG (SImode, CX_REG);
27441 output_set_got (tmp, NULL_RTX);
27444 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27445 output_asm_insn ("jmp\t{*}%1", xops);
27448 final_end_function ();
27452 x86_file_start (void)
27454 default_file_start ();
27456 darwin_file_start ();
27458 if (X86_FILE_START_VERSION_DIRECTIVE)
27459 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27460 if (X86_FILE_START_FLTUSED)
27461 fputs ("\t.global\t__fltused\n", asm_out_file);
27462 if (ix86_asm_dialect == ASM_INTEL)
27463 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
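/* Compute the alignment of FIELD, given the alignment COMPUTED so far.
   On ia32, unless -malign-double is in effect, 64-bit scalar fields
   (double, long long and their complex variants) are capped at 32-bit
   alignment inside structures, as the MIN (32, computed) below does.  */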
27467 x86_field_alignment (tree field, int computed)
27469 enum machine_mode mode;
27470 tree type = TREE_TYPE (field);
27472 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27474 mode = TYPE_MODE (strip_array_types (type));
27475 if (mode == DFmode || mode == DCmode
27476 || GET_MODE_CLASS (mode) == MODE_INT
27477 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27478 return MIN (32, computed);
27482 /* Output assembler code to FILE to increment profiler label # LABELNO
27483 for profiling a function entry. */
27485 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27487 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
27492 #ifndef NO_PROFILE_COUNTERS
27493 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27496 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27497 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27499 fprintf (file, "\tcall\t%s\n", mcount_name);
27503 #ifndef NO_PROFILE_COUNTERS
27504 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27507 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27511 #ifndef NO_PROFILE_COUNTERS
27512 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27515 fprintf (file, "\tcall\t%s\n", mcount_name);
27519 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27520 /* We don't have exact information about the insn sizes, but we may assume
27521 quite safely that we are informed about all 1 byte insns and memory
27522 address sizes. This is enough to eliminate unnecessary padding in the vast majority of cases. */
27526 min_insn_size (rtx insn)
27530 if (!INSN_P (insn) || !active_insn_p (insn))
27533 /* Discard alignments we've emitted and jump instructions. */
27534 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27535 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27537 if (JUMP_TABLE_DATA_P (insn))
27540 /* Important case - calls are always 5 bytes.
27541 It is common to have many calls in a row. */
27543 && symbolic_reference_mentioned_p (PATTERN (insn))
27544 && !SIBLING_CALL_P (insn))
27546 len = get_attr_length (insn);
27550 /* For normal instructions we rely on get_attr_length being exact,
27551 with a few exceptions. */
27552 if (!JUMP_P (insn))
27554 enum attr_type type = get_attr_type (insn);
27559 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27560 || asm_noperands (PATTERN (insn)) >= 0)
27567 /* Otherwise trust get_attr_length. */
27571 l = get_attr_length_address (insn);
27572 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27581 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte window. */
27585 ix86_avoid_jump_mispredicts (void)
27587 rtx insn, start = get_insns ();
27588 int nbytes = 0, njumps = 0;
27591 /* Look for all minimal intervals of instructions containing 4 jumps.
27592 The intervals are bounded by START and INSN. NBYTES is the total
27593 size of the instructions in the interval, including INSN but not
27594 including START. When NBYTES is smaller than 16, it is possible
27595 that the end of START and INSN land in the same 16-byte window.
27597 The smallest offset at which INSN can start in that window occurs
27598 when START ends at offset 0; INSN then starts at offset NBYTES - sizeof (INSN).
27599 We add a p2align to the 16-byte window with max skip 15 - NBYTES + sizeof (INSN). */
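/* Worked example with illustrative numbers: if the interval holds
   nbytes == 12 including a 2-byte INSN, the p2align emitted allows
   skipping up to 15 - 12 + 2 = 5 bytes, which is enough to push INSN
   into the next 16-byte window.  */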
27601 for (insn = start; insn; insn = NEXT_INSN (insn))
27605 if (LABEL_P (insn))
27607 int align = label_to_alignment (insn);
27608 int max_skip = label_to_max_skip (insn);
27612 /* If align > 3, only up to 16 - max_skip - 1 bytes can already
27613 be in the current 16-byte page, because otherwise
27614 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27615 bytes to reach a 16-byte boundary. */
27617 || (align <= 3 && max_skip != (1 << align) - 1))
27620 fprintf (dump_file, "Label %i with max_skip %i\n",
27621 INSN_UID (insn), max_skip);
27624 while (nbytes + max_skip >= 16)
27626 start = NEXT_INSN (start);
27627 if ((JUMP_P (start)
27628 && GET_CODE (PATTERN (start)) != ADDR_VEC
27629 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27631 njumps--, isjump = 1;
27634 nbytes -= min_insn_size (start);
27640 min_size = min_insn_size (insn);
27641 nbytes += min_size;
27643 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27644 INSN_UID (insn), min_size);
27646 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27647 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27655 start = NEXT_INSN (start);
27656 if ((JUMP_P (start)
27657 && GET_CODE (PATTERN (start)) != ADDR_VEC
27658 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27660 njumps--, isjump = 1;
27663 nbytes -= min_insn_size (start);
27665 gcc_assert (njumps >= 0);
27667 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27668 INSN_UID (start), INSN_UID (insn), nbytes);
27670 if (njumps == 3 && isjump && nbytes < 16)
27672 int padsize = 15 - nbytes + min_insn_size (insn);
27675 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27676 INSN_UID (insn), padsize);
27677 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
27683 /* AMD Athlon works faster
27684 when RET is not the destination of a conditional jump or directly preceded
27685 by another jump instruction. We avoid the penalty by inserting a NOP just
27686 before the RET instruction in such cases. */
27688 ix86_pad_returns (void)
27693 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27695 basic_block bb = e->src;
27696 rtx ret = BB_END (bb);
27698 bool replace = false;
27700 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27701 || optimize_bb_for_size_p (bb))
27703 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27704 if (active_insn_p (prev) || LABEL_P (prev))
27706 if (prev && LABEL_P (prev))
27711 FOR_EACH_EDGE (e, ei, bb->preds)
27712 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27713 && !(e->flags & EDGE_FALLTHRU))
27718 prev = prev_active_insn (ret);
27720 && ((JUMP_P (prev) && any_condjump_p (prev))
27723 /* Empty functions get a branch mispredict even when the jump destination
27724 is not visible to us. */
27725 if (!prev && !optimize_function_for_size_p (cfun))
27730 emit_jump_insn_before (gen_return_internal_long (), ret);
27736 /* Implement machine specific optimizations. We implement padding of returns
27737 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
27741 if (optimize && optimize_function_for_speed_p (cfun))
27743 if (TARGET_PAD_RETURNS)
27744 ix86_pad_returns ();
27745 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27746 if (TARGET_FOUR_JUMP_LIMIT)
27747 ix86_avoid_jump_mispredicts ();
27752 /* Return nonzero when a QImode register that must be represented via a REX prefix is used. */
27755 x86_extended_QIreg_mentioned_p (rtx insn)
27758 extract_insn_cached (insn);
27759 for (i = 0; i < recog_data.n_operands; i++)
27760 if (REG_P (recog_data.operand[i])
27761 && REGNO (recog_data.operand[i]) > BX_REG)
27766 /* Return nonzero when P points to a register encoded via a REX prefix.
27767 Called via for_each_rtx. */
27769 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27771 unsigned int regno;
27774 regno = REGNO (*p);
27775 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27778 /* Return true when INSN mentions a register that must be encoded using a REX prefix. */
27781 x86_extended_reg_mentioned_p (rtx insn)
27783 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27784 extended_reg_mentioned_1, NULL);
27787 /* If profitable, negate (without causing overflow) integer constant
27788 of mode MODE at location LOC. Return true in this case. */
27790 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27794 if (!CONST_INT_P (*loc))
27800 /* DImode x86_64 constants must fit in 32 bits. */
27801 gcc_assert (x86_64_immediate_operand (*loc, mode));
27812 gcc_unreachable ();
27815 /* Avoid overflows. */
27816 if (mode_signbit_p (mode, *loc))
27819 val = INTVAL (*loc);
27821 /* Make things pretty: emit `subl $4,%eax' rather than `addl $-4,%eax'.
27822 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
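/* E.g. add $-4 becomes sub $4, and add $128 becomes sub $-128 (since
   -128 fits in a sign-extended 8-bit immediate while +128 does not);
   add $-128 itself is left alone for the same reason.  */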
27823 if ((val < 0 && val != -128)
27826 *loc = GEN_INT (-val);
27833 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27834 optabs would emit if we didn't have TFmode patterns. */
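/* Roughly, for input IN of integer mode (a sketch of the expansion below):
     if ((signed) in >= 0)
       out = (FLOAT) in;
     else
       {
         i0 = (in >> 1) | (in & 1);  -- halve, rounding to odd
         out = (FLOAT) i0;
         out = out + out;            -- double to recover the value
       }  */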
27837 x86_emit_floatuns (rtx operands[2])
27839 rtx neglab, donelab, i0, i1, f0, in, out;
27840 enum machine_mode mode, inmode;
27842 inmode = GET_MODE (operands[1]);
27843 gcc_assert (inmode == SImode || inmode == DImode);
27846 in = force_reg (inmode, operands[1]);
27847 mode = GET_MODE (out);
27848 neglab = gen_label_rtx ();
27849 donelab = gen_label_rtx ();
27850 f0 = gen_reg_rtx (mode);
27852 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27854 expand_float (out, in, 0);
27856 emit_jump_insn (gen_jump (donelab));
27859 emit_label (neglab);
27861 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27863 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27865 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27867 expand_float (f0, i0, 0);
27869 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27871 emit_label (donelab);
27874 /* AVX does not support 32-byte integer vector operations,
27875 thus the longest vector we are faced with is V16QImode. */
27876 #define MAX_VECT_LEN 16
27878 struct expand_vec_perm_d
27880 rtx target, op0, op1;
27881 unsigned char perm[MAX_VECT_LEN];
27882 enum machine_mode vmode;
27883 unsigned char nelt;
27887 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27888 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27890 /* Get a vector mode of the same size as the original but with elements
27891 twice as wide. This is only guaranteed to apply to integral vectors. */
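/* For example, V16QImode (16 x 8-bit) yields V8HImode (8 x 16-bit):
   the same 16 bytes, half as many elements, each twice as wide.  */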
27893 static inline enum machine_mode
27894 get_mode_wider_vector (enum machine_mode o)
27896 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27897 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27898 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27899 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
27903 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27904 with all elements equal to VAR. Return true if successful. */
27907 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27908 rtx target, rtx val)
27931 /* First attempt to recognize VAL as-is. */
27932 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27933 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27934 if (recog_memoized (insn) < 0)
27937 /* If that fails, force VAL into a register. */
27940 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27941 seq = get_insns ();
27944 emit_insn_before (seq, insn);
27946 ok = recog_memoized (insn) >= 0;
27955 if (TARGET_SSE || TARGET_3DNOW_A)
27959 val = gen_lowpart (SImode, val);
27960 x = gen_rtx_TRUNCATE (HImode, val);
27961 x = gen_rtx_VEC_DUPLICATE (mode, x);
27962 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27975 struct expand_vec_perm_d dperm;
27979 memset (&dperm, 0, sizeof (dperm));
27980 dperm.target = target;
27981 dperm.vmode = mode;
27982 dperm.nelt = GET_MODE_NUNITS (mode);
27983 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27985 /* Extend to SImode using a paradoxical SUBREG. */
27986 tmp1 = gen_reg_rtx (SImode);
27987 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27989 /* Insert the SImode value as low element of a V4SImode vector. */
27990 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27991 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27993 ok = (expand_vec_perm_1 (&dperm)
27994 || expand_vec_perm_broadcast_1 (&dperm));
28006 /* Replicate the value once into the next wider mode and recurse. */
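/* E.g. a QImode value v becomes the HImode value (v << 8) | v, turning
   a V16QImode broadcast into a V8HImode broadcast of the doubled-up
   value, until a directly supported element width is reached.  */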
28008 enum machine_mode smode, wsmode, wvmode;
28011 smode = GET_MODE_INNER (mode);
28012 wvmode = get_mode_wider_vector (mode);
28013 wsmode = GET_MODE_INNER (wvmode);
28015 val = convert_modes (wsmode, smode, val, true);
28016 x = expand_simple_binop (wsmode, ASHIFT, val,
28017 GEN_INT (GET_MODE_BITSIZE (smode)),
28018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28019 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
28021 x = gen_lowpart (wvmode, target);
28022 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
28030 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
28031 rtx x = gen_reg_rtx (hvmode);
28033 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
28036 x = gen_rtx_VEC_CONCAT (mode, x, x);
28037 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28046 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28047 whose ONE_VAR element is VAR, and other elements are zero. Return true if successful. */
28051 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
28052 rtx target, rtx var, int one_var)
28054 enum machine_mode vsimode;
28057 bool use_vector_set = false;
28062 /* For SSE4.1, we normally use vector set. But if the second
28063 element is zero and inter-unit moves are OK, we use movq instead. */
28065 use_vector_set = (TARGET_64BIT
28067 && !(TARGET_INTER_UNIT_MOVES
28073 use_vector_set = TARGET_SSE4_1;
28076 use_vector_set = TARGET_SSE2;
28079 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
28086 use_vector_set = TARGET_AVX;
28089 /* Use ix86_expand_vector_set in 64bit mode only. */
28090 use_vector_set = TARGET_AVX && TARGET_64BIT;
28096 if (use_vector_set)
28098 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
28099 var = force_reg (GET_MODE_INNER (mode), var);
28100 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28116 var = force_reg (GET_MODE_INNER (mode), var);
28117 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
28118 emit_insn (gen_rtx_SET (VOIDmode, target, x));
28123 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
28124 new_target = gen_reg_rtx (mode);
28126 new_target = target;
28127 var = force_reg (GET_MODE_INNER (mode), var);
28128 x = gen_rtx_VEC_DUPLICATE (mode, var);
28129 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
28130 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
28133 /* We need to shuffle the value to the correct position, so
28134 create a new pseudo to store the intermediate result. */
28136 /* With SSE2, we can use the integer shuffle insns. */
28137 if (mode != V4SFmode && TARGET_SSE2)
28139 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
28141 GEN_INT (one_var == 1 ? 0 : 1),
28142 GEN_INT (one_var == 2 ? 0 : 1),
28143 GEN_INT (one_var == 3 ? 0 : 1)));
28144 if (target != new_target)
28145 emit_move_insn (target, new_target);
28149 /* Otherwise convert the intermediate result to V4SFmode and
28150 use the SSE1 shuffle instructions. */
28151 if (mode != V4SFmode)
28153 tmp = gen_reg_rtx (V4SFmode);
28154 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
28159 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
28161 GEN_INT (one_var == 1 ? 0 : 1),
28162 GEN_INT (one_var == 2 ? 0+4 : 1+4),
28163 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
28165 if (mode != V4SFmode)
28166 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
28167 else if (tmp != target)
28168 emit_move_insn (target, tmp);
28170 else if (target != new_target)
28171 emit_move_insn (target, new_target);
28176 vsimode = V4SImode;
28182 vsimode = V2SImode;
28188 /* Zero extend the variable element to SImode and recurse. */
28189 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
28191 x = gen_reg_rtx (vsimode);
28192 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
28194 gcc_unreachable ();
28196 emit_move_insn (target, gen_lowpart (mode, x));
28204 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28205 consisting of the values in VALS. It is known that all elements
28206 except ONE_VAR are constants. Return true if successful. */
28209 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28210 rtx target, rtx vals, int one_var)
28212 rtx var = XVECEXP (vals, 0, one_var);
28213 enum machine_mode wmode;
28216 const_vec = copy_rtx (vals);
28217 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28218 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28226 /* For the two element vectors, it's just as easy to use
28227 the general case. */
28231 /* Use ix86_expand_vector_set in 64bit mode only. */
28254 /* There's no way to set one QImode entry easily. Combine
28255 the variable value with its adjacent constant value, and
28256 promote to an HImode set. */
28257 x = XVECEXP (vals, 0, one_var ^ 1);
28260 var = convert_modes (HImode, QImode, var, true);
28261 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28262 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28263 x = GEN_INT (INTVAL (x) & 0xff);
28267 var = convert_modes (HImode, QImode, var, true);
28268 x = gen_int_mode (INTVAL (x) << 8, HImode);
28270 if (x != const0_rtx)
28271 var = expand_simple_binop (HImode, IOR, var, x, var,
28272 1, OPTAB_LIB_WIDEN);
28274 x = gen_reg_rtx (wmode);
28275 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28276 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28278 emit_move_insn (target, gen_lowpart (mode, x));
28285 emit_move_insn (target, const_vec);
28286 ix86_expand_vector_set (mmx_ok, target, var, one_var);
28290 /* A subroutine of ix86_expand_vector_init_general. Use vector
28291 concatenate to handle the most general case: all values variable,
28292 and none identical. */
28295 ix86_expand_vector_init_concat (enum machine_mode mode,
28296 rtx target, rtx *ops, int n)
28298 enum machine_mode cmode, hmode = VOIDmode;
28299 rtx first[8], second[4];
28339 gcc_unreachable ();
28342 if (!register_operand (ops[1], cmode))
28343 ops[1] = force_reg (cmode, ops[1]);
28344 if (!register_operand (ops[0], cmode))
28345 ops[0] = force_reg (cmode, ops[0]);
28346 emit_insn (gen_rtx_SET (VOIDmode, target,
28347 gen_rtx_VEC_CONCAT (mode, ops[0],
28367 gcc_unreachable ();
28383 gcc_unreachable ();
28388 /* FIXME: We process inputs backward to help RA. PR 36222. */
28391 for (; i > 0; i -= 2, j--)
28393 first[j] = gen_reg_rtx (cmode);
28394 v = gen_rtvec (2, ops[i - 1], ops[i]);
28395 ix86_expand_vector_init (false, first[j],
28396 gen_rtx_PARALLEL (cmode, v));
28402 gcc_assert (hmode != VOIDmode);
28403 for (i = j = 0; i < n; i += 2, j++)
28405 second[j] = gen_reg_rtx (hmode);
28406 ix86_expand_vector_init_concat (hmode, second [j],
28410 ix86_expand_vector_init_concat (mode, target, second, n);
28413 ix86_expand_vector_init_concat (mode, target, first, n);
28417 gcc_unreachable ();
28421 /* A subroutine of ix86_expand_vector_init_general. Use vector
28422 interleave to handle the most general case: all values variable,
28423 and none identical. */
28426 ix86_expand_vector_init_interleave (enum machine_mode mode,
28427 rtx target, rtx *ops, int n)
28429 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28432 rtx (*gen_load_even) (rtx, rtx, rtx);
28433 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28434 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28439 gen_load_even = gen_vec_setv8hi;
28440 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28441 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28442 inner_mode = HImode;
28443 first_imode = V4SImode;
28444 second_imode = V2DImode;
28445 third_imode = VOIDmode;
28448 gen_load_even = gen_vec_setv16qi;
28449 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28450 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28451 inner_mode = QImode;
28452 first_imode = V8HImode;
28453 second_imode = V4SImode;
28454 third_imode = V2DImode;
28457 gcc_unreachable ();
28460 for (i = 0; i < n; i++)
28462 /* Extend the odd element to SImode using a paradoxical SUBREG. */
28463 op0 = gen_reg_rtx (SImode);
28464 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28466 /* Insert the SImode value as low element of V4SImode vector. */
28467 op1 = gen_reg_rtx (V4SImode);
28468 op0 = gen_rtx_VEC_MERGE (V4SImode,
28469 gen_rtx_VEC_DUPLICATE (V4SImode,
28471 CONST0_RTX (V4SImode),
28473 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28475 /* Cast the V4SImode vector back to a vector in the original mode. */
28476 op0 = gen_reg_rtx (mode);
28477 emit_move_insn (op0, gen_lowpart (mode, op1));
28479 /* Load even elements into the second position. */
28480 emit_insn (gen_load_even (op0,
28481 force_reg (inner_mode,
28485 /* Cast vector to FIRST_IMODE vector. */
28486 ops[i] = gen_reg_rtx (first_imode);
28487 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28490 /* Interleave low FIRST_IMODE vectors. */
28491 for (i = j = 0; i < n; i += 2, j++)
28493 op0 = gen_reg_rtx (first_imode);
28494 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28496 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28497 ops[j] = gen_reg_rtx (second_imode);
28498 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28501 /* Interleave low SECOND_IMODE vectors. */
28502 switch (second_imode)
28505 for (i = j = 0; i < n / 2; i += 2, j++)
28507 op0 = gen_reg_rtx (second_imode);
28508 emit_insn (gen_interleave_second_low (op0, ops[i],
28511 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector. */
28513 ops[j] = gen_reg_rtx (third_imode);
28514 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28516 second_imode = V2DImode;
28517 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28521 op0 = gen_reg_rtx (second_imode);
28522 emit_insn (gen_interleave_second_low (op0, ops[0],
28525 /* Cast the SECOND_IMODE vector back to a vector in the original mode. */
28527 emit_insn (gen_rtx_SET (VOIDmode, target,
28528 gen_lowpart (mode, op0)));
28532 gcc_unreachable ();
28536 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28537 all values variable, and none identical. */
28540 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28541 rtx target, rtx vals)
28543 rtx ops[32], op0, op1;
28544 enum machine_mode half_mode = VOIDmode;
28551 if (!mmx_ok && !TARGET_SSE)
28563 n = GET_MODE_NUNITS (mode);
28564 for (i = 0; i < n; i++)
28565 ops[i] = XVECEXP (vals, 0, i);
28566 ix86_expand_vector_init_concat (mode, target, ops, n);
28570 half_mode = V16QImode;
28574 half_mode = V8HImode;
28578 n = GET_MODE_NUNITS (mode);
28579 for (i = 0; i < n; i++)
28580 ops[i] = XVECEXP (vals, 0, i);
28581 op0 = gen_reg_rtx (half_mode);
28582 op1 = gen_reg_rtx (half_mode);
28583 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28585 ix86_expand_vector_init_interleave (half_mode, op1,
28586 &ops [n >> 1], n >> 2);
28587 emit_insn (gen_rtx_SET (VOIDmode, target,
28588 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28592 if (!TARGET_SSE4_1)
28600 /* Don't use ix86_expand_vector_init_interleave if we can't
28601 move from GPR to SSE register directly. */
28602 if (!TARGET_INTER_UNIT_MOVES)
28605 n = GET_MODE_NUNITS (mode);
28606 for (i = 0; i < n; i++)
28607 ops[i] = XVECEXP (vals, 0, i);
28608 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28616 gcc_unreachable ();
28620 int i, j, n_elts, n_words, n_elt_per_word;
28621 enum machine_mode inner_mode;
28622 rtx words[4], shift;
28624 inner_mode = GET_MODE_INNER (mode);
28625 n_elts = GET_MODE_NUNITS (mode);
28626 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28627 n_elt_per_word = n_elts / n_words;
28628 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
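/* Illustrative case: V8HImode on a 32-bit target gives n_words == 4 and
   n_elt_per_word == 2; each SImode word is then assembled below as
   (elt[2*i + 1] << 16) | elt[2*i].  */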
28630 for (i = 0; i < n_words; ++i)
28632 rtx word = NULL_RTX;
28634 for (j = 0; j < n_elt_per_word; ++j)
28636 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28637 elt = convert_modes (word_mode, inner_mode, elt, true);
28643 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28644 word, 1, OPTAB_LIB_WIDEN);
28645 word = expand_simple_binop (word_mode, IOR, word, elt,
28646 word, 1, OPTAB_LIB_WIDEN);
28654 emit_move_insn (target, gen_lowpart (mode, words[0]));
28655 else if (n_words == 2)
28657 rtx tmp = gen_reg_rtx (mode);
28658 emit_clobber (tmp);
28659 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28660 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28661 emit_move_insn (target, tmp);
28663 else if (n_words == 4)
28665 rtx tmp = gen_reg_rtx (V4SImode);
28666 gcc_assert (word_mode == SImode);
28667 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28668 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28669 emit_move_insn (target, gen_lowpart (mode, tmp));
28672 gcc_unreachable ();
28676 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28677 instructions unless MMX_OK is true. */
28680 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28682 enum machine_mode mode = GET_MODE (target);
28683 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28684 int n_elts = GET_MODE_NUNITS (mode);
28685 int n_var = 0, one_var = -1;
28686 bool all_same = true, all_const_zero = true;
28690 for (i = 0; i < n_elts; ++i)
28692 x = XVECEXP (vals, 0, i);
28693 if (!(CONST_INT_P (x)
28694 || GET_CODE (x) == CONST_DOUBLE
28695 || GET_CODE (x) == CONST_FIXED))
28696 n_var++, one_var = i;
28697 else if (x != CONST0_RTX (inner_mode))
28698 all_const_zero = false;
28699 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28703 /* Constants are best loaded from the constant pool. */
28706 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28710 /* If all values are identical, broadcast the value. */
28712 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28713 XVECEXP (vals, 0, 0)))
28716 /* Values where only one field is non-constant are best loaded from
28717 the pool and overwritten via move later. */
28721 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28722 XVECEXP (vals, 0, one_var),
28726 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28730 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28734 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28736 enum machine_mode mode = GET_MODE (target);
28737 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28738 enum machine_mode half_mode;
28739 bool use_vec_merge = false;
28741 static rtx (*gen_extract[6][2]) (rtx, rtx)
28743 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28744 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28745 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28746 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28747 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28748 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28750 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28752 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28753 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28754 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28755 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28756 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28757 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28767 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28768 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28770 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28772 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28773 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28779 use_vec_merge = TARGET_SSE4_1;
28787 /* For the two element vectors, we implement a VEC_CONCAT with
28788 the extraction of the other element. */
28790 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28791 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28794 op0 = val, op1 = tmp;
28796 op0 = tmp, op1 = val;
28798 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28799 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28804 use_vec_merge = TARGET_SSE4_1;
28811 use_vec_merge = true;
28815 /* tmp = target = A B C D */
28816 tmp = copy_to_reg (target);
28817 /* target = A A B B */
28818 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28819 /* target = X A B B */
28820 ix86_expand_vector_set (false, target, val, 0);
28821 /* target = A X C D */
28822 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28823 const1_rtx, const0_rtx,
28824 GEN_INT (2+4), GEN_INT (3+4)));
28828 /* tmp = target = A B C D */
28829 tmp = copy_to_reg (target);
28830 /* tmp = X B C D */
28831 ix86_expand_vector_set (false, tmp, val, 0);
28832 /* target = A B X D */
28833 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28834 const0_rtx, const1_rtx,
28835 GEN_INT (0+4), GEN_INT (3+4)));
28839 /* tmp = target = A B C D */
28840 tmp = copy_to_reg (target);
28841 /* tmp = X B C D */
28842 ix86_expand_vector_set (false, tmp, val, 0);
28843 /* target = A B C X */
28844 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28845 const0_rtx, const1_rtx,
28846 GEN_INT (2+4), GEN_INT (0+4)));
28850 gcc_unreachable ();
28855 use_vec_merge = TARGET_SSE4_1;
28859 /* Element 0 handled by vec_merge below. */
28862 use_vec_merge = true;
28868 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28869 store into element 0, then shuffle them back. */
28873 order[0] = GEN_INT (elt);
28874 order[1] = const1_rtx;
28875 order[2] = const2_rtx;
28876 order[3] = GEN_INT (3);
28877 order[elt] = const0_rtx;
28879 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28880 order[1], order[2], order[3]));
28882 ix86_expand_vector_set (false, target, val, 0);
28884 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28885 order[1], order[2], order[3]));
28889 /* For SSE1, we have to reuse the V4SF code. */
28890 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28891 gen_lowpart (SFmode, val), elt);
28896 use_vec_merge = TARGET_SSE2;
28899 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28903 use_vec_merge = TARGET_SSE4_1;
28910 half_mode = V16QImode;
28916 half_mode = V8HImode;
28922 half_mode = V4SImode;
28928 half_mode = V2DImode;
28934 half_mode = V4SFmode;
28940 half_mode = V2DFmode;
28946 /* Compute offset. */
28950 gcc_assert (i <= 1);
28952 /* Extract the half. */
28953 tmp = gen_reg_rtx (half_mode);
28954 emit_insn (gen_extract[j][i] (tmp, target));
28956 /* Put val in tmp at elt. */
28957 ix86_expand_vector_set (false, tmp, val, elt);
28960 emit_insn (gen_insert[j][i] (target, target, tmp));
28969 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28970 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28971 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28975 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28977 emit_move_insn (mem, target);
28979 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28980 emit_move_insn (tmp, val);
28982 emit_move_insn (target, mem);
28987 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28989 enum machine_mode mode = GET_MODE (vec);
28990 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28991 bool use_vec_extr = false;
29004 use_vec_extr = true;
29008 use_vec_extr = TARGET_SSE4_1;
29020 tmp = gen_reg_rtx (mode);
29021 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
29022 GEN_INT (elt), GEN_INT (elt),
29023 GEN_INT (elt+4), GEN_INT (elt+4)));
29027 tmp = gen_reg_rtx (mode);
29028 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
29032 gcc_unreachable ();
29035 use_vec_extr = true;
29040 use_vec_extr = TARGET_SSE4_1;
29054 tmp = gen_reg_rtx (mode);
29055 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
29056 GEN_INT (elt), GEN_INT (elt),
29057 GEN_INT (elt), GEN_INT (elt)));
29061 tmp = gen_reg_rtx (mode);
29062 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
29066 gcc_unreachable ();
29069 use_vec_extr = true;
29074 /* For SSE1, we have to reuse the V4SF code. */
29075 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
29076 gen_lowpart (V4SFmode, vec), elt);
29082 use_vec_extr = TARGET_SSE2;
29085 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
29089 use_vec_extr = TARGET_SSE4_1;
29093 /* ??? Could extract the appropriate HImode element and shift. */
29100 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
29101 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
29103 /* Let the rtl optimizers know about the zero extension performed. */
29104 if (inner_mode == QImode || inner_mode == HImode)
29106 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
29107 target = gen_lowpart (SImode, target);
29110 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
29114 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
29116 emit_move_insn (mem, vec);
29118 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
29119 emit_move_insn (target, tmp);
29123 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
29124 pattern to reduce; DEST is the destination; IN is the input vector. */
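/* Sketch of the dataflow, with IN = [a b c d] (low element first):
     tmp1 = movhlps (in, in)  = [c d c d]
     tmp2 = fn (tmp1, in)     = [f(a,c) f(b,d) x x]   -- upper two unused
     tmp3 = shufps broadcast of tmp2[1]
     dest = fn (tmp2, tmp3); its low element is f(f(a,c), f(b,d)).  */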
29127 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
29129 rtx tmp1, tmp2, tmp3;
29131 tmp1 = gen_reg_rtx (V4SFmode);
29132 tmp2 = gen_reg_rtx (V4SFmode);
29133 tmp3 = gen_reg_rtx (V4SFmode);
29135 emit_insn (gen_sse_movhlps (tmp1, in, in));
29136 emit_insn (fn (tmp2, tmp1, in));
29138 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
29139 const1_rtx, const1_rtx,
29140 GEN_INT (1+4), GEN_INT (1+4)));
29141 emit_insn (fn (dest, tmp2, tmp3));
29144 /* Target hook for scalar_mode_supported_p. */
29146 ix86_scalar_mode_supported_p (enum machine_mode mode)
29148 if (DECIMAL_FLOAT_MODE_P (mode))
29149 return default_decimal_float_supported_p ();
29150 else if (mode == TFmode)
29153 return default_scalar_mode_supported_p (mode);
29156 /* Implements target hook vector_mode_supported_p. */
29158 ix86_vector_mode_supported_p (enum machine_mode mode)
29160 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
29162 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
29164 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
29166 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
29168 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
29173 /* Target hook for c_mode_for_suffix. */
29174 static enum machine_mode
29175 ix86_c_mode_for_suffix (char suffix)
29185 /* Worker function for TARGET_MD_ASM_CLOBBERS.
29187 We do this in the new i386 backend to maintain source compatibility
29188 with the old cc0-based compiler. */
29191 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
29192 tree inputs ATTRIBUTE_UNUSED,
29195 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
29197 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
29202 /* Implements the target hook targetm.asm.encode_section_info. This
29203 is not used by NetWare. */
29205 static void ATTRIBUTE_UNUSED
29206 ix86_encode_section_info (tree decl, rtx rtl, int first)
29208 default_encode_section_info (decl, rtl, first);
29210 if (TREE_CODE (decl) == VAR_DECL
29211 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29212 && ix86_in_large_data_p (decl))
29213 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29216 /* Worker function for REVERSE_CONDITION. */
29219 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29221 return (mode != CCFPmode && mode != CCFPUmode
29222 ? reverse_condition (code)
29223 : reverse_condition_maybe_unordered (code));
29226 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0]. */
29230 output_387_reg_move (rtx insn, rtx *operands)
29232 if (REG_P (operands[0]))
29234 if (REG_P (operands[1])
29235 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29237 if (REGNO (operands[0]) == FIRST_STACK_REG)
29238 return output_387_ffreep (operands, 0);
29239 return "fstp\t%y0";
29241 if (STACK_TOP_P (operands[0]))
29242 return "fld%Z1\t%y1";
29245 else if (MEM_P (operands[0]))
29247 gcc_assert (REG_P (operands[1]));
29248 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29249 return "fstp%Z0\t%y0";
29252 /* There is no non-popping store to memory for XFmode.
29253 So if we need one, follow the store with a load. */
29254 if (GET_MODE (operands[0]) == XFmode)
29255 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29257 return "fst%Z0\t%y0";
29264 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
29265 FP status register is set. */
29268 ix86_emit_fp_unordered_jump (rtx label)
29270 rtx reg = gen_reg_rtx (HImode);
29273 emit_insn (gen_x86_fnstsw_1 (reg));
29275 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29277 emit_insn (gen_x86_sahf_1 (reg));
29279 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29280 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29284 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29286 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29287 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29290 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29291 gen_rtx_LABEL_REF (VOIDmode, label),
29293 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29295 emit_jump_insn (temp);
29296 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29299 /* Output code to perform a log1p XFmode calculation. */
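/* The expansion below uses fyl2xp1 (whose x87 operand range is roughly
   |x| < 1 - sqrt(2)/2 ~= 0.2929, the constant tested against) for small
   inputs, and falls back to fyl2x on 1.0 + op1 otherwise. In both cases
   Y is fldln2, since log(1 + x) == ln(2) * log2(1 + x).  */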
29301 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29303 rtx label1 = gen_label_rtx ();
29304 rtx label2 = gen_label_rtx ();
29306 rtx tmp = gen_reg_rtx (XFmode);
29307 rtx tmp2 = gen_reg_rtx (XFmode);
29310 emit_insn (gen_absxf2 (tmp, op1));
29311 test = gen_rtx_GE (VOIDmode, tmp,
29312 CONST_DOUBLE_FROM_REAL_VALUE (
29313 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29315 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29317 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29318 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29319 emit_jump (label2);
29321 emit_label (label1);
29322 emit_move_insn (tmp, CONST1_RTX (XFmode));
29323 emit_insn (gen_addxf3 (tmp, op1, tmp));
29324 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29325 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29327 emit_label (label2);
29330 /* Output code to perform a Newton-Raphson approximation of a single precision
29331 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
29333 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29335 rtx x0, x1, e0, e1, two;
29337 x0 = gen_reg_rtx (mode);
29338 e0 = gen_reg_rtx (mode);
29339 e1 = gen_reg_rtx (mode);
29340 x1 = gen_reg_rtx (mode);
29342 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29344 if (VECTOR_MODE_P (mode))
29345 two = ix86_build_const_vector (SFmode, true, two);
29347 two = force_reg (mode, two);
29349 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
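/* This is one Newton-Raphson step for f (x) = 1/x - b, namely
   x1 = x0 * (2 - b * x0), with the final multiply by a folded in:
   e0 = a * x0 and res = e0 * x1 below.  */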
29351 /* x0 = rcp(b) estimate */
29352 emit_insn (gen_rtx_SET (VOIDmode, x0,
29353 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29356 emit_insn (gen_rtx_SET (VOIDmode, e0,
29357 gen_rtx_MULT (mode, x0, a)));
29359 emit_insn (gen_rtx_SET (VOIDmode, e1,
29360 gen_rtx_MULT (mode, x0, b)));
29362 emit_insn (gen_rtx_SET (VOIDmode, x1,
29363 gen_rtx_MINUS (mode, two, e1)));
29364 /* res = e0 * x1 */
29365 emit_insn (gen_rtx_SET (VOIDmode, res,
29366 gen_rtx_MULT (mode, e0, x1)));
29369 /* Output code to perform a Newton-Raphson approximation of a
29370 single precision floating point [reciprocal] square root. */
29372 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29375 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29378 x0 = gen_reg_rtx (mode);
29379 e0 = gen_reg_rtx (mode);
29380 e1 = gen_reg_rtx (mode);
29381 e2 = gen_reg_rtx (mode);
29382 e3 = gen_reg_rtx (mode);
29384 real_from_integer (&r, VOIDmode, -3, -1, 0);
29385 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29387 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29388 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29390 if (VECTOR_MODE_P (mode))
29392 mthree = ix86_build_const_vector (SFmode, true, mthree);
29393 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29396 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29397 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
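/* Both forms are one Newton-Raphson step for f (x) = 1/x**2 - a,
   x1 = x0 * (3 - a * x0 * x0) / 2, rewritten with negated constants
   (-3.0 and -0.5) so one multiply by -0.5 serves both variants.  */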
29399 /* x0 = rsqrt(a) estimate */
29400 emit_insn (gen_rtx_SET (VOIDmode, x0,
29401 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29404 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0). */
29409 zero = gen_reg_rtx (mode);
29410 mask = gen_reg_rtx (mode);
29412 zero = force_reg (mode, CONST0_RTX(mode));
29413 emit_insn (gen_rtx_SET (VOIDmode, mask,
29414 gen_rtx_NE (mode, zero, a)));
29416 emit_insn (gen_rtx_SET (VOIDmode, x0,
29417 gen_rtx_AND (mode, x0, mask)));
29421 emit_insn (gen_rtx_SET (VOIDmode, e0,
29422 gen_rtx_MULT (mode, x0, a)));
29424 emit_insn (gen_rtx_SET (VOIDmode, e1,
29425 gen_rtx_MULT (mode, e0, x0)));
29428 mthree = force_reg (mode, mthree);
29429 emit_insn (gen_rtx_SET (VOIDmode, e2,
29430 gen_rtx_PLUS (mode, e1, mthree)));
29432 mhalf = force_reg (mode, mhalf);
29434 /* e3 = -.5 * x0 */
29435 emit_insn (gen_rtx_SET (VOIDmode, e3,
29436 gen_rtx_MULT (mode, x0, mhalf)));
29438 /* e3 = -.5 * e0 */
29439 emit_insn (gen_rtx_SET (VOIDmode, e3,
29440 gen_rtx_MULT (mode, e0, mhalf)));
29441 /* ret = e2 * e3 */
29442 emit_insn (gen_rtx_SET (VOIDmode, res,
29443 gen_rtx_MULT (mode, e2, e3)));
29446 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29448 static void ATTRIBUTE_UNUSED
29449 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29452 /* With Binutils 2.15, the "@unwind" marker must be specified on
29453 every occurrence of the ".eh_frame" section, not just the first one. */
29456 && strcmp (name, ".eh_frame") == 0)
29458 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29459 flags & SECTION_WRITE ? "aw" : "a");
29462 default_elf_asm_named_section (name, flags, decl);
29465 /* Return the mangling of TYPE if it is an extended fundamental type. */
29467 static const char *
29468 ix86_mangle_type (const_tree type)
29470 type = TYPE_MAIN_VARIANT (type);
29472 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29473 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29476 switch (TYPE_MODE (type))
29479 /* __float128 is "g". */
29482 /* "long double" or __float80 is "e". */
29489 /* For 32-bit code we can save PIC register setup by using
29490 __stack_chk_fail_local hidden function instead of calling
29491 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
29492 register, so it is better to call __stack_chk_fail directly. */
29495 ix86_stack_protect_fail (void)
29497 return TARGET_64BIT
29498 ? default_external_stack_protect_fail ()
29499 : default_hidden_stack_protect_fail ();
29502 /* Select a format to encode pointers in exception handling data. CODE
29503 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29504 true if the symbol may be affected by dynamic relocations.
29506 ??? All x86 object file formats are capable of representing this.
29507 After all, the relocation needed is the same as for the call insn.
29508 Whether or not a particular assembler allows us to enter such, I
29509 guess we'll have to see. */
29511 asm_preferred_eh_data_format (int code, int global)
29515 int type = DW_EH_PE_sdata8;
29517 || ix86_cmodel == CM_SMALL_PIC
29518 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29519 type = DW_EH_PE_sdata4;
29520 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29522 if (ix86_cmodel == CM_SMALL
29523 || (ix86_cmodel == CM_MEDIUM && code))
29524 return DW_EH_PE_udata4;
29525 return DW_EH_PE_absptr;
29528 /* Expand copysign from SIGN to the positive value ABS_VALUE
29529 storing in RESULT. If MASK is non-null, it shall be a mask to mask out the sign bit. */
29532 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29534 enum machine_mode mode = GET_MODE (sign);
29535 rtx sgn = gen_reg_rtx (mode);
29536 if (mask == NULL_RTX)
29538 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29539 if (!VECTOR_MODE_P (mode))
29541 /* We need to generate a scalar mode mask in this case. */
29542 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29543 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29544 mask = gen_reg_rtx (mode);
29545 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29549 mask = gen_rtx_NOT (mode, mask);
29550 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29551 gen_rtx_AND (mode, mask, sign)));
29552 emit_insn (gen_rtx_SET (VOIDmode, result,
29553 gen_rtx_IOR (mode, abs_value, sgn)));
29556 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29557 mask for masking out the sign-bit is stored in *SMASK, if that is non-null. */
29560 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29562 enum machine_mode mode = GET_MODE (op0);
29565 xa = gen_reg_rtx (mode);
29566 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29567 if (!VECTOR_MODE_P (mode))
29569 /* We need to generate a scalar mode mask in this case. */
29570 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29571 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29572 mask = gen_reg_rtx (mode);
29573 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29575 emit_insn (gen_rtx_SET (VOIDmode, xa,
29576 gen_rtx_AND (mode, op0, mask)));
29584 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29585 swapping the operands if SWAP_OPERANDS is true. The expanded
29586 code is a forward jump to a newly created label in case the
29587 comparison is true. The generated label rtx is returned. */
29589 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29590 bool swap_operands)
29601 label = gen_label_rtx ();
29602 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29603 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29604 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29605 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29606 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29607 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29608 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29609 JUMP_LABEL (tmp) = label;
29614 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29615 using comparison code CODE. Operands are swapped for the comparison if
29616 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29618 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29619 bool swap_operands)
29621 enum machine_mode mode = GET_MODE (op0);
29622 rtx mask = gen_reg_rtx (mode);
29631 if (mode == DFmode)
29632 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29633 gen_rtx_fmt_ee (code, mode, op0, op1)));
29635 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29636 gen_rtx_fmt_ee (code, mode, op0, op1)));
29641 /* Generate and return a rtx of mode MODE for 2**n where n is the number
29642 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
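/* That is 2**52 for DFmode and 2**23 for SFmode: the magnitude at or
   above which every representable value of the mode is already an
   integer, which is what makes the x + TWO52 - TWO52 rounding trick
   used below work.  */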
29644 ix86_gen_TWO52 (enum machine_mode mode)
29646 REAL_VALUE_TYPE TWO52r;
29649 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29650 TWO52 = const_double_from_real_value (TWO52r, mode);
29651 TWO52 = force_reg (mode, TWO52);
29656 /* Expand SSE sequence for computing lround from OP1 storing into OP0. */
29659 ix86_expand_lround (rtx op0, rtx op1)
29661 /* C code for the stuff we're doing below:
29662 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
29663 return (long)tmp;
29664 */
29665 enum machine_mode mode = GET_MODE (op1);
29666 const struct real_format *fmt;
29667 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29670 /* load nextafter (0.5, 0.0) */
29671 fmt = REAL_MODE_FORMAT (mode);
29672 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29673 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
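/* pred_half is 0.5 - 2**(-p-1), the largest representable value below
   0.5; adding it instead of an exact 0.5 keeps inputs just under 0.5,
   whose sum with 0.5 would round up to 1.0, from rounding incorrectly.  */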
29675 /* adj = copysign (0.5, op1) */
29676 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29677 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29679 /* adj = op1 + adj */
29680 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29682 /* op0 = (imode)adj */
29683 expand_fix (op0, adj, 0);
29686 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1, storing into OPERAND0. */
29689 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29691 /* C code for the stuff we're doing below (for do_floor):
29692 xi = (long)op1;
29693 xi -= (double)xi > op1 ? 1 : 0;
29694 return xi;
29695 */
29696 enum machine_mode fmode = GET_MODE (op1);
29697 enum machine_mode imode = GET_MODE (op0);
29698 rtx ireg, freg, label, tmp;
29700 /* reg = (long)op1 */
29701 ireg = gen_reg_rtx (imode);
29702 expand_fix (ireg, op1, 0);
29704 /* freg = (double)reg */
29705 freg = gen_reg_rtx (fmode);
29706 expand_float (freg, ireg, 0);
29708 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29709 label = ix86_expand_sse_compare_and_jump (UNLE,
29710 freg, op1, !do_floor);
29711 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29712 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29713 emit_move_insn (ireg, tmp);
29715 emit_label (label);
29716 LABEL_NUSES (label) = 1;
29718 emit_move_insn (op0, ireg);
29721 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29722 result in OPERAND0. */
29724 ix86_expand_rint (rtx operand0, rtx operand1)
29726 /* C code for the stuff we're doing below:
29727 xa = fabs (operand1);
29728 if (!isless (xa, 2**52))
29729 return operand1;
29730 xa = xa + 2**52 - 2**52;
29731 return copysign (xa, operand1);
29732 */
29733 enum machine_mode mode = GET_MODE (operand0);
29734 rtx res, xa, label, TWO52, mask;
29736 res = gen_reg_rtx (mode);
29737 emit_move_insn (res, operand1);
29739 /* xa = abs (operand1) */
29740 xa = ix86_expand_sse_fabs (res, &mask);
29742 /* if (!isless (xa, TWO52)) goto label; */
29743 TWO52 = ix86_gen_TWO52 (mode);
29744 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
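/* Once xa < 2**52, adding 2**52 pushes all fraction bits out of the
   significand, so the addition rounds to an integer in the current
   rounding mode; subtracting 2**52 back then yields rint (xa).  */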
29746 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29747 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29749 ix86_sse_copysign_to_positive (res, xa, res, mask);
29751 emit_label (label);
29752 LABEL_NUSES (label) = 1;
29754 emit_move_insn (operand0, res);
29757 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
29760 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29762 /* C code for the stuff we expand below.
29763 double xa = fabs (x), x2;
29764 if (!isless (xa, TWO52))
29765 return x;
29766 xa = xa + TWO52 - TWO52;
29767 x2 = copysign (xa, x);
29768 then compensate: floor subtracts 1 if x2 > x, ceil adds 1 if x2 < x. */
29776 enum machine_mode mode = GET_MODE (operand0);
29777 rtx xa, TWO52, tmp, label, one, res, mask;
29779 TWO52 = ix86_gen_TWO52 (mode);
29781 /* Temporary for holding the result, initialized to the input
29782 operand to ease control flow. */
29783 res = gen_reg_rtx (mode);
29784 emit_move_insn (res, operand1);
29786 /* xa = abs (operand1) */
29787 xa = ix86_expand_sse_fabs (res, &mask);
29789 /* if (!isless (xa, TWO52)) goto label; */
29790 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29792 /* xa = xa + TWO52 - TWO52; */
29793 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29794 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29796 /* xa = copysign (xa, operand1) */
29797 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29799 /* generate 1.0 or -1.0 */
29800 one = force_reg (mode,
29801 const_double_from_real_value (do_floor
29802 ? dconst1 : dconstm1, mode));
29804 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29805 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29806 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29807 gen_rtx_AND (mode, one, tmp)));
29808 /* We always need to subtract here to preserve signed zero. */
29809 tmp = expand_simple_binop (mode, MINUS,
29810 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29811 emit_move_insn (res, tmp);
29813 emit_label (label);
29814 LABEL_NUSES (label) = 1;
29816 emit_move_insn (operand0, res);
29819 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1, storing into OPERAND0. */
29822 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29824 /* C code for the stuff we expand below.
29825 double xa = fabs (x), x2;
29826 if (!isless (xa, TWO52))
29827 return x;
29828 x2 = (double)(long)x;
29829 then compensate: floor subtracts 1 if x2 > x, ceil adds 1 if x2 < x;
29835 if (HONOR_SIGNED_ZEROS (mode))
29836 return copysign (x2, x);
29837 return x2;
29838 */
29839 enum machine_mode mode = GET_MODE (operand0);
29840 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29842 TWO52 = ix86_gen_TWO52 (mode);
29844 /* Temporary for holding the result, initialized to the input
29845 operand to ease control flow. */
29846 res = gen_reg_rtx (mode);
29847 emit_move_insn (res, operand1);
29849 /* xa = abs (operand1) */
29850 xa = ix86_expand_sse_fabs (res, &mask);
29852 /* if (!isless (xa, TWO52)) goto label; */
29853 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29855 /* xa = (double)(long)x */
29856 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29857 expand_fix (xi, res, 0);
29858 expand_float (xa, xi, 0);
29861 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29863 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29864 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29865 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29866 gen_rtx_AND (mode, one, tmp)));
29867 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29868 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29869 emit_move_insn (res, tmp);
29871 if (HONOR_SIGNED_ZEROS (mode))
29872 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29874 emit_label (label);
29875 LABEL_NUSES (label) = 1;
29877 emit_move_insn (operand0, res);
29880 /* Expand SSE sequence for computing round from OPERAND1, storing
29881 into OPERAND0. This sequence works without relying on DImode truncation
29882 via cvttsd2siq, which is only available on 64-bit targets. */
29884 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29886 /* C code for the stuff we expand below.
29887 double xa = fabs (x), xa2, x2;
29888 if (!isless (xa, TWO52))
29889 return x;
29890 Using the absolute value and copying back sign makes
29891 -0.0 -> -0.0 correct.
29892 xa2 = xa + TWO52 - TWO52;
29893 Compensate:
29894 dxa = xa2 - xa;
29895 if (dxa <= -0.5)
29896 xa2 += 1;
29897 else if (dxa > 0.5)
29898 xa2 -= 1;
29899 x2 = copysign (xa2, x);
29900 return x2;
29901 */
29902 enum machine_mode mode = GET_MODE (operand0);
29903 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29905 TWO52 = ix86_gen_TWO52 (mode);
29907 /* Temporary for holding the result, initialized to the input
29908 operand to ease control flow. */
29909 res = gen_reg_rtx (mode);
29910 emit_move_insn (res, operand1);
29912 /* xa = abs (operand1) */
29913 xa = ix86_expand_sse_fabs (res, &mask);
29915 /* if (!isless (xa, TWO52)) goto label; */
29916 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29918 /* xa2 = xa + TWO52 - TWO52; */
29919 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29920 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29922 /* dxa = xa2 - xa; */
29923 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29925 /* generate 0.5, 1.0 and -0.5 */
29926 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29927 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29928 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29932 tmp = gen_reg_rtx (mode);
29933 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29934 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29935 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29936 gen_rtx_AND (mode, one, tmp)));
29937 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29938 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29939 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29940 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29941 gen_rtx_AND (mode, one, tmp)));
29942 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29944 /* res = copysign (xa2, operand1) */
29945 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29947 emit_label (label);
29948 LABEL_NUSES (label) = 1;
29950 emit_move_insn (operand0, res);
29953 /* Expand SSE sequence for computing trunc from OPERAND1, storing into OPERAND0. */
29956 ix86_expand_trunc (rtx operand0, rtx operand1)
29958 /* C code for SSE variant we expand below.
29959 double xa = fabs (x), x2;
29960 if (!isless (xa, TWO52))
29961 return x;
29962 x2 = (double)(long)x;
29963 if (HONOR_SIGNED_ZEROS (mode))
29964 return copysign (x2, x);
29965 return x2;
29966 */
29967 enum machine_mode mode = GET_MODE (operand0);
29968 rtx xa, xi, TWO52, label, res, mask;
29970 TWO52 = ix86_gen_TWO52 (mode);
29972 /* Temporary for holding the result, initialized to the input
29973 operand to ease control flow. */
29974 res = gen_reg_rtx (mode);
29975 emit_move_insn (res, operand1);
29977 /* xa = abs (operand1) */
29978 xa = ix86_expand_sse_fabs (res, &mask);
29980 /* if (!isless (xa, TWO52)) goto label; */
29981 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29983 /* x = (double)(long)x */
29984 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29985 expand_fix (xi, res, 0);
29986 expand_float (res, xi, 0);
29988 if (HONOR_SIGNED_ZEROS (mode))
29989 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29991 emit_label (label);
29992 LABEL_NUSES (label) = 1;
29994 emit_move_insn (operand0, res);
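/* For example (illustrative values): for operand1 = -2.7 the fix/float
   pair computes (double) (long) -2.7 == -2.0.  The copysign step only
   matters for signed zeros: (double) (long) -0.3 is +0.0, so when
   HONOR_SIGNED_ZEROS the sign of the input is copied back to give
   trunc (-0.3) == -0.0.  */
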
/* Expand SSE sequence for computing trunc from OPERAND1 storing
   into OPERAND0.  Sequence that works without relying on DImode truncation
   via cvttsd2siq that is only available on 64bit targets.  */
void
ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
{
  enum machine_mode mode = GET_MODE (operand0);
  rtx xa, mask, TWO52, label, one, res, smask, tmp;

  /* C code for SSE variant we expand below.
        double xa = fabs (x), x2;
        if (!isless (xa, TWO52))
          return x;
        xa2 = xa + TWO52 - TWO52;
     Compensate:
        if (xa2 > xa)
          xa2 -= 1.0;
        x2 = copysign (xa2, x);
        return x2;
   */

  TWO52 = ix86_gen_TWO52 (mode);

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  /* xa = abs (operand1) */
  xa = ix86_expand_sse_fabs (res, &smask);

  /* if (!isless (xa, TWO52)) goto label; */
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* res = xa + TWO52 - TWO52; */
  tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
  tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* generate 1.0 */
  one = force_reg (mode, const_double_from_real_value (dconst1, mode));

  /* Compensate: res = xa2 - (res > xa ? 1 : 0)  */
  mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
  emit_insn (gen_rtx_SET (VOIDmode, mask,
                          gen_rtx_AND (mode, mask, one)));
  tmp = expand_simple_binop (mode, MINUS,
                             res, mask, NULL_RTX, 0, OPTAB_DIRECT);
  emit_move_insn (res, tmp);

  /* res = copysign (res, operand1) */
  ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

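/* Illustrative example of the compensation: for xa = 2.7 the addition
   xa + TWO52 - TWO52 rounds to nearest and yields 3.0; since 3.0 > xa
   the compare mask is all ones, the AND with 1.0 produces 1.0, and res
   becomes 2.0 == trunc (2.7).  For xa = 2.3 the sum rounds down to 2.0
   already and the subtrahend is 0.0.  */
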
/* Expand SSE sequence for computing round from OPERAND1 storing
   into OPERAND0.  */
void
ix86_expand_round (rtx operand0, rtx operand1)
{
  /* C code for the stuff we're doing below:
        double xa = fabs (x);
        if (!isless (xa, TWO52))
          return x;
        xa = (double)(long)(xa + nextafter (0.5, 0.0));
        return copysign (xa, x);
   */
  enum machine_mode mode = GET_MODE (operand0);
  rtx res, TWO52, xa, label, xi, half, mask;
  const struct real_format *fmt;
  REAL_VALUE_TYPE pred_half, half_minus_pred_half;

  /* Temporary for holding the result, initialized to the input
     operand to ease control flow.  */
  res = gen_reg_rtx (mode);
  emit_move_insn (res, operand1);

  TWO52 = ix86_gen_TWO52 (mode);
  xa = ix86_expand_sse_fabs (res, &mask);
  label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

  /* load nextafter (0.5, 0.0) */
  fmt = REAL_MODE_FORMAT (mode);
  real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
  REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);

  /* xa = xa + 0.5 */
  half = force_reg (mode, const_double_from_real_value (pred_half, mode));
  xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);

  /* xa = (double)(int64_t)xa */
  xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
  expand_fix (xi, xa, 0);
  expand_float (xa, xi, 0);

  /* res = copysign (xa, operand1) */
  ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);

  emit_label (label);
  LABEL_NUSES (label) = 1;

  emit_move_insn (operand0, res);
}

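/* The nextafter (0.5, 0.0) constant avoids double rounding (example
   values for DFmode): with a plain 0.5, an input like
   x = 0.49999999999999994 (the largest double below 0.5) would give
   x + 0.5 == 1.0 after rounding to nearest, so round (x) would come out
   as 1 instead of 0.  Adding pred_half = 0.5 - 2**-54 keeps the sum
   strictly below 1.0.  */
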
/* Table of valid machine attributes.  */
static const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall",   0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Fastcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "fastcall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Thiscall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "thiscall",  0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Cdecl attribute says the callee is a normal C declaration.  */
  { "cdecl",     0, 0, false, true,  true,  ix86_handle_cconv_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm",   1, 1, false, true,  true,  ix86_handle_cconv_attribute },
  /* Sseregparm attribute says we are using x86_64 calling conventions
     for FP arguments.  */
  { "sseregparm", 0, 0, false, true, true,  ix86_handle_cconv_attribute },
  /* force_align_arg_pointer says this function realigns the stack at entry.  */
  { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
    false, true,  true, ix86_handle_cconv_attribute },
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
  { "shared",    0, 0, true,  false, false, ix86_handle_shared_attribute },
#endif
  { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
  { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* ms_abi and sysv_abi calling convention function attributes.  */
  { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
  { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
  /* End element.  */
  { NULL, 0, 0, false, false, false, NULL }
};

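/* For reference, a hypothetical user-level use of the attributes
   registered above (example declarations, not part of this file):

     int  __attribute__((fastcall))   f (int a, int b);
     int  __attribute__((regparm(3))) g (int a, int b, int c);
     void __attribute__((ms_abi))     h (void);
*/
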
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                 tree vectype ATTRIBUTE_UNUSED,
                                 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
      case scalar_stmt:
        return ix86_cost->scalar_stmt_cost;

      case scalar_load:
        return ix86_cost->scalar_load_cost;

      case scalar_store:
        return ix86_cost->scalar_store_cost;

      case vector_stmt:
        return ix86_cost->vec_stmt_cost;

      case vector_load:
        return ix86_cost->vec_align_load_cost;

      case vector_store:
        return ix86_cost->vec_store_cost;

      case vec_to_scalar:
        return ix86_cost->vec_to_scalar_cost;

      case scalar_to_vec:
        return ix86_cost->scalar_to_vec_cost;

      case unaligned_load:
      case unaligned_store:
        return ix86_cost->vec_unalign_load_cost;

      case cond_branch_taken:
        return ix86_cost->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return ix86_cost->cond_not_taken_branch_cost;

      case vec_perm:
        return 1;

      default:
        gcc_unreachable ();
    }
}

/* Implement targetm.vectorize.builtin_vec_perm.  */

static tree
ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
{
  tree itype = TREE_TYPE (vec_type);
  bool u = TYPE_UNSIGNED (itype);
  enum machine_mode vmode = TYPE_MODE (vec_type);
  enum ix86_builtins fcode;
  bool ok = TARGET_SSE2;

  switch (vmode)
    {
    case V4DFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V4DF;
      goto do_di;
    case V2DFmode:
      fcode = IX86_BUILTIN_VEC_PERM_V2DF;
    do_di:
      itype = ix86_get_builtin_type (IX86_BT_DI);
      break;

    case V8SFmode:
      ok = TARGET_AVX;
      fcode = IX86_BUILTIN_VEC_PERM_V8SF;
      goto do_si;
    case V4SFmode:
      fcode = IX86_BUILTIN_VEC_PERM_V4SF;
    do_si:
      itype = ix86_get_builtin_type (IX86_BT_SI);
      break;

    case V2DImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
      break;
    case V4SImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
      break;
    case V8HImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
      break;
    case V16QImode:
      fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
      break;
    default:
      ok = false;
      break;
    }

  if (!ok)
    return NULL_TREE;

  *mask_type = itype;
  return ix86_builtins[(int) fcode];
}

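/* Illustrative summary of the switch above: the element selector must
   use integer elements of the same width as the data, so for the
   V2DF/V4DF cases the DI integer type from IX86_BT_DI is substituted
   (SI from IX86_BT_SI for V4SF/V8SF), while the integer modes keep
   their own element type; *mask_type receives that element type for
   the caller to build the selector vector from.  */
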
/* Return a vector mode with twice as many elements as VMODE.  */
/* ??? Consider moving this to a table generated by genmodes.c.  */

static enum machine_mode
doublesize_vector_mode (enum machine_mode vmode)
{
  switch (vmode)
    {
    case V2SFmode:      return V4SFmode;
    case V1DImode:      return V2DImode;
    case V2SImode:      return V4SImode;
    case V4HImode:      return V8HImode;
    case V8QImode:      return V16QImode;

    case V2DFmode:      return V4DFmode;
    case V4SFmode:      return V8SFmode;
    case V2DImode:      return V4DImode;
    case V4SImode:      return V8SImode;
    case V8HImode:      return V16HImode;
    case V16QImode:     return V32QImode;

    case V4DFmode:      return V8DFmode;
    case V8SFmode:      return V16SFmode;
    case V4DImode:      return V8DImode;
    case V8SImode:      return V16SImode;
    case V16HImode:     return V32HImode;
    case V32QImode:     return V64QImode;

    default:
      gcc_unreachable ();
    }
}

/* Construct (set target (vec_select op0 (parallel perm))) and
   return true if that's a valid instruction in the active ISA.  */

static bool
expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
{
  rtx rperm[MAX_VECT_LEN], x;
  unsigned i;

  for (i = 0; i < nelt; ++i)
    rperm[i] = GEN_INT (perm[i]);

  x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
  x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
  x = gen_rtx_SET (VOIDmode, target, x);

  x = emit_insn (x);
  if (recog_memoized (x) < 0)
    {
      remove_insn (x);
      return false;
    }
  return true;
}

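/* For instance (hypothetical call, for illustration): swapping the two
   halves of a V2DF register,

     static const unsigned char swap[2] = { 1, 0 };
     expand_vselect (target, op0, swap, 2);

   builds (set target (vec_select:V2DF op0 (parallel [1 0]))) and keeps
   the insn only if some pattern in sse.md recognizes it.  */
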
/* Similar, but generate a vec_concat from op0 and op1 as well.  */

static bool
expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
                        const unsigned char *perm, unsigned nelt)
{
  enum machine_mode v2mode;
  rtx x;

  v2mode = doublesize_vector_mode (GET_MODE (op0));
  x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
  return expand_vselect (target, x, perm, nelt);
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of blendp[sd] / pblendw / pblendvb.  */

static bool
expand_vec_perm_blend (struct expand_vec_perm_d *d)
{
  enum machine_mode vmode = d->vmode;
  unsigned i, mask, nelt = d->nelt;
  rtx target, op0, op1, x;

  if (!TARGET_SSE4_1 || d->op0 == d->op1)
    return false;
  if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
    return false;

  /* This is a blend, not a permute.  Elements must stay in their
     respective lanes.  */
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (!(e == i || e == i + nelt))
        return false;
    }

  /* ??? Without SSE4.1, we could implement this with and/andn/or.  This
     decision should be extracted elsewhere, so that we only try that
     sequence once all budget==3 options have been tried.  */

  /* For bytes, see if bytes move in pairs so we can use pblendw with
     an immediate argument, rather than pblendvb with a vector argument.  */
  if (vmode == V16QImode)
    {
      bool pblendw_ok = true;
      for (i = 0; i < 16 && pblendw_ok; i += 2)
        pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);

      if (!pblendw_ok)
        {
          rtx rperm[16], vperm;

          for (i = 0; i < nelt; ++i)
            rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);

          vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
          vperm = force_reg (V16QImode, vperm);

          emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
          return true;
        }
    }

  target = d->target;
  op0 = d->op0;
  op1 = d->op1;
  mask = 0;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
    case V2DFmode:
    case V4SFmode:
    case V8HImode:
      for (i = 0; i < nelt; ++i)
        mask |= (d->perm[i] >= nelt) << i;
      break;

    case V2DImode:
      for (i = 0; i < 2; ++i)
        mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
      goto do_subreg;

    case V4SImode:
      for (i = 0; i < 4; ++i)
        mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
      goto do_subreg;

    case V16QImode:
      for (i = 0; i < 8; ++i)
        mask |= (d->perm[i * 2] >= 16) << i;

    do_subreg:
      vmode = V8HImode;
      target = gen_lowpart (vmode, target);
      op0 = gen_lowpart (vmode, op0);
      op1 = gen_lowpart (vmode, op1);
      break;

    default:
      gcc_unreachable ();
    }

  /* This matches five different patterns with the different modes.  */
  x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
  x = gen_rtx_SET (VOIDmode, target, x);
  emit_insn (x);

  return true;
}

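/* Worked example of the mask computation (illustrative): a V4SF blend
   with d->perm = { 0, 5, 2, 7 } takes elements 1 and 3 from op1, so
   mask = (1 << 1) | (1 << 3) = 0xa and the vec_merge above becomes a
   blendps with immediate 10.  */
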
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of the variable form of vpermilps.

   Note that we will have already failed the immediate input vpermilps,
   which requires that the high and low part shuffle be identical; the
   variable form doesn't require that.  */

static bool
expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
{
  rtx rperm[8], vperm;
  unsigned i;

  if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
    return false;

  /* We can only permute within the 128-bit lane.  */
  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];
      if (i < 4 ? e >= 4 : e < 4)
        return false;
    }

  for (i = 0; i < 8; ++i)
    {
      unsigned e = d->perm[i];

      /* Within each 128-bit lane, the elements of op0 are numbered
         from 0 and the elements of op1 are numbered from 4.  */
      if (e >= 8 + 4)
        e -= (8 + 4);
      else if (e >= 4)
        e -= 4;

      rperm[i] = GEN_INT (e);
    }

  vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
  vperm = force_reg (V8SImode, vperm);
  emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));

  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of pshufb or vpperm.  */

static bool
expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
{
  unsigned i, nelt, eltsz;
  rtx rperm[16], vperm, target, op0, op1;

  if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
    return false;
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  if (d->testing_p)
    return true;

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      for (j = 0; j < eltsz; ++j)
        rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
  vperm = force_reg (V16QImode, vperm);

  target = gen_lowpart (V16QImode, d->target);
  op0 = gen_lowpart (V16QImode, d->op0);
  if (d->op0 == d->op1)
    emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
  else
    {
      op1 = gen_lowpart (V16QImode, d->op1);
      emit_insn (gen_xop_pperm (target, op0, op1, vperm));
    }

  return true;
}

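/* Byte-mask example (illustrative): for a V8HI permutation, eltsz is 2,
   so selecting element e = 3 expands to the byte indices { 6, 7 } in
   the V16QI control vector consumed by pshufb/vpperm.  */
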
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
   in a single instruction.  */

static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned char perm2[MAX_VECT_LEN];

  /* Check plain VEC_SELECT first, because AVX has instructions that could
     match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
     input where SEL+CONCAT may not.  */
  if (d->op0 == d->op1)
    {
      int mask = nelt - 1;

      for (i = 0; i < nelt; i++)
        perm2[i] = d->perm[i] & mask;

      if (expand_vselect (d->target, d->op0, perm2, nelt))
        return true;

      /* There are plenty of patterns in sse.md that are written for
         SEL+CONCAT and are not replicated for a single op.  Perhaps
         that should be changed, to avoid the nastiness here.  */

      /* Recognize interleave style patterns, which means incrementing
         every other permutation operand.  */
      for (i = 0; i < nelt; i += 2)
        {
          perm2[i] = d->perm[i] & mask;
          perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
        }
      if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
        return true;

      /* Recognize shufps, which means adding {0, 0, nelt, nelt}.  */
      if (nelt >= 4)
        {
          for (i = 0; i < nelt; i += 4)
            {
              perm2[i + 0] = d->perm[i + 0] & mask;
              perm2[i + 1] = d->perm[i + 1] & mask;
              perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
              perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
            }

          if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
            return true;
        }
    }

  /* Finally, try the fully general two operand permute.  */
  if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
    return true;

  /* Recognize interleave style patterns with reversed operands.  */
  if (d->op0 != d->op1)
    {
      for (i = 0; i < nelt; ++i)
        {
          unsigned e = d->perm[i];
          if (e >= nelt)
            e -= nelt;
          else
            e += nelt;
          perm2[i] = e;
        }

      if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
        return true;
    }

  /* Try the SSE4.1 blend variable merge instructions.  */
  if (expand_vec_perm_blend (d))
    return true;

  /* Try one of the AVX vpermil variable permutations.  */
  if (expand_vec_perm_vpermil (d))
    return true;

  /* Try the SSSE3 pshufb or XOP vpperm variable permutation.  */
  if (expand_vec_perm_pshufb (d))
    return true;

  return false;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to implement D
   in terms of a pair of pshuflw + pshufhw instructions.  */

static bool
expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
{
  unsigned char perm2[MAX_VECT_LEN];
  unsigned i;
  bool ok;

  if (d->vmode != V8HImode || d->op0 != d->op1)
    return false;

  /* The two permutations only operate in 64-bit lanes.  */
  for (i = 0; i < 4; ++i)
    if (d->perm[i] >= 4)
      return false;
  for (i = 4; i < 8; ++i)
    if (d->perm[i] < 4)
      return false;

  if (d->testing_p)
    return true;

  /* Emit the pshuflw.  */
  memcpy (perm2, d->perm, 4);
  for (i = 4; i < 8; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->op0, perm2, 8);
  gcc_assert (ok);

  /* Emit the pshufhw.  */
  memcpy (perm2 + 4, d->perm + 4, 4);
  for (i = 0; i < 4; ++i)
    perm2[i] = i;
  ok = expand_vselect (d->target, d->target, perm2, 8);
  gcc_assert (ok);

  return true;
}

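/* Example (illustrative): d->perm = { 2, 0, 3, 1, 5, 4, 7, 6 } is
   handled as pshuflw with control { 2, 0, 3, 1, 4, 5, 6, 7 } followed
   by pshufhw with control { 0, 1, 2, 3, 5, 4, 7, 6 }; each insn leaves
   the other 64-bit half in place.  */
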
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   the permutation using the SSSE3 palignr instruction.  This succeeds
   when all of the elements in PERM fit within one vector and we merely
   need to shift them down so that a single vector permutation has a
   chance to succeed.  */

static bool
expand_vec_perm_palignr (struct expand_vec_perm_d *d)
{
  unsigned i, nelt = d->nelt;
  unsigned min, max;
  bool in_order, ok;
  rtx shift;

  /* Even with AVX, palignr only operates on 128-bit vectors.  */
  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;

  min = nelt, max = 0;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i];
      if (e < min)
        min = e;
      if (e > max)
        max = e;
    }
  if (min == 0 || max - min >= nelt)
    return false;

  /* Given that we have SSSE3, we know we'll be able to implement the
     single operand permutation after the palignr with pshufb.  */
  if (d->testing_p)
    return true;

  shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
  emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
                                  gen_lowpart (TImode, d->op1),
                                  gen_lowpart (TImode, d->op0), shift));

  d->op0 = d->op1 = d->target;

  in_order = true;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = d->perm[i] - min;
      if (e != i)
        in_order = false;
      d->perm[i] = e;
    }

  /* Test for the degenerate case where the alignment by itself
     produces the desired permutation.  */
  if (in_order)
    return true;

  ok = expand_vec_perm_1 (d);
  gcc_assert (ok);

  return ok;
}

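/* Example (illustrative): for V4SI with d->perm = { 2, 3, 4, 5 } we get
   min = 2, so the palignr shifts the op1:op0 pair down by 64 bits; the
   remaining permutation { 0, 1, 2, 3 } is already in order, which is
   exactly the degenerate case above.  */
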
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
   a two vector permutation into a single vector permutation by using
   an interleave operation to merge the vectors.  */

static bool
expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
{
  struct expand_vec_perm_d dremap, dfinal;
  unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
  unsigned contents, h1, h2, h3, h4;
  unsigned char remap[2 * MAX_VECT_LEN];
  rtx seq;
  bool ok;

  if (d->op0 == d->op1)
    return false;

  /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
     lanes.  We can use similar techniques with the vperm2f128 instruction,
     but it requires slightly different logic.  */
  if (GET_MODE_SIZE (d->vmode) != 16)
    return false;

  /* Examine from whence the elements come.  */
  contents = 0;
  for (i = 0; i < nelt; ++i)
    contents |= 1u << d->perm[i];

  /* Split the two input vectors into 4 halves.  */
  h1 = (1u << nelt2) - 1;
  h2 = h1 << nelt2;
  h3 = h2 << nelt2;
  h4 = h3 << nelt2;

  memset (remap, 0xff, sizeof (remap));
  dremap = *d;

  /* If all of the elements come from the low halves, use interleave low,
     and similarly for interleave high.  If the elements are from
     mis-matched halves, we can use shufps for V4SF/V4SI or do a DImode
     shuffle.  */
  if ((contents & (h1 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i] = i * 2;
          remap[i + nelt] = i * 2 + 1;
          dremap.perm[i * 2] = i;
          dremap.perm[i * 2 + 1] = i + nelt;
        }
    }
  else if ((contents & (h2 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i + nelt2] = i * 2;
          remap[i + nelt + nelt2] = i * 2 + 1;
          dremap.perm[i * 2] = i + nelt2;
          dremap.perm[i * 2 + 1] = i + nelt + nelt2;
        }
    }
  else if ((contents & (h1 | h4)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i] = i;
          remap[i + nelt + nelt2] = i + nelt2;
          dremap.perm[i] = i;
          dremap.perm[i + nelt2] = i + nelt + nelt2;
        }
      if (nelt != 4)
        {
          dremap.vmode = V2DImode;
          dremap.nelt = 2;
          dremap.perm[0] = 0;
          dremap.perm[1] = 3;
        }
    }
  else if ((contents & (h2 | h3)) == contents)
    {
      for (i = 0; i < nelt2; ++i)
        {
          remap[i + nelt2] = i;
          remap[i + nelt] = i + nelt2;
          dremap.perm[i] = i + nelt2;
          dremap.perm[i + nelt2] = i + nelt;
        }
      if (nelt != 4)
        {
          dremap.vmode = V2DImode;
          dremap.nelt = 2;
          dremap.perm[0] = 1;
          dremap.perm[1] = 2;
        }
    }
  else
    return false;

  /* Use the remapping array set up above to move the elements from their
     swizzled locations into their final destinations.  */
  dfinal = *d;
  for (i = 0; i < nelt; ++i)
    {
      unsigned e = remap[d->perm[i]];
      gcc_assert (e < nelt);
      dfinal.perm[i] = e;
    }
  dfinal.op0 = gen_reg_rtx (dfinal.vmode);
  dfinal.op1 = dfinal.op0;
  dremap.target = dfinal.op0;

  /* Test if the final remap can be done with a single insn.  For V4SFmode or
     V4SImode this *will* succeed.  For V8HImode or V16QImode it may not.  */
  start_sequence ();
  ok = expand_vec_perm_1 (&dfinal);
  seq = get_insns ();
  end_sequence ();

  if (!ok)
    return false;

  if (dremap.vmode != dfinal.vmode)
    {
      dremap.target = gen_lowpart (dremap.vmode, dremap.target);
      dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
      dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
    }

  ok = expand_vec_perm_1 (&dremap);
  gcc_assert (ok);

  emit_insn (seq);
  return true;
}

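/* Worked example (illustrative): for V4SI and d->perm = { 0, 5, 1, 4 },
   contents has bits 0, 1, 4 and 5 set, matching h1 | h3.  The
   interleave-low dremap produces { 0, 4, 1, 5 }, remap maps
   0->0, 4->1, 1->2, 5->3, and dfinal becomes the one-operand
   permutation { 0, 3, 2, 1 }, implementable with pshufd.  */
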
/* A subroutine of expand_vec_perm_even_odd_1.  Implement the double-word
   permutation with two pshufb insns and an ior.  We should have already
   failed all two instruction sequences.  */

static bool
expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
{
  rtx rperm[2][16], vperm, l, h, op, m128;
  unsigned int i, nelt, eltsz;

  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
    return false;
  gcc_assert (d->op0 != d->op1);

  nelt = d->nelt;
  eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));

  /* Generate two permutation masks.  If the required element is within
     the given vector it is shuffled into the proper lane.  If the required
     element is in the other vector, force a zero into the lane by setting
     bit 7 in the permutation mask.  */
  m128 = GEN_INT (-128);
  for (i = 0; i < nelt; ++i)
    {
      unsigned j, e = d->perm[i];
      unsigned which = (e >= nelt);
      if (e >= nelt)
        e -= nelt;

      for (j = 0; j < eltsz; ++j)
        {
          rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
          rperm[1-which][i*eltsz + j] = m128;
        }
    }

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
  vperm = force_reg (V16QImode, vperm);

  l = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op0);
  emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));

  vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
  vperm = force_reg (V16QImode, vperm);

  h = gen_reg_rtx (V16QImode);
  op = gen_lowpart (V16QImode, d->op1);
  emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));

  op = gen_lowpart (V16QImode, d->target);
  emit_insn (gen_iorv16qi3 (op, l, h));

  return true;
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement extract-even
   and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
{
  rtx t1, t2, t3, t4;

  switch (d->vmode)
    {
    case V4DFmode:
      t1 = gen_reg_rtx (V4DFmode);
      t2 = gen_reg_rtx (V4DFmode);

      /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }.  */
      emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
      emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));

      /* Now an unpck[lh]pd will produce the result required.  */
      if (odd)
        t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
      else
        t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
      emit_insn (t3);
      break;

    case V8SFmode:
      {
        static const unsigned char perm1[8] = { 0, 2, 1, 3, 5, 6, 5, 7 };
        static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
        static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };

        t1 = gen_reg_rtx (V8SFmode);
        t2 = gen_reg_rtx (V8SFmode);
        t3 = gen_reg_rtx (V8SFmode);
        t4 = gen_reg_rtx (V8SFmode);

        /* Shuffle within the 128-bit lanes to produce:
           { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }.  */
        expand_vselect (t1, d->op0, perm1, 8);
        expand_vselect (t2, d->op1, perm1, 8);

        /* Shuffle the lanes around to produce:
           { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }.  */
        emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
        emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));

        /* Now a vpermil2p will produce the result required.  */
        /* ??? The vpermil2p requires a vector constant.  Another option
           is a unpck[lh]ps to merge the two vectors to produce
           { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }.  Then use another
           vpermilps to get the elements into the final order.  */
        d->op0 = t3;
        d->op1 = t4;
        memcpy (d->perm, odd ? permo: perme, 8);
        expand_vec_perm_vpermil (d);
      }
      break;

    case V2DFmode:
    case V4SFmode:
    case V2DImode:
    case V4SImode:
      /* These are always directly implementable by expand_vec_perm_1.  */
      gcc_unreachable ();

    case V8HImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          /* We need 2*log2(N)-1 operations to achieve odd/even
             with interleave.  */
          t1 = gen_reg_rtx (V8HImode);
          t2 = gen_reg_rtx (V8HImode);
          emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
          if (odd)
            t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
          else
            t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
          emit_insn (t3);
        }
      break;

    case V16QImode:
      if (TARGET_SSSE3)
        return expand_vec_perm_pshufb2 (d);
      else
        {
          t1 = gen_reg_rtx (V16QImode);
          t2 = gen_reg_rtx (V16QImode);
          t3 = gen_reg_rtx (V16QImode);
          emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
          emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
          emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
          emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
          if (odd)
            t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
          else
            t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
          emit_insn (t3);
        }
      break;

    default:
      gcc_unreachable ();
    }

  return true;
}

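/* Note on the interleave fallbacks above: V8HI uses 2*log2(8)-1 = 5
   punpck insns and V16QI uses 2*log2(16)-1 = 7, each pass halving the
   stride between the wanted elements until all of the even (or odd)
   elements occupy one register.  */
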
/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   extract-even and extract-odd permutations.  */

static bool
expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
{
  unsigned i, odd, nelt = d->nelt;

  odd = d->perm[0];
  if (odd != 0 && odd != 1)
    return false;

  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != 2 * i + odd)
      return false;

  return expand_vec_perm_even_odd_1 (d, odd);
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Implement broadcast
   permutations.  We assume that expand_vec_perm_1 has already failed.  */

static bool
expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
{
  unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
  enum machine_mode vmode = d->vmode;
  unsigned char perm2[4];
  rtx op0 = d->op0;
  bool ok;

  switch (vmode)
    {
    case V4DFmode:
    case V8SFmode:
      /* These are special-cased in sse.md so that we can optionally
         use the vbroadcast instruction.  They expand to two insns
         if the input happens to be in a register.  */
      gcc_unreachable ();

    case V2DFmode:
    case V2DImode:
    case V4SFmode:
    case V4SImode:
      /* These are always implementable using standard shuffle patterns.  */
      gcc_unreachable ();

    case V8HImode:
    case V16QImode:
      /* These can be implemented via interleave.  We save one insn by
         stopping once we have promoted to V4SImode and then use pshufd.  */
      do
        {
          optab otab = vec_interleave_low_optab;

          if (elt >= nelt2)
            {
              otab = vec_interleave_high_optab;
              elt -= nelt2;
            }
          nelt2 /= 2;

          op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
          vmode = get_mode_wider_vector (vmode);
          op0 = gen_lowpart (vmode, op0);
        }
      while (vmode != V4SImode);

      memset (perm2, elt, 4);
      ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
      gcc_assert (ok);
      return true;

    default:
      gcc_unreachable ();
    }
}

/* A subroutine of ix86_expand_vec_perm_builtin_1.  Pattern match
   broadcast permutations.  */

static bool
expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
{
  unsigned i, elt, nelt = d->nelt;

  if (d->op0 != d->op1)
    return false;

  elt = d->perm[0];
  for (i = 1; i < nelt; ++i)
    if (d->perm[i] != elt)
      return false;

  return expand_vec_perm_broadcast_1 (d);
}

/* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
   With all of the interface bits taken care of, perform the expansion
   in D and return true on success.  */

static bool
ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
{
  /* Try a single instruction expansion.  */
  if (expand_vec_perm_1 (d))
    return true;

  /* Try sequences of two instructions.  */

  if (expand_vec_perm_pshuflw_pshufhw (d))
    return true;

  if (expand_vec_perm_palignr (d))
    return true;

  if (expand_vec_perm_interleave2 (d))
    return true;

  if (expand_vec_perm_broadcast (d))
    return true;

  /* Try sequences of three instructions.  */

  if (expand_vec_perm_pshufb2 (d))
    return true;

  /* ??? Look for narrow permutations whose element orderings would
     allow the promotion to a wider mode.  */

  /* ??? Look for sequences of interleave or a wider permute that place
     the data into the correct lanes for a half-vector shuffle like
     pshuf[lh]w or vpermilps.  */

  /* ??? Look for sequences of interleave that produce the desired results.
     The combinatorics of punpck[lh] get pretty ugly...  */

  if (expand_vec_perm_even_odd (d))
    return true;

  return false;
}

/* Extract the values from the vector CST into the permutation array in D.
   Return 0 on error, 1 if all values from the permutation come from the
   first vector, 2 if all values from the second vector, and 3 otherwise.  */

static int
extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
{
  tree list = TREE_VECTOR_CST_ELTS (cst);
  unsigned i, nelt = d->nelt;
  int ret = 0;

  for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
    {
      unsigned HOST_WIDE_INT e;

      if (!host_integerp (TREE_VALUE (list), 1))
        return 0;
      e = tree_low_cst (TREE_VALUE (list), 1);
      if (e >= 2 * nelt)
        return 0;

      ret |= (e < nelt ? 1 : 2);
      d->perm[i] = e;
    }
  gcc_assert (list == NULL);

  /* For all elements from second vector, fold the elements to first.  */
  if (ret == 2)
    for (i = 0; i < nelt; ++i)
      d->perm[i] -= nelt;

  return ret;
}

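/* Example (illustrative): for a V4SI selector constant { 4, 5, 6, 7 }
   every element indexes the second vector, so the function returns 2
   and folds d->perm to { 0, 1, 2, 3 }; a mixed selector such as
   { 0, 5, 2, 7 } returns 3 and is left unfolded.  */
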
static rtx
ix86_expand_vec_perm_builtin (tree exp)
{
  struct expand_vec_perm_d d;
  tree arg0, arg1, arg2;

  arg0 = CALL_EXPR_ARG (exp, 0);
  arg1 = CALL_EXPR_ARG (exp, 1);
  arg2 = CALL_EXPR_ARG (exp, 2);

  d.vmode = TYPE_MODE (TREE_TYPE (arg0));
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;
  gcc_assert (VECTOR_MODE_P (d.vmode));

  if (TREE_CODE (arg2) != VECTOR_CST)
    {
      error_at (EXPR_LOCATION (exp),
                "vector permutation requires vector constant");
      goto exit_error;
    }

  switch (extract_vec_perm_cst (&d, arg2))
    {
    default:
      gcc_unreachable ();

    case 0:
      error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
      goto exit_error;

    case 3:
      if (!operand_equal_p (arg0, arg1, 0))
        {
          d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op0 = force_reg (d.vmode, d.op0);
          d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
          d.op1 = force_reg (d.vmode, d.op1);
          break;
        }

      /* The elements of PERM do not suggest that only the first operand
         is used, but both operands are identical.  Allow easier matching
         of the permutation by folding the permutation into the single
         input vector.  */
      {
        unsigned i, nelt = d.nelt;
        for (i = 0; i < nelt; ++i)
          if (d.perm[i] >= nelt)
            d.perm[i] -= nelt;
      }
      /* FALLTHRU */

    case 1:
      d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;

    case 2:
      d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
      d.op0 = force_reg (d.vmode, d.op0);
      d.op1 = d.op0;
      break;
    }

  d.target = gen_reg_rtx (d.vmode);
  if (ix86_expand_vec_perm_builtin_1 (&d))
    return d.target;

  /* For compiler generated permutations, we should never get here, because
     the compiler should also be checking the ok hook.  But since this is a
     builtin that the user has access to, don't abort.  */
  switch (d.nelt)
    {
    case 2:
      sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
      break;
    case 4:
      sorry ("vector permutation (%d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
      break;
    case 8:
      sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
      break;
    case 16:
      sorry ("vector permutation "
             "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
             d.perm[0], d.perm[1], d.perm[2], d.perm[3],
             d.perm[4], d.perm[5], d.perm[6], d.perm[7],
             d.perm[8], d.perm[9], d.perm[10], d.perm[11],
             d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
      break;
    default:
      gcc_unreachable ();
    }
 exit_error:
  return CONST0_RTX (d.vmode);
}

/* Implement targetm.vectorize.builtin_vec_perm_ok.  */

static bool
ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
{
  struct expand_vec_perm_d d;
  int vec_mask;
  bool ret, one_vec;

  d.vmode = TYPE_MODE (vec_type);
  d.nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = true;

  /* Given sufficient ISA support we can just return true here
     for selected vector modes.  */
  if (GET_MODE_SIZE (d.vmode) == 16)
    {
      /* All implementable with a single vpperm insn.  */
      if (TARGET_XOP)
        return true;
      /* All implementable with 2 pshufb + 1 ior.  */
      if (TARGET_SSSE3)
        return true;
      /* All implementable with shufpd or unpck[lh]pd.  */
      if (d.nelt == 2)
        return true;
    }

  vec_mask = extract_vec_perm_cst (&d, mask);

  /* This hook cannot be called in response to something that the
     user does (unlike the builtin expander) so we shouldn't ever see
     an error generated from the extract.  */
  gcc_assert (vec_mask > 0 && vec_mask <= 3);
  one_vec = (vec_mask != 3);

  /* Implementable with shufps or pshufd.  */
  if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
    return true;

  /* Otherwise we have to go through the motions and see if we can
     figure out how to generate the requested permutation.  */
  d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
  d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
  if (!one_vec)
    d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);

  start_sequence ();
  ret = ix86_expand_vec_perm_builtin_1 (&d);
  end_sequence ();

  return ret;
}

void
ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
{
  struct expand_vec_perm_d d;
  unsigned i, nelt;

  d.target = targ;
  d.op0 = op0;
  d.op1 = op1;
  d.vmode = GET_MODE (targ);
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = false;

  for (i = 0; i < nelt; ++i)
    d.perm[i] = i * 2 + odd;

  /* We'll either be able to implement the permutation directly...  */
  if (expand_vec_perm_1 (&d))
    return;

  /* ... or we use the special-case patterns.  */
  expand_vec_perm_even_odd_1 (&d, odd);
}

/* This function returns the calling-ABI-specific va_list type node;
   that is, the va_list type appropriate for FNDECL.  */

static tree
ix86_fn_abi_va_list (tree fndecl)
{
  if (!TARGET_64BIT)
    return va_list_type_node;
  gcc_assert (fndecl != NULL_TREE);

  if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
    return ms_va_list_type_node;
  else
    return sysv_va_list_type_node;
}

/* Returns the canonical va_list type specified by TYPE.  If there
   is no valid TYPE provided, it returns NULL_TREE.  */

static tree
ix86_canonical_va_list_type (tree type)
{
  tree wtype, htype;

  /* Resolve references and pointers to va_list type.  */
  if (TREE_CODE (type) == MEM_REF)
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
    type = TREE_TYPE (type);
  else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  if (TARGET_64BIT)
    {
      wtype = va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return va_list_type_node;
      wtype = sysv_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return sysv_va_list_type_node;
      wtype = ms_va_list_type_node;
      gcc_assert (wtype != NULL_TREE);
      htype = type;
      if (TREE_CODE (wtype) == ARRAY_TYPE)
        {
          /* If va_list is an array type, the argument may have decayed
             to a pointer type, e.g. by being passed to another function.
             In that case, unwrap both types so that we can compare the
             underlying records.  */
          if (TREE_CODE (htype) == ARRAY_TYPE
              || POINTER_TYPE_P (htype))
            {
              wtype = TREE_TYPE (wtype);
              htype = TREE_TYPE (htype);
            }
        }
      if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
        return ms_va_list_type_node;
      return NULL_TREE;
    }

  return std_canonical_va_list_type (type);
}

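/* Example of why the unwrapping above is needed (illustrative): the
   SYSV x86_64 va_list is an array type such as __va_list_tag[1], so a
   va_list passed to another function decays to __va_list_tag *;
   comparing the unwrapped __va_list_tag records lets both spellings
   canonicalize to the same type node.  */
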
/* Iterate through the target-specific builtin types for va_list.
   IDX denotes the iterator, *PTREE is set to the result type of
   the va_list builtin, and *PNAME to its internal type.
   Returns zero if there is no element for this index, otherwise
   IDX should be increased upon the next call.
   Note, do not iterate a base builtin's name like __builtin_va_list.
   Used from c_common_nodes_and_builtins.  */

static int
ix86_enum_va_list (int idx, const char **pname, tree *ptree)
{
  if (TARGET_64BIT)
    {
      switch (idx)
        {
        default:
          break;

        case 0:
          *ptree = ms_va_list_type_node;
          *pname = "__builtin_ms_va_list";
          return 1;

        case 1:
          *ptree = sysv_va_list_type_node;
          *pname = "__builtin_sysv_va_list";
          return 1;
        }
    }

  return 0;
}

/* Initialize the GCC target structure.  */
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ix86_return_in_memory

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ix86_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  ix86_builtin_vectorized_function

#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
#define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ENCODE_SECTION_INFO
#ifndef SUBTARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
#endif

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP ASM_BYTE

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_PROFILE_BEFORE_PROLOGUE
#define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ix86_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA i386_asm_output_addr_const_extra

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
#endif

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start
#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS \
  (TARGET_DEFAULT \
   | TARGET_SUBTARGET_DEFAULT \
   | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
   | MASK_FUSED_MADD)
#undef TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION ix86_handle_option

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list

#undef TARGET_ENUM_VA_LIST_P
#define TARGET_ENUM_VA_LIST_P ix86_enum_va_list

#undef TARGET_FN_ABI_VA_LIST
#define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list

#undef TARGET_CANONICAL_VA_LIST_TYPE
#define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start

#undef TARGET_MD_ASM_CLOBBERS
#define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers

#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ix86_function_arg
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
#undef TARGET_UPDATE_STACK_BOUNDARY
#define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
#undef TARGET_GET_DRAP_RTX
#define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_STATIC_CHAIN
#define TARGET_STATIC_CHAIN ix86_static_chain
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
#undef TARGET_RETURN_POPS_ARGS
#define TARGET_RETURN_POPS_ARGS ix86_return_pops_args

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p

#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
#endif

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type

#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ix86_function_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD ix86_secondary_reload

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  ix86_builtin_vectorization_cost
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
  ix86_vectorize_builtin_vec_perm
#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
  ix86_vectorize_builtin_vec_perm_ok

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE ix86_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE ix86_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT ix86_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P ix86_can_inline_p

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p

#undef TARGET_IRA_COVER_CLASSES
#define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ix86_can_eliminate

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END ix86_code_end

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-i386.h"