/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "diagnostic-core.h"
#include "basic-block.h"
#include "target-def.h"
#include "langhooks.h"
#include "tm-constrs.h"
#include "dwarf2out.h"
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode) \
  ((mode) == QImode ? 0 \
   : (mode) == HImode ? 1 \
   : (mode) == SImode ? 2 \
   : (mode) == DImode ? 3 \
   : 4)
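/* For example, the per-mode multiply and divide arrays in the cost tables
   below can be indexed as cost->mult_init[MODE_INDEX (SImode)]; any mode
   wider than DImode falls into the trailing "other" slot (index 4).
   (Illustrative sketch; field names follow the processor_costs definition
   in i386.h.)  */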
/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)
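/* Under those assumptions the two scales line up: an add costs
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the size-tuned byte
   costs below are directly comparable with the cycle-based costs used
   by the speed-tuned tables.  */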
#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
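/* Each memcpy/memset strategy entry in the tables below is a stringop_algs
   value: an algorithm to use when the block size is unknown at compile time,
   followed by {max_size, alg} pairs tried in order for known sizes, with a
   max_size of -1 closing the list (see the stringop_algs definition in
   i386.h).  DUMMY_STRINGOP_ALGS therefore simply falls back to a library
   call for any size.  */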
struct processor_costs ix86_size_cost = {/* costs for tuning for size */
  COSTS_N_BYTES (2),    /* cost of an add instruction */
  COSTS_N_BYTES (3),    /* cost of a lea instruction */
  COSTS_N_BYTES (2),    /* variable shift costs */
  COSTS_N_BYTES (3),    /* constant shift costs */
  {COSTS_N_BYTES (3),   /* cost of starting multiply for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_BYTES (3),   /* cost of a divide/mod for QI */
   COSTS_N_BYTES (3),   /* HI */
   COSTS_N_BYTES (3),   /* SI */
   COSTS_N_BYTES (3),   /* DI */
   COSTS_N_BYTES (5)},  /* other */
  COSTS_N_BYTES (3),    /* cost of movsx */
  COSTS_N_BYTES (3),    /* cost of movzx */
  2,                    /* cost for loading QImode using movzbl */
  {2, 2, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 2},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {2, 2, 2},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  3,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {3, 3},               /* cost of storing MMX registers
                           in SImode and DImode */
  3,                    /* cost of moving SSE register */
  {3, 3, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {3, 3, 3},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_BYTES (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_BYTES (2),    /* cost of FMUL instruction.  */
  COSTS_N_BYTES (2),    /* cost of FDIV instruction.  */
  COSTS_N_BYTES (2),    /* cost of FABS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FCHS instruction.  */
  COSTS_N_BYTES (2),    /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  1,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  1,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
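/* Reading the table above: entries appear in the field order of struct
   processor_costs, the five-element multiply and divide arrays are indexed
   via MODE_INDEX, and the two brace groups near the end give the memcpy and
   memset strategies (by convention the first stringop_algs of each pair is
   used for 32-bit code and the second for 64-bit code; an assumption based
   on how these pairs are consumed elsewhere in GCC).  The same layout is
   repeated for every CPU table below.  */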
/* Processor costs (relative to an add) */
struct processor_costs i386_cost = {   /* 386 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (6),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (6),   /* HI */
   COSTS_N_INSNS (6),   /* SI */
   COSTS_N_INSNS (6),   /* DI */
   COSTS_N_INSNS (6)},  /* other */
  COSTS_N_INSNS (1),    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (23),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (23),  /* SI */
   COSTS_N_INSNS (23),  /* DI */
   COSTS_N_INSNS (23)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  0,                    /* size of l1 cache */
  0,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (23),   /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (27),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (88),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (22),   /* cost of FABS instruction.  */
  COSTS_N_INSNS (24),   /* cost of FCHS instruction.  */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction.  */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs i486_cost = {   /* 486 specific costs */
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (3),    /* variable shift costs */
  COSTS_N_INSNS (2),    /* constant shift costs */
  {COSTS_N_INSNS (12),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (12),  /* HI */
   COSTS_N_INSNS (12),  /* SI */
   COSTS_N_INSNS (12),  /* DI */
   COSTS_N_INSNS (12)}, /* other */
  1,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (40),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (40),  /* HI */
   COSTS_N_INSNS (40),  /* SI */
   COSTS_N_INSNS (40),  /* DI */
   COSTS_N_INSNS (40)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  15,                   /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {8, 8, 8},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {8, 8, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  4,                    /* size of l1 cache.  The 486 has an 8kB cache
                           shared for code and data, so 4kB is
                           not really precise.  */
  4,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (16),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (73),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (83),   /* cost of FSQRT instruction.  */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (11),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (11),  /* HI */
   COSTS_N_INSNS (11),  /* SI */
   COSTS_N_INSNS (11),  /* DI */
   COSTS_N_INSNS (11)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (25),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (25),  /* HI */
   COSTS_N_INSNS (25),  /* SI */
   COSTS_N_INSNS (25),  /* DI */
   COSTS_N_INSNS (25)}, /* other */
  COSTS_N_INSNS (3),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  6,                    /* cost for loading QImode using movzbl */
  {2, 4, 2},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 4, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  8,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 8, 16},           /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 8, 16},           /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  8,                    /* size of l2 cache */
  0,                    /* size of prefetch block */
  0,                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (3),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (39),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (70),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (4),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (4),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (4)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (17),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (17),  /* HI */
   COSTS_N_INSNS (17),  /* SI */
   COSTS_N_INSNS (17),  /* DI */
   COSTS_N_INSNS (17)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 2, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache */
  32,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in the
     CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
                        {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (2),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (7),   /* SI */
   COSTS_N_INSNS (7),   /* DI */
   COSTS_N_INSNS (7)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (15),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (23),  /* HI */
   COSTS_N_INSNS (39),  /* SI */
   COSTS_N_INSNS (39),  /* DI */
   COSTS_N_INSNS (39)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  1,                    /* cost for loading QImode using movzbl */
  {1, 1, 1},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {1, 1, 1},            /* cost of storing integer registers */
  1,                    /* cost of reg,reg fld/fst */
  {1, 1, 1},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 6, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  1,                    /* cost of moving MMX register */
  {1, 1},               /* cost of loading MMX registers
                           in SImode and DImode */
  {1, 1},               /* cost of storing MMX registers
                           in SImode and DImode */
  1,                    /* cost of moving SSE register */
  {1, 1, 1},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {1, 1, 1},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  1,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  128,                  /* size of l2 cache.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (11),   /* cost of FMUL instruction.  */
  COSTS_N_INSNS (47),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (54),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (18),  /* HI */
   COSTS_N_INSNS (18),  /* SI */
   COSTS_N_INSNS (18),  /* DI */
   COSTS_N_INSNS (18)}, /* other */
  COSTS_N_INSNS (2),    /* cost of movsx */
  COSTS_N_INSNS (2),    /* cost of movzx */
  8,                    /* "large" insn */
  3,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {2, 2, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  6,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  32,                   /* size of l2 cache.  Some models
                           have an integrated l2 cache, but
                           optimizing for K6 is not important
                           enough to worry about that.  */
  32,                   /* size of prefetch block */
  1,                    /* number of parallel prefetches */
  COSTS_N_INSNS (2),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (2),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (56),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (56),   /* cost of FSQRT instruction.  */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (5),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (5),   /* HI */
   COSTS_N_INSNS (5),   /* SI */
   COSTS_N_INSNS (5),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {4, 4},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (24),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* For some reason, Athlon deals better with the REP prefix (relative to
     loops) than K8 does.  Alignment becomes important after 8 bytes for
     memcpy and 128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 3, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar_load_cost.  */
  2,                    /* scalar_store_cost.  */
  5,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  3,                    /* vec_unalign_load_cost.  */
  3,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  2,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                            MOVD reg64, xmmreg Double FSTORE 4
                            MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                            MOVD reg64, xmmreg Double FADD 3
                            MOVD reg32, xmmreg Double FADD 3  */
  64,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar_load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs bdver1_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (2),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (5)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (19),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (35),  /* HI */
   COSTS_N_INSNS (51),  /* SI */
   COSTS_N_INSNS (83),  /* DI */
   COSTS_N_INSNS (83)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  4,                    /* cost for loading QImode using movzbl */
  {3, 4, 3},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {3, 4, 3},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {4, 4, 12},           /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {3, 3},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {4, 4, 3},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 5},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  3,                    /* MMX or SSE register to integer */
                        /* On K8:
                            MOVD reg64, xmmreg Double FSTORE 4
                            MOVD reg32, xmmreg Double FSTORE 4
                           On AMDFAM10:
                            MOVD reg64, xmmreg Double FADD 3
                            MOVD reg32, xmmreg Double FADD 3  */
  64,                   /* size of l1 cache.  */
  1024,                 /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea not to limit the number of prefetches at all, as their
     execution also takes some time).  */
  100,                  /* number of parallel prefetches */
  COSTS_N_INSNS (4),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (4),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (19),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (35),   /* cost of FSQRT instruction.  */
  /* BDVER1 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do non-temporal accesses and beat inline code
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,                    /* scalar_stmt_cost.  */
  2,                    /* scalar_load_cost.  */
  2,                    /* scalar_store_cost.  */
  6,                    /* vec_stmt_cost.  */
  0,                    /* vec_to_scalar_cost.  */
  2,                    /* scalar_to_vec_cost.  */
  2,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  2,                    /* vec_store_cost.  */
  2,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (3),    /* cost of a lea instruction */
  COSTS_N_INSNS (4),    /* variable shift costs */
  COSTS_N_INSNS (4),    /* constant shift costs */
  {COSTS_N_INSNS (15),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (15),  /* HI */
   COSTS_N_INSNS (15),  /* SI */
   COSTS_N_INSNS (15),  /* DI */
   COSTS_N_INSNS (15)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (56),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (56),  /* HI */
   COSTS_N_INSNS (56),  /* SI */
   COSTS_N_INSNS (56),  /* DI */
   COSTS_N_INSNS (56)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  2,                    /* cost for loading QImode using movzbl */
  {4, 5, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {2, 3, 2},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {2, 2, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 6},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {2, 2},               /* cost of loading MMX registers
                           in SImode and DImode */
  {2, 2},               /* cost of storing MMX registers
                           in SImode and DImode */
  12,                   /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {2, 2, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  10,                   /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  COSTS_N_INSNS (5),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (7),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (43),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (2),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (2),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (43),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1),    /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (10),  /* cost of starting multiply for QI */
   COSTS_N_INSNS (10),  /* HI */
   COSTS_N_INSNS (10),  /* SI */
   COSTS_N_INSNS (10),  /* DI */
   COSTS_N_INSNS (10)}, /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (66),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (66),  /* HI */
   COSTS_N_INSNS (66),  /* SI */
   COSTS_N_INSNS (66),  /* DI */
   COSTS_N_INSNS (66)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  16,                   /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  3,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  6,                    /* cost of moving MMX register */
  {12, 12},             /* cost of loading MMX registers
                           in SImode and DImode */
  {12, 12},             /* cost of storing MMX registers
                           in SImode and DImode */
  6,                    /* cost of moving SSE register */
  {12, 12, 12},         /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {12, 12, 12},         /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  8,                    /* MMX or SSE register to integer */
  8,                    /* size of l1 cache.  */
  1024,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  1,                    /* Branch cost */
  COSTS_N_INSNS (6),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (40),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (3),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (3),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (44),   /* cost of FSQRT instruction.  */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
              {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
              {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (3),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (3),   /* DI */
   COSTS_N_INSNS (3)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (22),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (22),  /* HI */
   COSTS_N_INSNS (22),  /* SI */
   COSTS_N_INSNS (22),  /* DI */
   COSTS_N_INSNS (22)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  16,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {6, 6, 6},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  2,                    /* cost of reg,reg fld/fst */
  {6, 6, 6},            /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {4, 4, 4},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {6, 6},               /* cost of loading MMX registers
                           in SImode and DImode */
  {4, 4},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {6, 6, 6},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {4, 4, 4},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  2,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  2048,                 /* size of l2 cache.  */
  128,                  /* size of prefetch block */
  8,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (3),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (5),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (32),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (1),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (1),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (58),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
struct processor_costs atom_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  2,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
              {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
              {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
/* Generic64 should produce code tuned for Nocona and K8.  */
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  /* On all chips taken into consideration lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  512,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
/* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
   Athlon and K8.  */
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),    /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
  COSTS_N_INSNS (1),    /* variable shift costs */
  COSTS_N_INSNS (1),    /* constant shift costs */
  {COSTS_N_INSNS (3),   /* cost of starting multiply for QI */
   COSTS_N_INSNS (4),   /* HI */
   COSTS_N_INSNS (3),   /* SI */
   COSTS_N_INSNS (4),   /* DI */
   COSTS_N_INSNS (2)},  /* other */
  0,                    /* cost of multiply per each bit set */
  {COSTS_N_INSNS (18),  /* cost of a divide/mod for QI */
   COSTS_N_INSNS (26),  /* HI */
   COSTS_N_INSNS (42),  /* SI */
   COSTS_N_INSNS (74),  /* DI */
   COSTS_N_INSNS (74)}, /* other */
  COSTS_N_INSNS (1),    /* cost of movsx */
  COSTS_N_INSNS (1),    /* cost of movzx */
  8,                    /* "large" insn */
  17,                   /* MOVE_RATIO */
  4,                    /* cost for loading QImode using movzbl */
  {4, 4, 4},            /* cost of loading integer registers
                           in QImode, HImode and SImode.
                           Relative to reg-reg move (2).  */
  {4, 4, 4},            /* cost of storing integer registers */
  4,                    /* cost of reg,reg fld/fst */
  {12, 12, 12},         /* cost of loading fp registers
                           in SFmode, DFmode and XFmode */
  {6, 6, 8},            /* cost of storing fp registers
                           in SFmode, DFmode and XFmode */
  2,                    /* cost of moving MMX register */
  {8, 8},               /* cost of loading MMX registers
                           in SImode and DImode */
  {8, 8},               /* cost of storing MMX registers
                           in SImode and DImode */
  2,                    /* cost of moving SSE register */
  {8, 8, 8},            /* cost of loading SSE registers
                           in SImode, DImode and TImode */
  {8, 8, 8},            /* cost of storing SSE registers
                           in SImode, DImode and TImode */
  5,                    /* MMX or SSE register to integer */
  32,                   /* size of l1 cache.  */
  256,                  /* size of l2 cache.  */
  64,                   /* size of prefetch block */
  6,                    /* number of parallel prefetches */
  3,                    /* Branch cost */
  COSTS_N_INSNS (8),    /* cost of FADD and FSUB insns.  */
  COSTS_N_INSNS (8),    /* cost of FMUL instruction.  */
  COSTS_N_INSNS (20),   /* cost of FDIV instruction.  */
  COSTS_N_INSNS (8),    /* cost of FABS instruction.  */
  COSTS_N_INSNS (8),    /* cost of FCHS instruction.  */
  COSTS_N_INSNS (40),   /* cost of FSQRT instruction.  */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,                    /* scalar_stmt_cost.  */
  1,                    /* scalar_load_cost.  */
  1,                    /* scalar_store_cost.  */
  1,                    /* vec_stmt_cost.  */
  1,                    /* vec_to_scalar_cost.  */
  1,                    /* scalar_to_vec_cost.  */
  1,                    /* vec_align_load_cost.  */
  2,                    /* vec_unalign_load_cost.  */
  1,                    /* vec_store_cost.  */
  3,                    /* cond_taken_branch_cost.  */
  1,                    /* cond_not_taken_branch_cost.  */
};
const struct processor_costs *ix86_cost = &pentium_cost;
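/* ix86_cost starts out pointing at the Pentium table only as a default;
   option handling later in this file is expected to re-point it at the
   table matching the selected -mtune processor (a note on intent, not a
   guarantee from this excerpt alone).  */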
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)
#define m_ATOM (1<<PROCESSOR_ATOM)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_BDVER1 (1<<PROCESSOR_BDVER1)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)
/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];
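/* A minimal sketch of how these masks are consumed (assuming the
   conventional ix86_tune index set up by option handling): each tuning
   flag is reduced to a boolean for the active CPU, e.g.

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; ++i)
       ix86_tune_features[i]
         = !!(initial_ix86_tune_features[i] & ix86_tune_mask);  */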
1380 /* Feature tests against the various tunings used to create ix86_tune_features
1381 based on the processor mask. */
1382 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1383 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1384 negatively, so enabling for Generic64 seems like good code size
1385 tradeoff. We can't enable it for 32bit generic because it does not
1386 work well with PPro base chips. */
1387 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1389 /* X86_TUNE_PUSH_MEMORY */
1390 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1391 | m_NOCONA | m_CORE2 | m_GENERIC,
1393 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1396 /* X86_TUNE_UNROLL_STRLEN */
1397 m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6
1398 | m_CORE2 | m_GENERIC,
1400 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1401 m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1403 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1404 on simulation result. But after P4 was made, no performance benefit
1405 was observed with branch hints. It also increases the code size.
1406 As a result, icc never generates branch hints. */
1409 /* X86_TUNE_DOUBLE_WITH_ADD */
1412 /* X86_TUNE_USE_SAHF */
1413 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4
1414 | m_NOCONA | m_CORE2 | m_GENERIC,
1416 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1417 partial dependencies. */
1418 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA
1419 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1421 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1422 register stalls on Generic32 compilation setting as well. However
1423 in current implementation the partial register stalls are not eliminated
1424 very well - they can be introduced via subregs synthesized by combine
1425 and can happen in caller/callee saving sequences. Because this option
1426 pays back little on PPro based chips and is in conflict with partial reg
1427 dependencies used by Athlon/P4 based chips, it is better to leave it off
1428 for generic32 for now. */
1431 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1432 m_CORE2 | m_GENERIC,
1434 /* X86_TUNE_USE_HIMODE_FIOP */
1435 m_386 | m_486 | m_K6_GEODE,
1437 /* X86_TUNE_USE_SIMODE_FIOP */
1438 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_ATOM | m_CORE2 | m_GENERIC),
1440 /* X86_TUNE_USE_MOV0 */
1443 /* X86_TUNE_USE_CLTD */
1444 ~(m_PENT | m_ATOM | m_K6 | m_CORE2 | m_GENERIC),
1446 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1449 /* X86_TUNE_SPLIT_LONG_MOVES */
1452 /* X86_TUNE_READ_MODIFY_WRITE */
1455 /* X86_TUNE_READ_MODIFY */
1458 /* X86_TUNE_PROMOTE_QIMODE */
1459 m_K6_GEODE | m_PENT | m_ATOM | m_386 | m_486 | m_AMD_MULTIPLE
1460 | m_CORE2 | m_GENERIC /* | m_PENT4 ? */,
1462 /* X86_TUNE_FAST_PREFIX */
1463 ~(m_PENT | m_486 | m_386),
1465 /* X86_TUNE_SINGLE_STRINGOP */
1466 m_386 | m_PENT4 | m_NOCONA,
1468 /* X86_TUNE_QIMODE_MATH */
1471 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1472    register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
1473    might be considered for Generic32 if our scheme for avoiding partial
1474    stalls were more effective. */
1477 /* X86_TUNE_PROMOTE_QI_REGS */
1480 /* X86_TUNE_PROMOTE_HI_REGS */
1483 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1484 m_ATOM | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA
1485 | m_CORE2 | m_GENERIC,
1487 /* X86_TUNE_ADD_ESP_8 */
1488 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_K6_GEODE | m_386
1489 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1491 /* X86_TUNE_SUB_ESP_4 */
1492 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2
1495 /* X86_TUNE_SUB_ESP_8 */
1496 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_386 | m_486
1497 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1499 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1500 for DFmode copies */
1501 ~(m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1502 | m_GENERIC | m_GEODE),
1504 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1505 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1507 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1508    conflict here between PPro/Pentium4 based chips that treat 128bit
1509    SSE registers as single units and K8 based chips that divide SSE
1510    registers into two 64bit halves.  This knob promotes all store
1511    destinations to be 128bit to allow register renaming on 128bit SSE
1512    units, but usually results in one extra microop on 64bit SSE units.
1513    Experimental results show that disabling this option on P4 brings over
1514    a 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
1515    regression that can be partly masked by careful scheduling of moves. */
1516 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1517 | m_AMDFAM10 | m_BDVER1,
1519 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
1520 m_AMDFAM10 | m_BDVER1,
1522 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
1525 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
1528 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1529    are resolved on SSE register parts instead of whole registers, so we may
1530    maintain just the lower part of scalar values in the proper format,
1531    leaving the upper part undefined. */
1534 /* X86_TUNE_SSE_TYPELESS_STORES */
1537 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1538 m_PPRO | m_PENT4 | m_NOCONA,
1540 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1541 m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1543 /* X86_TUNE_PROLOGUE_USING_MOVE */
1544 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1546 /* X86_TUNE_EPILOGUE_USING_MOVE */
1547 m_ATHLON_K8 | m_ATOM | m_PPRO | m_CORE2 | m_GENERIC,
1549 /* X86_TUNE_SHIFT1 */
1552 /* X86_TUNE_USE_FFREEP */
1555 /* X86_TUNE_INTER_UNIT_MOVES */
1556 ~(m_AMD_MULTIPLE | m_GENERIC),
1558 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1559 ~(m_AMDFAM10 | m_BDVER1),
1561 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1562 than 4 branch instructions in the 16 byte window. */
1563 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2
1566 /* X86_TUNE_SCHEDULE */
1567 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_ATOM | m_CORE2
1570 /* X86_TUNE_USE_BT */
1571 m_AMD_MULTIPLE | m_ATOM | m_CORE2 | m_GENERIC,
1573 /* X86_TUNE_USE_INCDEC */
1574 ~(m_PENT4 | m_NOCONA | m_GENERIC | m_ATOM),
1576 /* X86_TUNE_PAD_RETURNS */
1577 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1579 /* X86_TUNE_EXT_80387_CONSTANTS */
1580 m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO
1581 | m_CORE2 | m_GENERIC,
1583 /* X86_TUNE_SHORTEN_X87_SSE */
1586 /* X86_TUNE_AVOID_VECTOR_DECODE */
1589 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
1590    HImode and SImode multiply, but 386 and 486 do HImode multiply faster. */
1593 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1594 vector path on AMD machines. */
1595 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1597 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD machines. */
1599 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1,
1601 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR than a MOV. */
1605 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1606 but one byte longer. */
1609 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1610    operand that cannot be represented using a modRM byte.  The XOR
1611    replacement is long decoded, so this split helps here as well. */
1614 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1615    from FP to FP. */
1616 m_AMDFAM10 | m_GENERIC,
1618 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1619 from integer to FP. */
1622 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1623 with a subsequent conditional jump instruction into a single
1624 compare-and-branch uop. */
1627 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
1628 will impact LEA instruction selection. */
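/* Illustrative sketch (not part of the original source): each entry above
   is a bitmask over processors, and override_options turns it into the
   boolean ix86_tune_features[] array by masking with the bit of the
   active -mtune processor:

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     ix86_tune_features[X86_TUNE_USE_LEAVE]
       = !!(initial_ix86_tune_features[X86_TUNE_USE_LEAVE] & ix86_tune_mask);

   So tuning for CORE2 enables X86_TUNE_USE_LEAVE because m_CORE2 is OR'ed
   into that entry's mask.  */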
1632 /* Feature tests against the various architecture variations. */
1633 unsigned char ix86_arch_features[X86_ARCH_LAST];
1635 /* Feature tests against the various architecture variations, used to create
1636 ix86_arch_features based on the processor mask. */
1637 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1638 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1639 ~(m_386 | m_486 | m_PENT | m_K6),
1641 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1644 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1647 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1650 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1654 static const unsigned int x86_accumulate_outgoing_args
1655 = m_AMD_MULTIPLE | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1658 static const unsigned int x86_arch_always_fancy_math_387
1659 = m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1660 | m_NOCONA | m_CORE2 | m_GENERIC;
1662 static enum stringop_alg stringop_alg = no_stringop;
1664 /* In case the average insn count for a single function invocation is
1665    lower than this constant, emit fast (but longer) prologue and
1666    epilogue. */
1667 #define FAST_PROLOGUE_INSN_COUNT 20
1669 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1670 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1671 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1672 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1674 /* Array of the smallest class containing reg number REGNO, indexed by
1675 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1677 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1679 /* ax, dx, cx, bx */
1680 AREG, DREG, CREG, BREG,
1681 /* si, di, bp, sp */
1682 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1684 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1685 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1688 /* flags, fpsr, fpcr, frame */
1689 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1691 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1694 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1697 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1698 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1699 /* SSE REX registers */
1700 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1704 /* The "default" register map used in 32bit mode. */
1706 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1708 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1709 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1710 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1711 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1712 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1713 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1714 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1717 /* The "default" register map used in 64bit mode. */
1719 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1721 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1722 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1723 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1724 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1725 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1726 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1727 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1730 /* Define the register numbers to be used in Dwarf debugging information.
1731 The SVR4 reference port C compiler uses the following register numbers
1732 in its Dwarf output code:
1733 0 for %eax (gcc regno = 0)
1734 1 for %ecx (gcc regno = 2)
1735 2 for %edx (gcc regno = 1)
1736 3 for %ebx (gcc regno = 3)
1737 4 for %esp (gcc regno = 7)
1738 5 for %ebp (gcc regno = 6)
1739 6 for %esi (gcc regno = 4)
1740 7 for %edi (gcc regno = 5)
1741 The following three DWARF register numbers are never generated by
1742 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1743 believes these numbers have these meanings.
1744 8 for %eip (no gcc equivalent)
1745 9 for %eflags (gcc regno = 17)
1746 10 for %trapno (no gcc equivalent)
1747 It is not at all clear how we should number the FP stack registers
1748 for the x86 architecture. If the version of SDB on x86/svr4 were
1749 a bit less brain dead with respect to floating-point then we would
1750 have a precedent to follow with respect to DWARF register numbers
1751 for x86 FP registers, but the SDB on x86/svr4 is so completely
1752 broken with respect to FP registers that it is hardly worth thinking
1753 of it as something to strive for compatibility with.
1754 The version of x86/svr4 SDB I have at the moment does (partially)
1755 seem to believe that DWARF register number 11 is associated with
1756 the x86 register %st(0), but that's about all. Higher DWARF
1757 register numbers don't seem to be associated with anything in
1758 particular, and even for DWARF regno 11, SDB only seems to under-
1759 stand that it should say that a variable lives in %st(0) (when
1760 asked via an `=' command) if we said it was in DWARF regno 11,
1761 but SDB still prints garbage when asked for the value of the
1762 variable in question (via a `/' command).
1763 (Also note that the labels SDB prints for various FP stack regs
1764 when doing an `x' command are all wrong.)
1765 Note that these problems generally don't affect the native SVR4
1766 C compiler because it doesn't allow the use of -O with -g and
1767 because when it is *not* optimizing, it allocates a memory
1768 location for each floating-point variable, and the memory
1769 location is what gets described in the DWARF AT_location
1770 attribute for the variable in question.
1771 Regardless of the severe mental illness of the x86/svr4 SDB, we
1772 do something sensible here and we use the following DWARF
1773 register numbers.  Note that these are all stack-top-relative
1774 numbers:
1775 11 for %st(0) (gcc regno = 8)
1776 12 for %st(1) (gcc regno = 9)
1777 13 for %st(2) (gcc regno = 10)
1778 14 for %st(3) (gcc regno = 11)
1779 15 for %st(4) (gcc regno = 12)
1780 16 for %st(5) (gcc regno = 13)
1781 17 for %st(6) (gcc regno = 14)
1782 18 for %st(7) (gcc regno = 15)
1784 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1786 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1787 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1788 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1789 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1790 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1791 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1792 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
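/* Illustrative example (not part of the original source): the map is
   indexed by gcc hard register number and yields the DWARF number, so
   %edx (gcc regno 1) maps to svr4_dbx_register_map[1] == 2, matching
   the SVR4 numbering listed in the comment above.  */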
1795 /* Define parameter passing and return registers. */
1797 static int const x86_64_int_parameter_registers[6] =
1799 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1802 static int const x86_64_ms_abi_int_parameter_registers[4] =
1804 CX_REG, DX_REG, R8_REG, R9_REG
1807 static int const x86_64_int_return_registers[4] =
1809 AX_REG, DX_REG, DI_REG, SI_REG
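/* Illustrative note (not part of the original source): per the tables
   above, the first two integer arguments travel in %rdi/%rsi (DI_REG,
   SI_REG) under the SysV ABI but in %rcx/%rdx (CX_REG, DX_REG) under
   the MS ABI, while %rax (AX_REG) heads the return-value registers in
   both cases.  */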
1812 /* Define the structure for the machine field in struct function. */
1814 struct GTY(()) stack_local_entry {
1815 unsigned short mode;
1818 struct stack_local_entry *next;
1821 /* Structure describing stack frame layout.
1822 Stack grows downward:
1828 saved frame pointer if frame_pointer_needed
1829 <- HARD_FRAME_POINTER
1838    [va_arg registers]  \
1839                         > to_allocate            <- FRAME_POINTER
1852 HOST_WIDE_INT frame;
1854 int outgoing_arguments_size;
1856 HOST_WIDE_INT to_allocate;
1857 /* The offsets relative to ARG_POINTER. */
1858 HOST_WIDE_INT frame_pointer_offset;
1859 HOST_WIDE_INT hard_frame_pointer_offset;
1860 HOST_WIDE_INT stack_pointer_offset;
1862 /* When save_regs_using_mov is set, emit prologue using
1863 move instead of push instructions. */
1864 bool save_regs_using_mov;
1867 /* Code model option. */
1868 enum cmodel ix86_cmodel;
1870 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1872 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1874 /* Which unit we are generating floating point math for. */
1875 enum fpmath_unit ix86_fpmath;
1877 /* Which cpu we are scheduling for. */
1878 enum attr_cpu ix86_schedule;
1880 /* Which cpu we are optimizing for. */
1881 enum processor_type ix86_tune;
1883 /* Which instruction set architecture to use. */
1884 enum processor_type ix86_arch;
1886 /* True if the SSE prefetch instruction is not a NOP. */
1887 int x86_prefetch_sse;
1889 /* ix86_regparm_string as a number */
1890 static int ix86_regparm;
1892 /* -mstackrealign option */
1893 extern int ix86_force_align_arg_pointer;
1894 static const char ix86_force_align_arg_pointer_string[]
1895 = "force_align_arg_pointer";
1897 static rtx (*ix86_gen_leave) (void);
1898 static rtx (*ix86_gen_pop1) (rtx);
1899 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1900 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1901 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx, rtx);
1902 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1903 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1904 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1905 static rtx (*ix86_gen_allocate_stack_worker) (rtx, rtx);
1906 static rtx (*ix86_gen_adjust_stack_and_probe) (rtx, rtx, rtx);
1907 static rtx (*ix86_gen_probe_stack_range) (rtx, rtx, rtx);
1909 /* Preferred alignment for stack boundary in bits. */
1910 unsigned int ix86_preferred_stack_boundary;
1912 /* Alignment for incoming stack boundary in bits specified at
1913    command line. */
1914 static unsigned int ix86_user_incoming_stack_boundary;
1916 /* Default alignment for incoming stack boundary in bits. */
1917 static unsigned int ix86_default_incoming_stack_boundary;
1919 /* Alignment for incoming stack boundary in bits. */
1920 unsigned int ix86_incoming_stack_boundary;
1922 /* The ABI used by the target. */
1923 enum calling_abi ix86_abi;
1925 /* Values 1-5: see jump.c */
1926 int ix86_branch_cost;
1928 /* Calling abi specific va_list type nodes. */
1929 static GTY(()) tree sysv_va_list_type_node;
1930 static GTY(()) tree ms_va_list_type_node;
1932 /* Variables which are this size or smaller are put in the data/bss
1933 or ldata/lbss sections. */
1935 int ix86_section_threshold = 65536;
1937 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1938 char internal_label_prefix[16];
1939 int internal_label_prefix_len;
1941 /* Fence to use after loop using movnt. */
1944 /* Register class used for passing a given 64bit part of the argument.
1945    These represent classes as documented by the PS ABI, with the exception
1946    of the SSESF and SSEDF classes, which are basically the SSE class; gcc
1947    will use SF or DFmode moves instead of DImode to avoid reformatting penalties.
1949    Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1950    whenever possible (the upper half does contain padding). */
1951 enum x86_64_reg_class
1954 X86_64_INTEGER_CLASS,
1955 X86_64_INTEGERSI_CLASS,
1962 X86_64_COMPLEX_X87_CLASS,
1966 #define MAX_CLASSES 4
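/* Illustrative sketch (assumed examples, following the psABI rules
   described above): classifying a few C argument types would give

     long   -> X86_64_INTEGER_CLASS    (full 8-byte integer chunk)
     int    -> X86_64_INTEGERSI_CLASS  (upper half is padding, so the
                                        cheaper SImode move is used)
     double -> an SSE class recorded as SSEDF, so a DFmode move is
               used instead of a DImode one.  */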
1968 /* Table of constants used by fldpi, fldln2, etc. */
1969 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1970 static bool ext_80387_constants_init = 0;
1973 static struct machine_function * ix86_init_machine_status (void);
1974 static rtx ix86_function_value (const_tree, const_tree, bool);
1975 static bool ix86_function_value_regno_p (const unsigned int);
1976 static rtx ix86_static_chain (const_tree, bool);
1977 static int ix86_function_regparm (const_tree, const_tree);
1978 static void ix86_compute_frame_layout (struct ix86_frame *);
1979 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1981 static void ix86_add_new_builtins (int);
1982 static rtx ix86_expand_vec_perm_builtin (tree);
1983 static tree ix86_canonical_va_list_type (tree);
1985 enum ix86_function_specific_strings
1987 IX86_FUNCTION_SPECIFIC_ARCH,
1988 IX86_FUNCTION_SPECIFIC_TUNE,
1989 IX86_FUNCTION_SPECIFIC_FPMATH,
1990 IX86_FUNCTION_SPECIFIC_MAX
1993 static char *ix86_target_string (int, int, const char *, const char *,
1994 const char *, bool);
1995 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1996 static void ix86_function_specific_save (struct cl_target_option *);
1997 static void ix86_function_specific_restore (struct cl_target_option *);
1998 static void ix86_function_specific_print (FILE *, int,
1999 struct cl_target_option *);
2000 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
2001 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
2002 static bool ix86_can_inline_p (tree, tree);
2003 static void ix86_set_current_function (tree);
2004 static unsigned int ix86_minimum_incoming_stack_boundary (bool);
2006 static enum calling_abi ix86_function_abi (const_tree);
2009 #ifndef SUBTARGET32_DEFAULT_CPU
2010 #define SUBTARGET32_DEFAULT_CPU "i386"
2013 /* The svr4 ABI for the i386 says that records and unions are returned
2014    in memory. */
2015 #ifndef DEFAULT_PCC_STRUCT_RETURN
2016 #define DEFAULT_PCC_STRUCT_RETURN 1
2019 /* Whether -mtune= or -march= were specified */
2020 static int ix86_tune_defaulted;
2021 static int ix86_arch_specified;
2023 /* Bit flags that specify the ISA we are compiling for. */
2024 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
2026 /* A mask of ix86_isa_flags that includes bit X if X
2027 was set or cleared on the command line. */
2028 static int ix86_isa_flags_explicit;
2030 /* Define a set of ISAs which are available when a given ISA is
2031 enabled. MMX and SSE ISAs are handled separately. */
2033 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
2034 #define OPTION_MASK_ISA_3DNOW_SET \
2035 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
2037 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
2038 #define OPTION_MASK_ISA_SSE2_SET \
2039 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
2040 #define OPTION_MASK_ISA_SSE3_SET \
2041 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
2042 #define OPTION_MASK_ISA_SSSE3_SET \
2043 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
2044 #define OPTION_MASK_ISA_SSE4_1_SET \
2045 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
2046 #define OPTION_MASK_ISA_SSE4_2_SET \
2047 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
2048 #define OPTION_MASK_ISA_AVX_SET \
2049 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
2050 #define OPTION_MASK_ISA_FMA_SET \
2051 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
2053 /* SSE4 includes both SSE4.1 and SSE4.2.  -msse4 should be the same
2054    as -msse4.2. */
2055 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
2057 #define OPTION_MASK_ISA_SSE4A_SET \
2058 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
2059 #define OPTION_MASK_ISA_FMA4_SET \
2060 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
2061 | OPTION_MASK_ISA_AVX_SET)
2062 #define OPTION_MASK_ISA_XOP_SET \
2063 (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
2064 #define OPTION_MASK_ISA_LWP_SET \
2067 /* AES and PCLMUL need SSE2 because they use xmm registers. */
2068 #define OPTION_MASK_ISA_AES_SET \
2069 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
2070 #define OPTION_MASK_ISA_PCLMUL_SET \
2071 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
2073 #define OPTION_MASK_ISA_ABM_SET \
2074 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
2076 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
2077 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
2078 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
2079 #define OPTION_MASK_ISA_MOVBE_SET OPTION_MASK_ISA_MOVBE
2080 #define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
2082 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
2083 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
2084 #define OPTION_MASK_ISA_F16C_SET \
2085 (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
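/* Illustrative expansion (not part of the original source): because each
   *_SET macro folds in the *_SET macro of its prerequisite, enabling one
   ISA pulls in everything it depends on.  E.g. in ix86_handle_option
   below,

     ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;

   sets the SSE4.2 bit together with SSE4.1, SSSE3, SSE3, SSE2 and SSE.  */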
2087 /* Define a set of ISAs which aren't available when a given ISA is
2088 disabled. MMX and SSE ISAs are handled separately. */
2090 #define OPTION_MASK_ISA_MMX_UNSET \
2091 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
2092 #define OPTION_MASK_ISA_3DNOW_UNSET \
2093 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
2094 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
2096 #define OPTION_MASK_ISA_SSE_UNSET \
2097 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
2098 #define OPTION_MASK_ISA_SSE2_UNSET \
2099 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
2100 #define OPTION_MASK_ISA_SSE3_UNSET \
2101 (OPTION_MASK_ISA_SSE3 \
2102 | OPTION_MASK_ISA_SSSE3_UNSET \
2103 | OPTION_MASK_ISA_SSE4A_UNSET )
2104 #define OPTION_MASK_ISA_SSSE3_UNSET \
2105 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
2106 #define OPTION_MASK_ISA_SSE4_1_UNSET \
2107 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
2108 #define OPTION_MASK_ISA_SSE4_2_UNSET \
2109 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
2110 #define OPTION_MASK_ISA_AVX_UNSET \
2111 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
2112 | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET)
2113 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
2115 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
2116    as -mno-sse4.1. */
2117 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
2119 #define OPTION_MASK_ISA_SSE4A_UNSET \
2120 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
2122 #define OPTION_MASK_ISA_FMA4_UNSET \
2123 (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
2124 #define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
2125 #define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
2127 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
2128 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
2129 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
2130 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
2131 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
2132 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
2133 #define OPTION_MASK_ISA_MOVBE_UNSET OPTION_MASK_ISA_MOVBE
2134 #define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
2136 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
2137 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
2138 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
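/* Illustrative expansion (not part of the original source): the *_UNSET
   macros chain in the opposite direction, so disabling an ISA also
   disables everything built on top of it.  E.g.

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;

   clears SSE2 along with SSE3, SSSE3, SSE4A, SSE4.1, SSE4.2, AVX, FMA,
   FMA4, XOP and F16C.  */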
2140 /* Vectorization library interface and handlers. */
2141 static tree (*ix86_veclib_handler) (enum built_in_function, tree, tree);
2143 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
2144 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
2146 /* Processor target table, indexed by processor number */
2149 const struct processor_costs *cost; /* Processor costs */
2150 const int align_loop; /* Default alignments. */
2151 const int align_loop_max_skip;
2152 const int align_jump;
2153 const int align_jump_max_skip;
2154 const int align_func;
2157 static const struct ptt processor_target_table[PROCESSOR_max] =
2159 {&i386_cost, 4, 3, 4, 3, 4},
2160 {&i486_cost, 16, 15, 16, 15, 16},
2161 {&pentium_cost, 16, 7, 16, 7, 16},
2162 {&pentiumpro_cost, 16, 15, 16, 10, 16},
2163 {&geode_cost, 0, 0, 0, 0, 0},
2164 {&k6_cost, 32, 7, 32, 7, 32},
2165 {&athlon_cost, 16, 7, 16, 7, 16},
2166 {&pentium4_cost, 0, 0, 0, 0, 0},
2167 {&k8_cost, 16, 7, 16, 7, 16},
2168 {&nocona_cost, 0, 0, 0, 0, 0},
2169 {&core2_cost, 16, 10, 16, 10, 16},
2170 {&generic32_cost, 16, 7, 16, 7, 16},
2171 {&generic64_cost, 16, 10, 16, 10, 16},
2172 {&amdfam10_cost, 32, 24, 32, 7, 32},
2173 {&bdver1_cost, 32, 24, 32, 7, 32},
2174 {&atom_cost, 16, 7, 16, 7, 16}
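/* Illustrative reading of the table (not part of the original source):
   override_options below falls back to these entries when no -falign-*
   option was given, e.g. for -mtune=core2

     align_loops = processor_target_table[ix86_tune].align_loop;

   yields 16, with align_loops_max_skip = 10 from the same row.  */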
2177 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
2204 /* Implement TARGET_HANDLE_OPTION. */
2207 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
2214 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2215 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2219 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2220 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2227 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2228 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2232 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2233 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2243 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2248 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2249 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2256 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2257 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2261 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2262 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2269 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2270 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2274 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2275 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2282 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2283 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2287 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2288 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2295 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2296 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2300 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2301 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2308 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2309 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2313 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2314 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2321 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2322 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2326 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2327 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2334 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2335 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2339 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2340 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2345 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2346 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2350 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2351 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2357 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2358 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2362 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2363 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2370 ix86_isa_flags |= OPTION_MASK_ISA_FMA4_SET;
2371 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_SET;
2375 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA4_UNSET;
2376 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA4_UNSET;
2383 ix86_isa_flags |= OPTION_MASK_ISA_XOP_SET;
2384 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_SET;
2388 ix86_isa_flags &= ~OPTION_MASK_ISA_XOP_UNSET;
2389 ix86_isa_flags_explicit |= OPTION_MASK_ISA_XOP_UNSET;
2396 ix86_isa_flags |= OPTION_MASK_ISA_LWP_SET;
2397 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_SET;
2401 ix86_isa_flags &= ~OPTION_MASK_ISA_LWP_UNSET;
2402 ix86_isa_flags_explicit |= OPTION_MASK_ISA_LWP_UNSET;
2409 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2410 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2414 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2415 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2422 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2423 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2427 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2428 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2435 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2436 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2440 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2441 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2448 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2449 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2453 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2454 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2461 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE_SET;
2462 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_SET;
2466 ix86_isa_flags &= ~OPTION_MASK_ISA_MOVBE_UNSET;
2467 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MOVBE_UNSET;
2474 ix86_isa_flags |= OPTION_MASK_ISA_CRC32_SET;
2475 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_SET;
2479 ix86_isa_flags &= ~OPTION_MASK_ISA_CRC32_UNSET;
2480 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CRC32_UNSET;
2487 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2488 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2492 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2493 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2500 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2501 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2505 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2506 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2513 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE_SET;
2514 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_SET;
2518 ix86_isa_flags &= ~OPTION_MASK_ISA_FSGSBASE_UNSET;
2519 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FSGSBASE_UNSET;
2526 ix86_isa_flags |= OPTION_MASK_ISA_RDRND_SET;
2527 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_SET;
2531 ix86_isa_flags &= ~OPTION_MASK_ISA_RDRND_UNSET;
2532 ix86_isa_flags_explicit |= OPTION_MASK_ISA_RDRND_UNSET;
2539 ix86_isa_flags |= OPTION_MASK_ISA_F16C_SET;
2540 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_SET;
2544 ix86_isa_flags &= ~OPTION_MASK_ISA_F16C_UNSET;
2545 ix86_isa_flags_explicit |= OPTION_MASK_ISA_F16C_UNSET;
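/* Illustrative note (not part of the original source): every case above
   follows the same idiom - it updates ix86_isa_flags and also records
   the affected bits in ix86_isa_flags_explicit.  override_options then
   only applies a -march default when the user did not decide explicitly,
   e.g.

     if (processor_alias_table[i].flags & PTA_MMX
         && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
       ix86_isa_flags |= OPTION_MASK_ISA_MMX;

   so -mno-mmx is honored even with -march=pentium-mmx.  */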
2554 /* Return a string that documents the current -m options. The caller is
2555 responsible for freeing the string. */
2558 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2559 const char *fpmath, bool add_nl_p)
2561 struct ix86_target_opts
2563 const char *option; /* option string */
2564 int mask; /* isa mask options */
2567 /* This table is ordered so that options like -msse4.2, which imply
2568    preceding options, match first. */
2569 static struct ix86_target_opts isa_opts[] =
2571 { "-m64", OPTION_MASK_ISA_64BIT },
2572 { "-mfma4", OPTION_MASK_ISA_FMA4 },
2573 { "-mfma", OPTION_MASK_ISA_FMA },
2574 { "-mxop", OPTION_MASK_ISA_XOP },
2575 { "-mlwp", OPTION_MASK_ISA_LWP },
2576 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2577 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2578 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2579 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2580 { "-msse3", OPTION_MASK_ISA_SSE3 },
2581 { "-msse2", OPTION_MASK_ISA_SSE2 },
2582 { "-msse", OPTION_MASK_ISA_SSE },
2583 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2584 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2585 { "-mmmx", OPTION_MASK_ISA_MMX },
2586 { "-mabm", OPTION_MASK_ISA_ABM },
2587 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2588 { "-mmovbe", OPTION_MASK_ISA_MOVBE },
2589 { "-mcrc32", OPTION_MASK_ISA_CRC32 },
2590 { "-maes", OPTION_MASK_ISA_AES },
2591 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2592 { "-mfsgsbase", OPTION_MASK_ISA_FSGSBASE },
2593 { "-mrdrnd", OPTION_MASK_ISA_RDRND },
2594 { "-mf16c", OPTION_MASK_ISA_F16C },
2598 static struct ix86_target_opts flag_opts[] =
2600 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2601 { "-m80387", MASK_80387 },
2602 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2603 { "-malign-double", MASK_ALIGN_DOUBLE },
2604 { "-mcld", MASK_CLD },
2605 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2606 { "-mieee-fp", MASK_IEEE_FP },
2607 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2608 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2609 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2610 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2611 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2612 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2613 { "-mno-red-zone", MASK_NO_RED_ZONE },
2614 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2615 { "-mrecip", MASK_RECIP },
2616 { "-mrtd", MASK_RTD },
2617 { "-msseregparm", MASK_SSEREGPARM },
2618 { "-mstack-arg-probe", MASK_STACK_PROBE },
2619 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2622 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2625 char target_other[40];
2634 memset (opts, '\0', sizeof (opts));
2636 /* Add -march= option. */
2639 opts[num][0] = "-march=";
2640 opts[num++][1] = arch;
2643 /* Add -mtune= option. */
2646 opts[num][0] = "-mtune=";
2647 opts[num++][1] = tune;
2650 /* Pick out the ISA options. */
2651 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2653 if ((isa & isa_opts[i].mask) != 0)
2655 opts[num++][0] = isa_opts[i].option;
2656 isa &= ~ isa_opts[i].mask;
2660 if (isa && add_nl_p)
2662 opts[num++][0] = isa_other;
2663 sprintf (isa_other, "(other isa: %#x)", isa);
2666 /* Add flag options. */
2667 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2669 if ((flags & flag_opts[i].mask) != 0)
2671 opts[num++][0] = flag_opts[i].option;
2672 flags &= ~ flag_opts[i].mask;
2676 if (flags && add_nl_p)
2678 opts[num++][0] = target_other;
2679 sprintf (target_other, "(other flags: %#x)", flags);
2682 /* Add -fpmath= option. */
2685 opts[num][0] = "-mfpmath=";
2686 opts[num++][1] = fpmath;
2693 gcc_assert (num < ARRAY_SIZE (opts));
2695 /* Size the string. */
2697 sep_len = (add_nl_p) ? 3 : 1;
2698 for (i = 0; i < num; i++)
2701 for (j = 0; j < 2; j++)
2703 len += strlen (opts[i][j]);
2706 /* Build the string. */
2707 ret = ptr = (char *) xmalloc (len);
2710 for (i = 0; i < num; i++)
2714 for (j = 0; j < 2; j++)
2715 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2722 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2730 for (j = 0; j < 2; j++)
2733 memcpy (ptr, opts[i][j], len2[j]);
2735 line_len += len2[j];
2740 gcc_assert (ret + len >= ptr);
2745 /* Return TRUE if software prefetching is beneficial for the
2746    given CPU. */
2749 software_prefetching_beneficial_p (void)
2753 case PROCESSOR_GEODE:
2755 case PROCESSOR_ATHLON:
2757 case PROCESSOR_AMDFAM10:
2765 /* Return true if profiling code should be emitted before the
2766    prologue, and false otherwise.
2767    Note: for x86 the "hotfix" case is not supported and is rejected with sorry (). */
2769 ix86_profile_before_prologue (void)
2771 return flag_fentry != 0;
2774 /* Function that is callable from the debugger to print the
2775    current options. */
2777 ix86_debug_options (void)
2779 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2780 ix86_arch_string, ix86_tune_string,
2781 ix86_fpmath_string, true);
2785 fprintf (stderr, "%s\n\n", opts);
2789 fputs ("<no options>\n\n", stderr);
2794 /* Sometimes certain combinations of command options do not make
2795 sense on a particular target machine. You can define a macro
2796 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2797 defined, is executed once just after all the command options have
2798 been parsed.
2800 Don't use this macro to turn on various extra optimizations for
2801 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2804 override_options (bool main_args_p)
2807 unsigned int ix86_arch_mask, ix86_tune_mask;
2808 const bool ix86_tune_specified = (ix86_tune_string != NULL);
2813 /* Comes from final.c -- no real reason to change it. */
2814 #define MAX_CODE_ALIGN 16
2822 PTA_PREFETCH_SSE = 1 << 4,
2824 PTA_3DNOW_A = 1 << 6,
2828 PTA_POPCNT = 1 << 10,
2830 PTA_SSE4A = 1 << 12,
2831 PTA_NO_SAHF = 1 << 13,
2832 PTA_SSE4_1 = 1 << 14,
2833 PTA_SSE4_2 = 1 << 15,
2835 PTA_PCLMUL = 1 << 17,
2838 PTA_MOVBE = 1 << 20,
2842 PTA_FSGSBASE = 1 << 24,
2843 PTA_RDRND = 1 << 25,
2849 const char *const name; /* processor name or nickname. */
2850 const enum processor_type processor;
2851 const enum attr_cpu schedule;
2852 const unsigned /*enum pta_flags*/ flags;
2854 const processor_alias_table[] =
2856 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2857 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2858 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2859 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2860 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2861 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2862 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2863 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2864 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2865 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2866 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2867 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2868 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2870 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2872 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2873 PTA_MMX | PTA_SSE | PTA_SSE2},
2874 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2875 PTA_MMX | PTA_SSE | PTA_SSE2},
2876 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2877 PTA_MMX | PTA_SSE | PTA_SSE2},
2878 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2879 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2880 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2881 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2882 | PTA_CX16 | PTA_NO_SAHF},
2883 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2884 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2885 | PTA_SSSE3 | PTA_CX16},
2886 {"atom", PROCESSOR_ATOM, CPU_ATOM,
2887 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2888 | PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
2889 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2890 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2891 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2892 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2893 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2894 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2895 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2896 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2897 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2898 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2899 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2900 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2901 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2902 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2903 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2904 {"x86-64", PROCESSOR_K8, CPU_K8,
2905 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2906 {"k8", PROCESSOR_K8, CPU_K8,
2907 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2908 | PTA_SSE2 | PTA_NO_SAHF},
2909 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2910 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2911 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2912 {"opteron", PROCESSOR_K8, CPU_K8,
2913 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2914 | PTA_SSE2 | PTA_NO_SAHF},
2915 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2916 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2917 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2918 {"athlon64", PROCESSOR_K8, CPU_K8,
2919 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2920 | PTA_SSE2 | PTA_NO_SAHF},
2921 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2922 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2923 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2924 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2925 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2926 | PTA_SSE2 | PTA_NO_SAHF},
2927 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2928 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2929 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2930 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2931 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2932 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2933 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1,
2934 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2935 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM
2936 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
2937 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP},
2938 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2939 0 /* flags are only used for -march switch. */ },
2940 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2941 PTA_64BIT /* flags are only used for -march switch. */ },
2944 int const pta_size = ARRAY_SIZE (processor_alias_table);
2946 /* Set up prefix/suffix so the error messages refer to either the command
2947 line argument, or the attribute(target). */
2956 prefix = "option(\"";
2961 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2962 SUBTARGET_OVERRIDE_OPTIONS;
2965 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2966 SUBSUBTARGET_OVERRIDE_OPTIONS;
2969 /* -fPIC is the default for x86_64. */
2970 if (TARGET_MACHO && TARGET_64BIT)
2973 /* Set the default values for switches whose default depends on TARGET_64BIT
2974 in case they weren't overwritten by command line options. */
2979 /* Mach-O doesn't support omitting the frame pointer for now. */
2980 if (flag_omit_frame_pointer == 2)
2981 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2982 if (flag_asynchronous_unwind_tables == 2)
2983 flag_asynchronous_unwind_tables = 1;
2984 if (flag_pcc_struct_return == 2)
2985 flag_pcc_struct_return = 0;
2991 if (flag_omit_frame_pointer == 2)
2992 flag_omit_frame_pointer = 0;
2993 if (flag_asynchronous_unwind_tables == 2)
2994 flag_asynchronous_unwind_tables = 0;
2995 if (flag_pcc_struct_return == 2)
2996 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2999 /* Need to check -mtune=generic first. */
3000 if (ix86_tune_string)
3002 if (!strcmp (ix86_tune_string, "generic")
3003 || !strcmp (ix86_tune_string, "i686")
3004 /* As special support for cross compilers we read -mtune=native
3005 as -mtune=generic. With native compilers we won't see the
3006 -mtune=native, as it was changed by the driver. */
3007 || !strcmp (ix86_tune_string, "native"))
3010 ix86_tune_string = "generic64";
3012 ix86_tune_string = "generic32";
3014 /* If this call is for setting the option attribute, allow the
3015 generic32/generic64 that was previously set. */
3016 else if (!main_args_p
3017 && (!strcmp (ix86_tune_string, "generic32")
3018 || !strcmp (ix86_tune_string, "generic64")))
3020 else if (!strncmp (ix86_tune_string, "generic", 7))
3021 error ("bad value (%s) for %stune=%s %s",
3022 ix86_tune_string, prefix, suffix, sw);
3023 else if (!strcmp (ix86_tune_string, "x86-64"))
3024 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
3025 "%stune=k8%s or %stune=generic%s instead as appropriate.",
3026 prefix, suffix, prefix, suffix, prefix, suffix);
3030 if (ix86_arch_string)
3031 ix86_tune_string = ix86_arch_string;
3032 if (!ix86_tune_string)
3034 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
3035 ix86_tune_defaulted = 1;
3038 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
3039 need to use a sensible tune option. */
3040 if (!strcmp (ix86_tune_string, "generic")
3041 || !strcmp (ix86_tune_string, "x86-64")
3042 || !strcmp (ix86_tune_string, "i686"))
3045 ix86_tune_string = "generic64";
3047 ix86_tune_string = "generic32";
3051 if (ix86_stringop_string)
3053 if (!strcmp (ix86_stringop_string, "rep_byte"))
3054 stringop_alg = rep_prefix_1_byte;
3055 else if (!strcmp (ix86_stringop_string, "libcall"))
3056 stringop_alg = libcall;
3057 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
3058 stringop_alg = rep_prefix_4_byte;
3059 else if (!strcmp (ix86_stringop_string, "rep_8byte")
3061 /* rep; movq isn't available in 32-bit code. */
3062 stringop_alg = rep_prefix_8_byte;
3063 else if (!strcmp (ix86_stringop_string, "byte_loop"))
3064 stringop_alg = loop_1_byte;
3065 else if (!strcmp (ix86_stringop_string, "loop"))
3066 stringop_alg = loop;
3067 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
3068 stringop_alg = unrolled_loop;
3070 error ("bad value (%s) for %sstringop-strategy=%s %s",
3071 ix86_stringop_string, prefix, suffix, sw);
3074 if (!ix86_arch_string)
3075 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU;
3077 ix86_arch_specified = 1;
3079 /* Validate -mabi= value. */
3080 if (ix86_abi_string)
3082 if (strcmp (ix86_abi_string, "sysv") == 0)
3083 ix86_abi = SYSV_ABI;
3084 else if (strcmp (ix86_abi_string, "ms") == 0)
3087 error ("unknown ABI (%s) for %sabi=%s %s",
3088 ix86_abi_string, prefix, suffix, sw);
3091 ix86_abi = DEFAULT_ABI;
3093 if (ix86_cmodel_string != 0)
3095 if (!strcmp (ix86_cmodel_string, "small"))
3096 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3097 else if (!strcmp (ix86_cmodel_string, "medium"))
3098 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
3099 else if (!strcmp (ix86_cmodel_string, "large"))
3100 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
3102 error ("code model %s does not support PIC mode", ix86_cmodel_string);
3103 else if (!strcmp (ix86_cmodel_string, "32"))
3104 ix86_cmodel = CM_32;
3105 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
3106 ix86_cmodel = CM_KERNEL;
3108 error ("bad value (%s) for %scmodel=%s %s",
3109 ix86_cmodel_string, prefix, suffix, sw);
3113 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
3114 use of rip-relative addressing. This eliminates fixups that
3115 would otherwise be needed if this object is to be placed in a
3116 DLL, and is essentially just as efficient as direct addressing. */
3117 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
3118 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
3119 else if (TARGET_64BIT)
3120 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
3122 ix86_cmodel = CM_32;
3124 if (ix86_asm_string != 0)
3127 && !strcmp (ix86_asm_string, "intel"))
3128 ix86_asm_dialect = ASM_INTEL;
3129 else if (!strcmp (ix86_asm_string, "att"))
3130 ix86_asm_dialect = ASM_ATT;
3132 error ("bad value (%s) for %sasm=%s %s",
3133 ix86_asm_string, prefix, suffix, sw);
3135 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
3136 error ("code model %qs not supported in the %s bit mode",
3137 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
3138 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
3139 sorry ("%i-bit mode not compiled in",
3140 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
3142 for (i = 0; i < pta_size; i++)
3143 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
3145 ix86_schedule = processor_alias_table[i].schedule;
3146 ix86_arch = processor_alias_table[i].processor;
3147 /* Default cpu tuning to the architecture. */
3148 ix86_tune = ix86_arch;
3150 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3151 error ("CPU you selected does not support x86-64 "
3154 if (processor_alias_table[i].flags & PTA_MMX
3155 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
3156 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
3157 if (processor_alias_table[i].flags & PTA_3DNOW
3158 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
3159 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
3160 if (processor_alias_table[i].flags & PTA_3DNOW_A
3161 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
3162 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
3163 if (processor_alias_table[i].flags & PTA_SSE
3164 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
3165 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
3166 if (processor_alias_table[i].flags & PTA_SSE2
3167 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
3168 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
3169 if (processor_alias_table[i].flags & PTA_SSE3
3170 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
3171 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
3172 if (processor_alias_table[i].flags & PTA_SSSE3
3173 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
3174 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
3175 if (processor_alias_table[i].flags & PTA_SSE4_1
3176 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
3177 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
3178 if (processor_alias_table[i].flags & PTA_SSE4_2
3179 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
3180 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
3181 if (processor_alias_table[i].flags & PTA_AVX
3182 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
3183 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
3184 if (processor_alias_table[i].flags & PTA_FMA
3185 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
3186 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
3187 if (processor_alias_table[i].flags & PTA_SSE4A
3188 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
3189 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
3190 if (processor_alias_table[i].flags & PTA_FMA4
3191 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA4))
3192 ix86_isa_flags |= OPTION_MASK_ISA_FMA4;
3193 if (processor_alias_table[i].flags & PTA_XOP
3194 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_XOP))
3195 ix86_isa_flags |= OPTION_MASK_ISA_XOP;
3196 if (processor_alias_table[i].flags & PTA_LWP
3197 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_LWP))
3198 ix86_isa_flags |= OPTION_MASK_ISA_LWP;
3199 if (processor_alias_table[i].flags & PTA_ABM
3200 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
3201 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
3202 if (processor_alias_table[i].flags & PTA_CX16
3203 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
3204 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
3205 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
3206 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
3207 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
3208 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
3209 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
3210 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
3211 if (processor_alias_table[i].flags & PTA_MOVBE
3212 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MOVBE))
3213 ix86_isa_flags |= OPTION_MASK_ISA_MOVBE;
3214 if (processor_alias_table[i].flags & PTA_AES
3215 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
3216 ix86_isa_flags |= OPTION_MASK_ISA_AES;
3217 if (processor_alias_table[i].flags & PTA_PCLMUL
3218 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
3219 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
3220 if (processor_alias_table[i].flags & PTA_FSGSBASE
3221 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FSGSBASE))
3222 ix86_isa_flags |= OPTION_MASK_ISA_FSGSBASE;
3223 if (processor_alias_table[i].flags & PTA_RDRND
3224 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_RDRND))
3225 ix86_isa_flags |= OPTION_MASK_ISA_RDRND;
3226 if (processor_alias_table[i].flags & PTA_F16C
3227 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_F16C))
3228 ix86_isa_flags |= OPTION_MASK_ISA_F16C;
3229 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
3230 x86_prefetch_sse = true;
3235 if (!strcmp (ix86_arch_string, "generic"))
3236 error ("generic CPU can be used only for %stune=%s %s",
3237 prefix, suffix, sw);
3238 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size)
3239 error ("bad value (%s) for %sarch=%s %s",
3240 ix86_arch_string, prefix, suffix, sw);
3242 ix86_arch_mask = 1u << ix86_arch;
3243 for (i = 0; i < X86_ARCH_LAST; ++i)
3244 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3246 for (i = 0; i < pta_size; i++)
3247 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
3249 ix86_schedule = processor_alias_table[i].schedule;
3250 ix86_tune = processor_alias_table[i].processor;
3251 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
3253 if (ix86_tune_defaulted)
3255 ix86_tune_string = "x86-64";
3256 for (i = 0; i < pta_size; i++)
3257 if (! strcmp (ix86_tune_string,
3258 processor_alias_table[i].name))
3260 ix86_schedule = processor_alias_table[i].schedule;
3261 ix86_tune = processor_alias_table[i].processor;
3264 error ("CPU you selected does not support x86-64 "
3267 /* Intel CPUs have always interpreted SSE prefetch instructions as
3268 NOPs; so, we can enable SSE prefetch instructions even when
3269 -mtune (rather than -march) points us to a processor that has them.
3270 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
3271 higher processors. */
3273 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
3274 x86_prefetch_sse = true;
3278 if (ix86_tune_specified && i == pta_size)
3279 error ("bad value (%s) for %stune=%s %s",
3280 ix86_tune_string, prefix, suffix, sw);
3282 ix86_tune_mask = 1u << ix86_tune;
3283 for (i = 0; i < X86_TUNE_LAST; ++i)
3284 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3287 ix86_cost = &ix86_size_cost;
3289 ix86_cost = processor_target_table[ix86_tune].cost;
3291 /* Arrange to set up i386_stack_locals for all functions. */
3292 init_machine_status = ix86_init_machine_status;
3294 /* Validate -mregparm= value. */
3295 if (ix86_regparm_string)
3298 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
3299 i = atoi (ix86_regparm_string);
3300 if (i < 0 || i > REGPARM_MAX)
3301 error ("%sregparm=%d%s is not between 0 and %d",
3302 prefix, i, suffix, REGPARM_MAX);
3307 ix86_regparm = REGPARM_MAX;
3309 /* If the user has provided any of the -malign-* options,
3310 warn and use that value only if -falign-* is not set.
3311 Remove this code in GCC 3.2 or later. */
3312 if (ix86_align_loops_string)
3314 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
3315 prefix, suffix, suffix);
3316 if (align_loops == 0)
3318 i = atoi (ix86_align_loops_string);
3319 if (i < 0 || i > MAX_CODE_ALIGN)
3320 error ("%salign-loops=%d%s is not between 0 and %d",
3321 prefix, i, suffix, MAX_CODE_ALIGN);
3323 align_loops = 1 << i;
3327 if (ix86_align_jumps_string)
3329 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
3330 prefix, suffix, suffix);
3331 if (align_jumps == 0)
3333 i = atoi (ix86_align_jumps_string);
3334 if (i < 0 || i > MAX_CODE_ALIGN)
3335 error ("%salign-loops=%d%s is not between 0 and %d",
3336 prefix, i, suffix, MAX_CODE_ALIGN);
3338 align_jumps = 1 << i;
3342 if (ix86_align_funcs_string)
3344 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
3345 prefix, suffix, suffix);
3346 if (align_functions == 0)
3348 i = atoi (ix86_align_funcs_string);
3349 if (i < 0 || i > MAX_CODE_ALIGN)
3350 error ("%salign-loops=%d%s is not between 0 and %d",
3351 prefix, i, suffix, MAX_CODE_ALIGN);
3353 align_functions = 1 << i;
3357 /* Default align_* from the processor table. */
3358 if (align_loops == 0)
3360 align_loops = processor_target_table[ix86_tune].align_loop;
3361 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
3363 if (align_jumps == 0)
3365 align_jumps = processor_target_table[ix86_tune].align_jump;
3366 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
3368 if (align_functions == 0)
3370 align_functions = processor_target_table[ix86_tune].align_func;
3373 /* Validate -mbranch-cost= value, or provide default. */
3374 ix86_branch_cost = ix86_cost->branch_cost;
3375 if (ix86_branch_cost_string)
3377 i = atoi (ix86_branch_cost_string);
3379 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
3381 ix86_branch_cost = i;
3383 if (ix86_section_threshold_string)
3385 i = atoi (ix86_section_threshold_string);
3387 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3389 ix86_section_threshold = i;
3392 if (ix86_tls_dialect_string)
3394 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3395 ix86_tls_dialect = TLS_DIALECT_GNU;
3396 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3397 ix86_tls_dialect = TLS_DIALECT_GNU2;
3399 error ("bad value (%s) for %stls-dialect=%s %s",
3400 ix86_tls_dialect_string, prefix, suffix, sw);
3403 if (ix87_precision_string)
3405 i = atoi (ix87_precision_string);
3406 if (i != 32 && i != 64 && i != 80)
3407 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3412 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3414 /* Enable by default the SSE and MMX builtins. Do allow the user to
3415 explicitly disable any of these. In particular, disabling SSE and
3416 MMX for kernel code is extremely useful. */
3417 if (!ix86_arch_specified)
3419 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3420 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3423 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3427 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3429 if (!ix86_arch_specified)
3431 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3433 /* The i386 ABI does not specify a red zone.  It still makes sense to use
3434    it when the programmer takes care to keep the stack from being destroyed. */
3435 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3436 target_flags |= MASK_NO_RED_ZONE;
3439 /* Keep nonleaf frame pointers. */
3440 if (flag_omit_frame_pointer)
3441 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3442 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3443 flag_omit_frame_pointer = 1;
3445 /* If we're doing fast math, we don't care about comparison order
3446 wrt NaNs. This lets us use a shorter comparison sequence. */
3447 if (flag_finite_math_only)
3448 target_flags &= ~MASK_IEEE_FP;
3450 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3451 since the insns won't need emulation. */
3452 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3453 target_flags &= ~MASK_NO_FANCY_MATH_387;
3455 /* Likewise, if the target doesn't have a 387, or we've specified
3456 software floating point, don't use 387 inline intrinsics. */
3458 target_flags |= MASK_NO_FANCY_MATH_387;
3460 /* Turn on MMX builtins for -msse. */
3463 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3464 x86_prefetch_sse = true;
3467 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3468 if (TARGET_SSE4_2 || TARGET_ABM)
3469 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3471 /* Validate -mpreferred-stack-boundary= value or default it to
3472 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3473 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3474 if (ix86_preferred_stack_boundary_string)
3476 i = atoi (ix86_preferred_stack_boundary_string);
3477 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3478 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3479 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3481 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
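/* Worked example (editor's sketch): -mpreferred-stack-boundary=4 gives
   i == 4, so ix86_preferred_stack_boundary becomes
   (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, the 16-byte alignment
   the x86-64 psABI requires.  */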
3484 /* Set the default value for -mstackrealign. */
3485 if (ix86_force_align_arg_pointer == -1)
3486 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3488 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3490 /* Validate -mincoming-stack-boundary= value or default it to
3491 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3492 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3493 if (ix86_incoming_stack_boundary_string)
3495 i = atoi (ix86_incoming_stack_boundary_string);
3496 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3497 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3498 i, TARGET_64BIT ? 4 : 2);
3501 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3502 ix86_incoming_stack_boundary
3503 = ix86_user_incoming_stack_boundary;
3507 /* Accept -msseregparm only if at least SSE support is enabled. */
3508 if (TARGET_SSEREGPARM
3510 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3512 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3513 if (ix86_fpmath_string != 0)
3515 if (! strcmp (ix86_fpmath_string, "387"))
3516 ix86_fpmath = FPMATH_387;
3517 else if (! strcmp (ix86_fpmath_string, "sse"))
3521 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3522 ix86_fpmath = FPMATH_387;
3525 ix86_fpmath = FPMATH_SSE;
3527 else if (! strcmp (ix86_fpmath_string, "387,sse")
3528 || ! strcmp (ix86_fpmath_string, "387+sse")
3529 || ! strcmp (ix86_fpmath_string, "sse,387")
3530 || ! strcmp (ix86_fpmath_string, "sse+387")
3531 || ! strcmp (ix86_fpmath_string, "both"))
3535 warning (0, "SSE instruction set disabled, using 387 arithmetic");
3536 ix86_fpmath = FPMATH_387;
3538 else if (!TARGET_80387)
3540 warning (0, "387 instruction set disabled, using SSE arithmetic");
3541 ix86_fpmath = FPMATH_SSE;
3544 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3547 error ("bad value (%s) for %sfpmath=%s %s",
3548 ix86_fpmath_string, prefix, suffix, sw);
3551 /* If the i387 is disabled, then do not return values in it. */
3553 target_flags &= ~MASK_FLOAT_RETURNS;
3555 /* Use external vectorized library in vectorizing intrinsics. */
3556 if (ix86_veclibabi_string)
3558 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3559 ix86_veclib_handler = ix86_veclibabi_svml;
3560 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3561 ix86_veclib_handler = ix86_veclibabi_acml;
3563 error ("unknown vectorization library ABI type (%s) for "
3564 "%sveclibabi=%s %s", ix86_veclibabi_string,
3565 prefix, suffix, sw);
3568 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3569 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3571 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3573 /* ??? Unwind info is not correct around the CFG unless either a frame
3574 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3575 unwind info generation to be aware of the CFG and propagating states
3576 around edges.  */
3577 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3578 || flag_exceptions || flag_non_call_exceptions)
3579 && flag_omit_frame_pointer
3580 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3582 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3583 warning (0, "unwind tables currently require either a frame pointer "
3584 "or %saccumulate-outgoing-args%s for correctness",
3586 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3589 /* If stack probes are required, the space used for large function
3590 arguments on the stack must also be probed, so enable
3591 -maccumulate-outgoing-args so this happens in the prologue. */
3592 if (TARGET_STACK_PROBE
3593 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3595 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3596 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3597 "for correctness", prefix, suffix);
3598 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3601 /* For sane SSE instruction set generation we need fcomi instruction.
3602 It is safe to enable all CMOVE instructions. */
3606 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3609 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3610 p = strchr (internal_label_prefix, 'X');
3611 internal_label_prefix_len = p - internal_label_prefix;
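/* Illustration (editor's sketch): with the default ELF-style definition
   of ASM_GENERATE_INTERNAL_LABEL the buffer holds something like
   "*.LX0"; strchr then finds the 'X' placeholder, so
   internal_label_prefix is "*.L" and internal_label_prefix_len is 3.
   The exact string is target-dependent, which is why it is probed at
   runtime rather than hard-coded.  */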
3615 /* When the scheduling description is not available, disable the scheduler
3616 pass so it won't slow down compilation and make x87 code slower.  */
3617 if (!TARGET_SCHEDULE)
3618 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3620 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3621 set_param_value ("simultaneous-prefetches",
3622 ix86_cost->simultaneous_prefetches);
3623 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3624 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3625 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3626 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3627 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3628 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3630 /* Enable software prefetching at -O3 for CPUs where prefetching is beneficial.  */
3631 if (flag_prefetch_loop_arrays < 0
3634 && software_prefetching_beneficial_p ())
3635 flag_prefetch_loop_arrays = 1;
3637 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3638 can be optimized to ap = __builtin_next_arg (0). */
3640 targetm.expand_builtin_va_start = NULL;
3644 ix86_gen_leave = gen_leave_rex64;
3645 ix86_gen_pop1 = gen_popdi1;
3646 ix86_gen_add3 = gen_adddi3;
3647 ix86_gen_sub3 = gen_subdi3;
3648 ix86_gen_sub3_carry = gen_subdi3_carry;
3649 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3650 ix86_gen_monitor = gen_sse3_monitor64;
3651 ix86_gen_andsp = gen_anddi3;
3652 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_64;
3653 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probedi;
3654 ix86_gen_probe_stack_range = gen_probe_stack_rangedi;
3658 ix86_gen_leave = gen_leave;
3659 ix86_gen_pop1 = gen_popsi1;
3660 ix86_gen_add3 = gen_addsi3;
3661 ix86_gen_sub3 = gen_subsi3;
3662 ix86_gen_sub3_carry = gen_subsi3_carry;
3663 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3664 ix86_gen_monitor = gen_sse3_monitor;
3665 ix86_gen_andsp = gen_andsi3;
3666 ix86_gen_allocate_stack_worker = gen_allocate_stack_worker_32;
3667 ix86_gen_adjust_stack_and_probe = gen_adjust_stack_and_probesi;
3668 ix86_gen_probe_stack_range = gen_probe_stack_rangesi;
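/* The assignments above select word-size-specific insn generators once,
   at option-processing time, so expansion code never re-tests the word
   size.  A minimal standalone sketch of the pattern (editor's
   illustration, not GCC code; all names hypothetical):

     typedef void (*gen_fn) (void);
     static void gen_add_si (void) { }          32-bit variant
     static void gen_add_di (void) { }          64-bit variant
     static gen_fn ix86_gen_add;
     ...
     ix86_gen_add = target_64bit ? gen_add_di : gen_add_si;
     ix86_gen_add ();                           callers stay mode-agnostic
*/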
3672 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3674 target_flags |= MASK_CLD & ~target_flags_explicit;
3677 if (!TARGET_64BIT && flag_pic)
3679 if (flag_fentry > 0)
3680 sorry ("-mfentry isn't supported for 32-bit in combination with -fpic");
3683 if (flag_fentry < 0)
3685 #if defined(PROFILE_BEFORE_PROLOGUE)
3692 /* Save the initial options in case the user does function-specific options.  */
3694 target_option_default_node = target_option_current_node
3695 = build_target_option_node ();
3698 /* Update register usage after having seen the compiler flags. */
3701 ix86_conditional_register_usage (void)
3706 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3708 if (fixed_regs[i] > 1)
3709 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3710 if (call_used_regs[i] > 1)
3711 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3714 /* The PIC register, if it exists, is fixed. */
3715 j = PIC_OFFSET_TABLE_REGNUM;
3716 if (j != INVALID_REGNUM)
3717 fixed_regs[j] = call_used_regs[j] = 1;
3719 /* The MS_ABI changes the set of call-used registers. */
3720 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3722 call_used_regs[SI_REG] = 0;
3723 call_used_regs[DI_REG] = 0;
3724 call_used_regs[XMM6_REG] = 0;
3725 call_used_regs[XMM7_REG] = 0;
3726 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3727 call_used_regs[i] = 0;
3730 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3731 other call-clobbered regs for 64-bit. */
3734 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3736 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3737 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3738 && call_used_regs[i])
3739 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3742 /* If MMX is disabled, squash the registers. */
3744 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3745 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3746 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3748 /* If SSE is disabled, squash the registers. */
3750 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3751 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3752 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3754 /* If the FPU is disabled, squash the registers. */
3755 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3756 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3757 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3758 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3760 /* If 32-bit, squash the 64-bit registers. */
3763 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3765 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3771 /* Save the current options */
3774 ix86_function_specific_save (struct cl_target_option *ptr)
3776 ptr->arch = ix86_arch;
3777 ptr->schedule = ix86_schedule;
3778 ptr->tune = ix86_tune;
3779 ptr->fpmath = ix86_fpmath;
3780 ptr->branch_cost = ix86_branch_cost;
3781 ptr->tune_defaulted = ix86_tune_defaulted;
3782 ptr->arch_specified = ix86_arch_specified;
3783 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3784 ptr->target_flags_explicit = target_flags_explicit;
3786 /* The fields are char but the variables are not; make sure the
3787 values fit in the fields. */
3788 gcc_assert (ptr->arch == ix86_arch);
3789 gcc_assert (ptr->schedule == ix86_schedule);
3790 gcc_assert (ptr->tune == ix86_tune);
3791 gcc_assert (ptr->fpmath == ix86_fpmath);
3792 gcc_assert (ptr->branch_cost == ix86_branch_cost);
3795 /* Restore the current options */
3798 ix86_function_specific_restore (struct cl_target_option *ptr)
3800 enum processor_type old_tune = ix86_tune;
3801 enum processor_type old_arch = ix86_arch;
3802 unsigned int ix86_arch_mask, ix86_tune_mask;
3805 ix86_arch = (enum processor_type) ptr->arch;
3806 ix86_schedule = (enum attr_cpu) ptr->schedule;
3807 ix86_tune = (enum processor_type) ptr->tune;
3808 ix86_fpmath = (enum fpmath_unit) ptr->fpmath;
3809 ix86_branch_cost = ptr->branch_cost;
3810 ix86_tune_defaulted = ptr->tune_defaulted;
3811 ix86_arch_specified = ptr->arch_specified;
3812 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3813 target_flags_explicit = ptr->target_flags_explicit;
3815 /* Recreate the arch feature tests if the arch changed */
3816 if (old_arch != ix86_arch)
3818 ix86_arch_mask = 1u << ix86_arch;
3819 for (i = 0; i < X86_ARCH_LAST; ++i)
3820 ix86_arch_features[i]
3821 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3824 /* Recreate the tune optimization tests */
3825 if (old_tune != ix86_tune)
3827 ix86_tune_mask = 1u << ix86_tune;
3828 for (i = 0; i < X86_TUNE_LAST; ++i)
3829 ix86_tune_features[i]
3830 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
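/* Worked example (editor's sketch): if ix86_tune names a CPU whose enum
   value is, say, 14 (hypothetical), then ix86_tune_mask == 1u << 14 and

     ix86_tune_features[X86_TUNE_USE_LEAVE]
       = !!(initial_ix86_tune_features[X86_TUNE_USE_LEAVE] & (1u << 14));

   i.e. each feature is on exactly when the tuned CPU's bit is set in
   that feature's mask.  */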
3834 /* Print the current options */
3837 ix86_function_specific_print (FILE *file, int indent,
3838 struct cl_target_option *ptr)
3841 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3842 NULL, NULL, NULL, false);
3844 fprintf (file, "%*sarch = %d (%s)\n",
3847 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3848 ? cpu_names[ptr->arch]
3851 fprintf (file, "%*stune = %d (%s)\n",
3854 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3855 ? cpu_names[ptr->tune]
3858 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3859 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3860 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3861 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3865 fprintf (file, "%*s%s\n", indent, "", target_string);
3866 free (target_string);
3871 /* Inner function to process the attribute((target(...))), take an argument and
3872 set the current options from the argument.  If we have a list, recursively
3873 go over the list.  */
3876 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3881 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3882 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3883 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3884 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3899 enum ix86_opt_type type;
3904 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3905 IX86_ATTR_ISA ("abm", OPT_mabm),
3906 IX86_ATTR_ISA ("aes", OPT_maes),
3907 IX86_ATTR_ISA ("avx", OPT_mavx),
3908 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3909 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3910 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3911 IX86_ATTR_ISA ("sse", OPT_msse),
3912 IX86_ATTR_ISA ("sse2", OPT_msse2),
3913 IX86_ATTR_ISA ("sse3", OPT_msse3),
3914 IX86_ATTR_ISA ("sse4", OPT_msse4),
3915 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3916 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3917 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3918 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3919 IX86_ATTR_ISA ("fma4", OPT_mfma4),
3920 IX86_ATTR_ISA ("xop", OPT_mxop),
3921 IX86_ATTR_ISA ("lwp", OPT_mlwp),
3922 IX86_ATTR_ISA ("fsgsbase", OPT_mfsgsbase),
3923 IX86_ATTR_ISA ("rdrnd", OPT_mrdrnd),
3924 IX86_ATTR_ISA ("f16c", OPT_mf16c),
3926 /* string options */
3927 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3928 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3929 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3932 IX86_ATTR_YES ("cld",
3936 IX86_ATTR_NO ("fancy-math-387",
3937 OPT_mfancy_math_387,
3938 MASK_NO_FANCY_MATH_387),
3940 IX86_ATTR_YES ("ieee-fp",
3944 IX86_ATTR_YES ("inline-all-stringops",
3945 OPT_minline_all_stringops,
3946 MASK_INLINE_ALL_STRINGOPS),
3948 IX86_ATTR_YES ("inline-stringops-dynamically",
3949 OPT_minline_stringops_dynamically,
3950 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3952 IX86_ATTR_NO ("align-stringops",
3953 OPT_mno_align_stringops,
3954 MASK_NO_ALIGN_STRINGOPS),
3956 IX86_ATTR_YES ("recip",
3962 /* If this is a list, recurse to get the options. */
3963 if (TREE_CODE (args) == TREE_LIST)
3967 for (; args; args = TREE_CHAIN (args))
3968 if (TREE_VALUE (args)
3969 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3975 else if (TREE_CODE (args) != STRING_CST)
3978 /* Handle multiple arguments separated by commas. */
3979 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3981 while (next_optstr && *next_optstr != '\0')
3983 char *p = next_optstr;
3985 char *comma = strchr (next_optstr, ',');
3986 const char *opt_string;
3987 size_t len, opt_len;
3992 enum ix86_opt_type type = ix86_opt_unknown;
3998 len = comma - next_optstr;
3999 next_optstr = comma + 1;
4007 /* Recognize no-xxx. */
4008 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
4017 /* Find the option. */
4020 for (i = 0; i < ARRAY_SIZE (attrs); i++)
4022 type = attrs[i].type;
4023 opt_len = attrs[i].len;
4024 if (ch == attrs[i].string[0]
4025 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
4026 && memcmp (p, attrs[i].string, opt_len) == 0)
4029 mask = attrs[i].mask;
4030 opt_string = attrs[i].string;
4035 /* Process the option. */
4038 error ("attribute(target(\"%s\")) is unknown", orig_p);
4042 else if (type == ix86_opt_isa)
4043 ix86_handle_option (opt, p, opt_set_p);
4045 else if (type == ix86_opt_yes || type == ix86_opt_no)
4047 if (type == ix86_opt_no)
4048 opt_set_p = !opt_set_p;
4051 target_flags |= mask;
4053 target_flags &= ~mask;
4056 else if (type == ix86_opt_str)
4060 error ("option(\"%s\") was already specified", opt_string);
4064 p_strings[opt] = xstrdup (p + opt_len);
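/* Example of an attribute string this loop accepts (editor's
   illustration):

     __attribute__((target ("sse4.2,no-fancy-math-387,arch=core2")))

   It is split at the commas; "sse4.2" matches an ix86_opt_isa entry and
   goes through ix86_handle_option, the "no-" prefix flips opt_set_p for
   the mask-style entry, and "arch=core2" is an ix86_opt_str entry whose
   value is stashed in p_strings[] for the later override_options call.  */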
4074 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
4077 ix86_valid_target_attribute_tree (tree args)
4079 const char *orig_arch_string = ix86_arch_string;
4080 const char *orig_tune_string = ix86_tune_string;
4081 const char *orig_fpmath_string = ix86_fpmath_string;
4082 int orig_tune_defaulted = ix86_tune_defaulted;
4083 int orig_arch_specified = ix86_arch_specified;
4084 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
4087 struct cl_target_option *def
4088 = TREE_TARGET_OPTION (target_option_default_node);
4090 /* Process each of the options on the chain. */
4091 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
4094 /* If the changed options are different from the default, rerun override_options,
4095 and then save the options away.  The string options are attribute options,
4096 and will be undone when we copy the save structure. */
4097 if (ix86_isa_flags != def->ix86_isa_flags
4098 || target_flags != def->target_flags
4099 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
4100 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
4101 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4103 /* If we are using the default tune= or arch=, undo the string assigned,
4104 and use the default. */
4105 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
4106 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
4107 else if (!orig_arch_specified)
4108 ix86_arch_string = NULL;
4110 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
4111 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
4112 else if (orig_tune_defaulted)
4113 ix86_tune_string = NULL;
4115 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
4116 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
4117 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
4118 else if (!TARGET_64BIT && TARGET_SSE)
4119 ix86_fpmath_string = "sse,387";
4121 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
4122 override_options (false);
4124 /* Add any builtin functions with the new isa if any. */
4125 ix86_add_new_builtins (ix86_isa_flags);
4127 /* Save the current options unless we are validating options for
4129 t = build_target_option_node ();
4131 ix86_arch_string = orig_arch_string;
4132 ix86_tune_string = orig_tune_string;
4133 ix86_fpmath_string = orig_fpmath_string;
4135 /* Free up memory allocated to hold the strings */
4136 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
4137 if (option_strings[i])
4138 free (option_strings[i]);
4144 /* Hook to validate attribute((target("string"))). */
4147 ix86_valid_target_attribute_p (tree fndecl,
4148 tree ARG_UNUSED (name),
4150 int ARG_UNUSED (flags))
4152 struct cl_target_option cur_target;
4154 tree old_optimize = build_optimization_node ();
4155 tree new_target, new_optimize;
4156 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
4158 /* If the function changed the optimization levels as well as setting target
4159 options, start with the optimizations specified. */
4160 if (func_optimize && func_optimize != old_optimize)
4161 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
4163 /* The target attributes may also change some optimization flags, so update
4164 the optimization options if necessary. */
4165 cl_target_option_save (&cur_target);
4166 new_target = ix86_valid_target_attribute_tree (args);
4167 new_optimize = build_optimization_node ();
4174 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
4176 if (old_optimize != new_optimize)
4177 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
4180 cl_target_option_restore (&cur_target);
4182 if (old_optimize != new_optimize)
4183 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
4189 /* Hook to determine if one function can safely inline another. */
4192 ix86_can_inline_p (tree caller, tree callee)
4195 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
4196 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
4198 /* If callee has no option attributes, then it is ok to inline. */
4202 /* If caller has no option attributes, but callee does then it is not ok to
4203 inline.  */
4204 else if (!caller_tree)
4209 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
4210 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
4212 /* Callee's ISA options should be a subset of the caller's, i.e. an SSE4 function
4213 can inline an SSE2 function but an SSE2 function can't inline an SSE4
4214 function.  */
4215 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
4216 != callee_opts->ix86_isa_flags)
4219 /* See if we have the same non-isa options. */
4220 else if (caller_opts->target_flags != callee_opts->target_flags)
4223 /* See if arch, tune, etc. are the same. */
4224 else if (caller_opts->arch != callee_opts->arch)
4227 else if (caller_opts->tune != callee_opts->tune)
4230 else if (caller_opts->fpmath != callee_opts->fpmath)
4233 else if (caller_opts->branch_cost != callee_opts->branch_cost)
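/* Worked example (editor's sketch) of the ISA subset test above, with
   hypothetical masks SSE2 == 0x2 and SSE4 == 0x8: a caller whose
   ix86_isa_flags are 0xa may inline a callee needing 0x2, since
   (0xa & 0x2) == 0x2, but a 0x2 caller may not inline a 0x8 callee
   because (0x2 & 0x8) != 0x8.  */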
4244 /* Remember the last target of ix86_set_current_function. */
4245 static GTY(()) tree ix86_previous_fndecl;
4247 /* Establish appropriate back-end context for processing the function
4248 FNDECL. The argument might be NULL to indicate processing at top
4249 level, outside of any function scope. */
4251 ix86_set_current_function (tree fndecl)
4253 /* Only change the context if the function changes. This hook is called
4254 several times in the course of compiling a function, and we don't want to
4255 slow things down too much or call target_reinit when it isn't safe. */
4256 if (fndecl && fndecl != ix86_previous_fndecl)
4258 tree old_tree = (ix86_previous_fndecl
4259 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
4262 tree new_tree = (fndecl
4263 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
4266 ix86_previous_fndecl = fndecl;
4267 if (old_tree == new_tree)
4272 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
4278 struct cl_target_option *def
4279 = TREE_TARGET_OPTION (target_option_current_node);
4281 cl_target_option_restore (def);
4288 /* Return true if this goes in large data/bss. */
4291 ix86_in_large_data_p (tree exp)
4293 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
4296 /* Functions are never large data. */
4297 if (TREE_CODE (exp) == FUNCTION_DECL)
4300 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
4302 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
4303 if (strcmp (section, ".ldata") == 0
4304 || strcmp (section, ".lbss") == 0)
4310 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
4312 /* If this is an incomplete type with size 0, then we can't put it
4313 in data because it might be too big when completed. */
4314 if (!size || size > ix86_section_threshold)
4321 /* Switch to the appropriate section for output of DECL.
4322 DECL is either a `VAR_DECL' node or a constant of some sort.
4323 RELOC indicates whether forming the initial value of DECL requires
4324 link-time relocations. */
4326 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
4330 x86_64_elf_select_section (tree decl, int reloc,
4331 unsigned HOST_WIDE_INT align)
4333 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4334 && ix86_in_large_data_p (decl))
4336 const char *sname = NULL;
4337 unsigned int flags = SECTION_WRITE;
4338 switch (categorize_decl_for_section (decl, reloc))
4343 case SECCAT_DATA_REL:
4344 sname = ".ldata.rel";
4346 case SECCAT_DATA_REL_LOCAL:
4347 sname = ".ldata.rel.local";
4349 case SECCAT_DATA_REL_RO:
4350 sname = ".ldata.rel.ro";
4352 case SECCAT_DATA_REL_RO_LOCAL:
4353 sname = ".ldata.rel.ro.local";
4357 flags |= SECTION_BSS;
4360 case SECCAT_RODATA_MERGE_STR:
4361 case SECCAT_RODATA_MERGE_STR_INIT:
4362 case SECCAT_RODATA_MERGE_CONST:
4366 case SECCAT_SRODATA:
4373 /* We don't split these for medium model. Place them into
4374 default sections and hope for the best.  */
4379 /* We might get called with string constants, but get_named_section
4380 doesn't like them as they are not DECLs. Also, we need to set
4381 flags in that case. */
4383 return get_section (sname, flags, NULL);
4384 return get_named_section (decl, sname, reloc);
4387 return default_elf_select_section (decl, reloc, align);
4390 /* Build up a unique section name, expressed as a
4391 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
4392 RELOC indicates whether the initial value of EXP requires
4393 link-time relocations. */
4395 static void ATTRIBUTE_UNUSED
4396 x86_64_elf_unique_section (tree decl, int reloc)
4398 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4399 && ix86_in_large_data_p (decl))
4401 const char *prefix = NULL;
4402 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
4403 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
4405 switch (categorize_decl_for_section (decl, reloc))
4408 case SECCAT_DATA_REL:
4409 case SECCAT_DATA_REL_LOCAL:
4410 case SECCAT_DATA_REL_RO:
4411 case SECCAT_DATA_REL_RO_LOCAL:
4412 prefix = one_only ? ".ld" : ".ldata";
4415 prefix = one_only ? ".lb" : ".lbss";
4418 case SECCAT_RODATA_MERGE_STR:
4419 case SECCAT_RODATA_MERGE_STR_INIT:
4420 case SECCAT_RODATA_MERGE_CONST:
4421 prefix = one_only ? ".lr" : ".lrodata";
4423 case SECCAT_SRODATA:
4430 /* We don't split these for medium model. Place them into
4431 default sections and hope for the best.  */
4436 const char *name, *linkonce;
4439 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4440 name = targetm.strip_name_encoding (name);
4442 /* If we're using one_only, then there needs to be a .gnu.linkonce
4443 prefix to the section name. */
4444 linkonce = one_only ? ".gnu.linkonce" : "";
4446 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4448 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4452 default_unique_section (decl, reloc);
4455 #ifdef COMMON_ASM_OP
4456 /* This says how to output assembler code to declare an
4457 uninitialized external linkage data object.
4459 For medium model x86-64 we need to use .largecomm opcode for
4460 large objects.  */
4462 x86_elf_aligned_common (FILE *file,
4463 const char *name, unsigned HOST_WIDE_INT size,
4466 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4467 && size > (unsigned int)ix86_section_threshold)
4468 fputs (".largecomm\t", file);
4470 fputs (COMMON_ASM_OP, file);
4471 assemble_name (file, name);
4472 fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
4473 size, align / BITS_PER_UNIT);
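/* For instance (editor's illustration, assuming -mcmodel=medium and the
   default 65536-byte -mlarge-data-threshold): a 70000-byte object "buf"
   aligned to 32 bytes is emitted as

     .largecomm	buf,70000,32

   while smaller objects keep the ordinary COMMON_ASM_OP form.  */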
4477 /* Utility function for targets to use in implementing
4478 ASM_OUTPUT_ALIGNED_BSS. */
4481 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4482 const char *name, unsigned HOST_WIDE_INT size,
4485 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4486 && size > (unsigned int)ix86_section_threshold)
4487 switch_to_section (get_named_section (decl, ".lbss", 0));
4489 switch_to_section (bss_section);
4490 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4491 #ifdef ASM_DECLARE_OBJECT_NAME
4492 last_assemble_variable_decl = decl;
4493 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4495 /* Standard thing is just output label for the object. */
4496 ASM_OUTPUT_LABEL (file, name);
4497 #endif /* ASM_DECLARE_OBJECT_NAME */
4498 ASM_OUTPUT_SKIP (file, size ? size : 1);
4502 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4504 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4505 make the problem with not enough registers even worse. */
4506 #ifdef INSN_SCHEDULING
4508 flag_schedule_insns = 0;
4512 /* The Darwin libraries never set errno, so we might as well
4513 avoid calling them when that's the only reason we would. */
4514 flag_errno_math = 0;
4516 /* The default values of these switches depend on TARGET_64BIT,
4517 which is not known at this moment.  Mark these values with 2 and
4518 let the user override them.  In case there is no command line option
4519 specifying them, we will set the defaults in override_options.  */
4521 flag_omit_frame_pointer = 2;
4523 /* For -O2 and beyond, turn on -fzee for x86_64 target. */
4527 flag_pcc_struct_return = 2;
4528 flag_asynchronous_unwind_tables = 2;
4529 flag_vect_cost_model = 1;
4530 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4531 SUBTARGET_OPTIMIZATION_OPTIONS;
4535 /* Decide whether we must probe the stack before any space allocation
4536 on this target. It's essentially TARGET_STACK_PROBE except when
4537 -fstack-check causes the stack to be already probed differently. */
4540 ix86_target_stack_probe (void)
4542 /* Do not probe the stack twice if static stack checking is enabled. */
4543 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
4546 return TARGET_STACK_PROBE;
4549 /* Decide whether we can make a sibling call to a function. DECL is the
4550 declaration of the function being targeted by the call and EXP is the
4551 CALL_EXPR representing the call. */
4554 ix86_function_ok_for_sibcall (tree decl, tree exp)
4556 tree type, decl_or_type;
4559 /* If we are generating position-independent code, we cannot sibcall
4560 optimize any indirect call, or a direct call to a global function,
4561 as the PLT requires %ebx be live. */
4562 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4565 /* If we need to align the outgoing stack, then sibcalling would
4566 unalign the stack, which may break the called function. */
4567 if (ix86_minimum_incoming_stack_boundary (true)
4568 < PREFERRED_STACK_BOUNDARY)
4573 decl_or_type = decl;
4574 type = TREE_TYPE (decl);
4578 /* We're looking at the CALL_EXPR, we need the type of the function. */
4579 type = CALL_EXPR_FN (exp); /* pointer expression */
4580 type = TREE_TYPE (type); /* pointer type */
4581 type = TREE_TYPE (type); /* function type */
4582 decl_or_type = type;
4585 /* Check that the return value locations are the same.  For example,
4586 if we are returning floats on the 80387 register stack, we cannot
4587 make a sibcall from a function that doesn't return a float to a
4588 function that does or, conversely, from a function that does return
4589 a float to a function that doesn't; the necessary stack adjustment
4590 would not be executed. This is also the place we notice
4591 differences in the return value ABI. Note that it is ok for one
4592 of the functions to have void return type as long as the return
4593 value of the other is passed in a register. */
4594 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4595 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4597 if (STACK_REG_P (a) || STACK_REG_P (b))
4599 if (!rtx_equal_p (a, b))
4602 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4604 else if (!rtx_equal_p (a, b))
4609 /* The SYSV ABI has more call-clobbered registers;
4610 disallow sibcalls from MS to SYSV. */
4611 if (cfun->machine->call_abi == MS_ABI
4612 && ix86_function_type_abi (type) == SYSV_ABI)
4617 /* If this call is indirect, we'll need to be able to use a
4618 call-clobbered register for the address of the target function.
4619 Make sure that all such registers are not used for passing
4620 parameters. Note that DLLIMPORT functions are indirect. */
4622 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4624 if (ix86_function_regparm (type, NULL) >= 3)
4626 /* ??? Need to count the actual number of registers to be used,
4627 not the possible number of registers. Fix later. */
4633 /* Otherwise okay. That also includes certain types of indirect calls. */
4637 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
4638 and "sseregparm" calling convention attributes;
4639 arguments as in struct attribute_spec.handler. */
4642 ix86_handle_cconv_attribute (tree *node, tree name,
4644 int flags ATTRIBUTE_UNUSED,
4647 if (TREE_CODE (*node) != FUNCTION_TYPE
4648 && TREE_CODE (*node) != METHOD_TYPE
4649 && TREE_CODE (*node) != FIELD_DECL
4650 && TREE_CODE (*node) != TYPE_DECL)
4652 warning (OPT_Wattributes, "%qE attribute only applies to functions",
4654 *no_add_attrs = true;
4658 /* Can combine regparm with all attributes but fastcall. */
4659 if (is_attribute_p ("regparm", name))
4663 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4665 error ("fastcall and regparm attributes are not compatible");
4668 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4670 error ("regparm and thiscall attributes are not compatible");
4673 cst = TREE_VALUE (args);
4674 if (TREE_CODE (cst) != INTEGER_CST)
4676 warning (OPT_Wattributes,
4677 "%qE attribute requires an integer constant argument",
4679 *no_add_attrs = true;
4681 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4683 warning (OPT_Wattributes, "argument to %qE attribute larger than %d",
4685 *no_add_attrs = true;
4693 /* Do not warn when emulating the MS ABI. */
4694 if ((TREE_CODE (*node) != FUNCTION_TYPE
4695 && TREE_CODE (*node) != METHOD_TYPE)
4696 || ix86_function_type_abi (*node) != MS_ABI)
4697 warning (OPT_Wattributes, "%qE attribute ignored",
4699 *no_add_attrs = true;
4703 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4704 if (is_attribute_p ("fastcall", name))
4706 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4708 error ("fastcall and cdecl attributes are not compatible");
4710 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4712 error ("fastcall and stdcall attributes are not compatible");
4714 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4716 error ("fastcall and regparm attributes are not compatible");
4718 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4720 error ("fastcall and thiscall attributes are not compatible");
4724 /* Can combine stdcall with fastcall (redundant), regparm and
4725 sseregparm.  */
4726 else if (is_attribute_p ("stdcall", name))
4728 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4730 error ("stdcall and cdecl attributes are not compatible");
4732 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4734 error ("stdcall and fastcall attributes are not compatible");
4736 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4738 error ("stdcall and thiscall attributes are not compatible");
4742 /* Can combine cdecl with regparm and sseregparm. */
4743 else if (is_attribute_p ("cdecl", name))
4745 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4747 error ("stdcall and cdecl attributes are not compatible");
4749 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4751 error ("fastcall and cdecl attributes are not compatible");
4753 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node)))
4755 error ("cdecl and thiscall attributes are not compatible");
4758 else if (is_attribute_p ("thiscall", name))
4760 if (TREE_CODE (*node) != METHOD_TYPE && pedantic)
4761 warning (OPT_Wattributes, "%qE attribute is used for non-class method",
4763 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4765 error ("stdcall and thiscall attributes are not compatible");
4767 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4769 error ("fastcall and thiscall attributes are not compatible");
4771 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4773 error ("cdecl and thiscall attributes are not compatible");
4777 /* Can combine sseregparm with all attributes. */
4782 /* Return 0 if the attributes for two types are incompatible, 1 if they
4783 are compatible, and 2 if they are nearly compatible (which causes a
4784 warning to be generated). */
4787 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4789 /* Check for mismatch of non-default calling convention. */
4790 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4792 if (TREE_CODE (type1) != FUNCTION_TYPE
4793 && TREE_CODE (type1) != METHOD_TYPE)
4796 /* Check for mismatched fastcall/regparm types. */
4797 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4798 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4799 || (ix86_function_regparm (type1, NULL)
4800 != ix86_function_regparm (type2, NULL)))
4803 /* Check for mismatched sseregparm types. */
4804 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4805 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4808 /* Check for mismatched thiscall types. */
4809 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1))
4810 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2)))
4813 /* Check for mismatched return types (cdecl vs stdcall). */
4814 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4815 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4821 /* Return the regparm value for a function with the indicated TYPE and DECL.
4822 DECL may be NULL when calling function indirectly
4823 or considering a libcall. */
4826 ix86_function_regparm (const_tree type, const_tree decl)
4832 return (ix86_function_type_abi (type) == SYSV_ABI
4833 ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX);
4835 regparm = ix86_regparm;
4836 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4839 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4843 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4846 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
4849 /* Use register calling convention for local functions when possible. */
4851 && TREE_CODE (decl) == FUNCTION_DECL
4853 && !(profile_flag && !flag_fentry))
4855 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4856 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE (decl));
4859 int local_regparm, globals = 0, regno;
4861 /* Make sure no regparm register is taken by a
4862 fixed register variable. */
4863 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4864 if (fixed_regs[local_regparm])
4867 /* We don't want to use regparm(3) for nested functions as
4868 these use a static chain pointer in the third argument. */
4869 if (local_regparm == 3 && DECL_STATIC_CHAIN (decl))
4872 /* Each fixed register usage increases register pressure,
4873 so fewer registers should be used for argument passing.
4874 This functionality can be overridden by an explicit
4875 regparm value.  */
4876 for (regno = 0; regno <= DI_REG; regno++)
4877 if (fixed_regs[regno])
4881 = globals < local_regparm ? local_regparm - globals : 0;
4883 if (local_regparm > regparm)
4884 regparm = local_regparm;
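/* Worked example (editor's sketch): under -ffixed-ebx none of the
   argument registers %eax, %edx, %ecx is fixed, so the first scan leaves
   local_regparm at REGPARM_MAX (3); the second scan finds one fixed
   register at or below DI_REG, so globals == 1 and the effective value
   drops to 3 - 1 == 2 argument registers.  */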
4891 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4892 DFmode (2) arguments in SSE registers for a function with the
4893 indicated TYPE and DECL. DECL may be NULL when calling function
4894 indirectly or considering a libcall. Otherwise return 0. */
4897 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4899 gcc_assert (!TARGET_64BIT);
4901 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4902 by the sseregparm attribute. */
4903 if (TARGET_SSEREGPARM
4904 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4911 error ("Calling %qD with attribute sseregparm without "
4912 "SSE/SSE2 enabled", decl);
4914 error ("Calling %qT with attribute sseregparm without "
4915 "SSE/SSE2 enabled", type);
4923 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4924 (and DFmode for SSE2) arguments in SSE registers. */
4925 if (decl && TARGET_SSE_MATH && optimize
4926 && !(profile_flag && !flag_fentry))
4928 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4929 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4931 return TARGET_SSE2 ? 2 : 1;
4937 /* Return true if EAX is live at the start of the function. Used by
4938 ix86_expand_prologue to determine if we need special help before
4939 calling allocate_stack_worker. */
4942 ix86_eax_live_at_start_p (void)
4944 /* Cheat. Don't bother working forward from ix86_function_regparm
4945 to the function type to whether an actual argument is located in
4946 eax. Instead just look at cfg info, which is still close enough
4947 to correct at this point. This gives false positives for broken
4948 functions that might use uninitialized data that happens to be
4949 allocated in eax, but who cares? */
4950 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4953 /* Value is the number of bytes of arguments automatically
4954 popped when returning from a subroutine call.
4955 FUNDECL is the declaration node of the function (as a tree),
4956 FUNTYPE is the data type of the function (as a tree),
4957 or for a library call it is an identifier node for the subroutine name.
4958 SIZE is the number of bytes of arguments passed on the stack.
4960 On the 80386, the RTD insn may be used to pop them if the number
4961 of args is fixed, but if the number is variable then the caller
4962 must pop them all. RTD can't be used for library calls now
4963 because the library is compiled with the Unix compiler.
4964 Use of RTD is a selectable option, since it is incompatible with
4965 standard Unix calling sequences. If the option is not selected,
4966 the caller must always pop the args.
4968 The attribute stdcall is equivalent to RTD on a per module basis. */
4971 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4975 /* None of the 64-bit ABIs pop arguments. */
4979 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4981 /* Cdecl functions override -mrtd, and never pop the stack. */
4982 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4984 /* Stdcall and fastcall functions will pop the stack if not
4985 variable args.  */
4986 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4987 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))
4988 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype)))
4991 if (rtd && ! stdarg_p (funtype))
4995 /* Lose any fake structure return argument if it is passed on the stack. */
4996 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4997 && !KEEP_AGGREGATE_RETURN_POINTER)
4999 int nregs = ix86_function_regparm (funtype, fundecl);
5001 return GET_MODE_SIZE (Pmode);
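/* Worked example (editor's sketch): for

     void __attribute__((stdcall)) f (int a, int b);

   SIZE is 8 and the function is not stdarg, so the hook returns 8 and f
   pops its own arguments with "ret $8"; a plain cdecl function returns 0
   here and the caller adjusts the stack instead.  */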
5007 /* Argument support functions. */
5009 /* Return true when register may be used to pass function parameters. */
5011 ix86_function_arg_regno_p (int regno)
5014 const int *parm_regs;
5019 return (regno < REGPARM_MAX
5020 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
5022 return (regno < REGPARM_MAX
5023 || (TARGET_MMX && MMX_REGNO_P (regno)
5024 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
5025 || (TARGET_SSE && SSE_REGNO_P (regno)
5026 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
5031 if (SSE_REGNO_P (regno) && TARGET_SSE)
5036 if (TARGET_SSE && SSE_REGNO_P (regno)
5037 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
5041 /* TODO: The function should depend on current function ABI but
5042 builtins.c would need updating then.  Therefore we use the
5043 default ABI.  */
5045 /* RAX is used as hidden argument to va_arg functions. */
5046 if (ix86_abi == SYSV_ABI && regno == AX_REG)
5049 if (ix86_abi == MS_ABI)
5050 parm_regs = x86_64_ms_abi_int_parameter_registers;
5052 parm_regs = x86_64_int_parameter_registers;
5053 for (i = 0; i < (ix86_abi == MS_ABI
5054 ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++)
5055 if (regno == parm_regs[i])
5060 /* Return true if we do not know how to pass TYPE solely in registers.  */
5063 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
5065 if (must_pass_in_stack_var_size_or_pad (mode, type))
5068 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
5069 The layout_type routine is crafty and tries to trick us into passing
5070 currently unsupported vector types on the stack by using TImode. */
5071 return (!TARGET_64BIT && mode == TImode
5072 && type && TREE_CODE (type) != VECTOR_TYPE);
5075 /* Return the size, in bytes, of the area reserved for arguments passed
5076 in registers for the function represented by FNDECL, depending on the
5077 ABI used.  */
5079 ix86_reg_parm_stack_space (const_tree fndecl)
5081 enum calling_abi call_abi = SYSV_ABI;
5082 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
5083 call_abi = ix86_function_abi (fndecl);
5085 call_abi = ix86_function_type_abi (fndecl);
5086 if (call_abi == MS_ABI)
5091 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
5092 call ABI used.  */
5094 ix86_function_type_abi (const_tree fntype)
5096 if (TARGET_64BIT && fntype != NULL)
5098 enum calling_abi abi = ix86_abi;
5099 if (abi == SYSV_ABI)
5101 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
5104 else if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
5112 ix86_function_ms_hook_prologue (const_tree fntype)
5114 if (fntype && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fntype)))
5116 if (decl_function_context (fntype) != NULL_TREE)
5118 error_at (DECL_SOURCE_LOCATION (fntype),
5119 "ms_hook_prologue is not compatible with nested function");
5127 static enum calling_abi
5128 ix86_function_abi (const_tree fndecl)
5132 return ix86_function_type_abi (TREE_TYPE (fndecl));
5135 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
5136 call ABI used.  */
5138 ix86_cfun_abi (void)
5140 if (! cfun || ! TARGET_64BIT)
5142 return cfun->machine->call_abi;
5145 /* Write the extra assembler code needed to declare a function properly. */
5148 ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
5151 bool is_ms_hook = ix86_function_ms_hook_prologue (decl);
5155 int i, filler_count = (TARGET_64BIT ? 32 : 16);
5156 unsigned int filler_cc = 0xcccccccc;
5158 for (i = 0; i < filler_count; i += 4)
5159 fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc);
5162 ASM_OUTPUT_LABEL (asm_out_file, fname);
5164 /* Output magic byte marker, if hot-patch attribute is set.
5165 For x86 case frame-pointer prologue will be emitted in
5170 /* leaq [%rsp + 0], %rsp */
5171 asm_fprintf (asm_out_file, ASM_BYTE
5172 "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n");
5174 /* movl.s %edi, %edi. */
5175 asm_fprintf (asm_out_file, ASM_BYTE "0x8b, 0xff\n");
5180 extern void init_regs (void);
5182 /* Implementation of the call ABI switching target hook.  The call
5183 register sets specific to FNDECL are set up here.  See also
5184 CONDITIONAL_REGISTER_USAGE for more details.  */
5186 ix86_call_abi_override (const_tree fndecl)
5188 if (fndecl == NULL_TREE)
5189 cfun->machine->call_abi = ix86_abi;
5191 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
5194 /* MS and SYSV ABI have different sets of call-used registers.  Avoid expensive
5195 re-initialization of init_regs each time we switch function context since
5196 this is needed only during RTL expansion. */
5198 ix86_maybe_switch_abi (void)
5201 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
5205 /* Initialize a variable CUM of type CUMULATIVE_ARGS
5206 for a call to a function whose data type is FNTYPE.
5207 For a library call, FNTYPE is 0. */
5210 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
5211 tree fntype, /* tree ptr for function decl */
5212 rtx libname, /* SYMBOL_REF of library name or 0 */
5215 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
5216 memset (cum, 0, sizeof (*cum));
5219 cum->call_abi = ix86_function_abi (fndecl);
5221 cum->call_abi = ix86_function_type_abi (fntype);
5222 /* Set up the number of registers to use for passing arguments. */
5224 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
5225 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
5226 "or subtarget optimization implying it");
5227 cum->nregs = ix86_regparm;
5230 cum->nregs = (cum->call_abi == SYSV_ABI
5231 ? X86_64_REGPARM_MAX
5232 : X86_64_MS_REGPARM_MAX);
5236 cum->sse_nregs = SSE_REGPARM_MAX;
5239 cum->sse_nregs = (cum->call_abi == SYSV_ABI
5240 ? X86_64_SSE_REGPARM_MAX
5241 : X86_64_MS_SSE_REGPARM_MAX);
5245 cum->mmx_nregs = MMX_REGPARM_MAX;
5246 cum->warn_avx = true;
5247 cum->warn_sse = true;
5248 cum->warn_mmx = true;
5250 /* Because type might mismatch in between caller and callee, we need to
5251 use actual type of function for local calls.
5252 FIXME: cgraph_analyze can be told to actually record if function uses
5253 va_start so for local functions maybe_vaarg can be made aggressive
5255 FIXME: once the type system is fixed, we won't need this code anymore.  */
5257 fntype = TREE_TYPE (fndecl);
5258 cum->maybe_vaarg = (fntype
5259 ? (!prototype_p (fntype) || stdarg_p (fntype))
5264 /* If there are variable arguments, then we won't pass anything
5265 in registers in 32-bit mode. */
5266 if (stdarg_p (fntype))
5277 /* Use ecx and edx registers if function has fastcall attribute,
5278 else look for regparm information. */
5281 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
5284 cum->fastcall = 1; /* Same first register as in fastcall. */
5286 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
5292 cum->nregs = ix86_function_regparm (fntype, fndecl);
5295 /* Set up the number of SSE registers used for passing SFmode
5296 and DFmode arguments. Warn for mismatching ABI. */
5297 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
5301 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
5302 But in the case of vector types, it is some vector mode.
5304 When we have only some of our vector isa extensions enabled, then there
5305 are some modes for which vector_mode_supported_p is false. For these
5306 modes, the generic vector support in gcc will choose some non-vector mode
5307 in order to implement the type. By computing the natural mode, we'll
5308 select the proper ABI location for the operand and not depend on whatever
5309 the middle-end decides to do with these vector types.
5311 The middle-end can't deal with vector types > 16 bytes.  In this
5312 case, we return the original mode and warn ABI change if CUM isn't
5313 NULL.  */
5315 static enum machine_mode
5316 type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum)
5318 enum machine_mode mode = TYPE_MODE (type);
5320 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
5322 HOST_WIDE_INT size = int_size_in_bytes (type);
5323 if ((size == 8 || size == 16 || size == 32)
5324 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
5325 && TYPE_VECTOR_SUBPARTS (type) > 1)
5327 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
5329 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
5330 mode = MIN_MODE_VECTOR_FLOAT;
5332 mode = MIN_MODE_VECTOR_INT;
5334 /* Get the mode which has this inner mode and number of units. */
5335 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
5336 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
5337 && GET_MODE_INNER (mode) == innermode)
5339 if (size == 32 && !TARGET_AVX)
5341 static bool warnedavx;
5348 warning (0, "AVX vector argument without AVX "
5349 "enabled changes the ABI");
5351 return TYPE_MODE (type);
5364 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
5365 this may not agree with the mode that the type system has chosen for the
5366 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
5367 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
5370 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
5375 if (orig_mode != BLKmode)
5376 tmp = gen_rtx_REG (orig_mode, regno);
5379 tmp = gen_rtx_REG (mode, regno);
5380 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
5381 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
5387 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
5388 of this code is to classify each 8 bytes of an incoming argument by the register
5389 class and assign registers accordingly. */
5391 /* Return the union class of CLASS1 and CLASS2.
5392 See the x86-64 PS ABI for details. */
5394 static enum x86_64_reg_class
5395 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
5397 /* Rule #1: If both classes are equal, this is the resulting class. */
5398 if (class1 == class2)
5401 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
5402 the other class.  */
5403 if (class1 == X86_64_NO_CLASS)
5405 if (class2 == X86_64_NO_CLASS)
5408 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
5409 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
5410 return X86_64_MEMORY_CLASS;
5412 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
5413 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
5414 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
5415 return X86_64_INTEGERSI_CLASS;
5416 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
5417 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
5418 return X86_64_INTEGER_CLASS;
5420 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
5421 MEMORY is used.  */
5422 if (class1 == X86_64_X87_CLASS
5423 || class1 == X86_64_X87UP_CLASS
5424 || class1 == X86_64_COMPLEX_X87_CLASS
5425 || class2 == X86_64_X87_CLASS
5426 || class2 == X86_64_X87UP_CLASS
5427 || class2 == X86_64_COMPLEX_X87_CLASS)
5428 return X86_64_MEMORY_CLASS;
5430 /* Rule #6: Otherwise class SSE is used. */
5431 return X86_64_SSE_CLASS;
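/* Worked example (editor's sketch): for

     struct s { int i; int j; double d; };

   the two ints share the first eightbyte and their classes merge to
   INTEGER, while the double makes the second eightbyte an SSE class, so
   the struct is passed in one general-purpose and one SSE register.  */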
5434 /* Classify the argument of type TYPE and mode MODE.
5435 CLASSES will be filled by the register class used to pass each word
5436 of the operand. The number of words is returned. In case the parameter
5437 should be passed in memory, 0 is returned. As a special case for zero
5438 sized containers, classes[0] will be NO_CLASS and 1 is returned.
5440 BIT_OFFSET is used internally for handling records and specifies the
5441 offset in bits modulo 256 to avoid overflow cases.
5443 See the x86-64 PS ABI for details.
5447 classify_argument (enum machine_mode mode, const_tree type,
5448 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
5450 HOST_WIDE_INT bytes =
5451 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5452 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
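/* Worked example (editor's sketch): a 12-byte argument at bit_offset 0
   on a 64-bit target occupies words == (12 + 0 + 8 - 1) / 8 == 2
   eightbytes, so two entries of CLASSES get filled below.  */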
5454 /* Variable sized entities are always passed/returned in memory. */
5458 if (mode != VOIDmode
5459 && targetm.calls.must_pass_in_stack (mode, type))
5462 if (type && AGGREGATE_TYPE_P (type))
5466 enum x86_64_reg_class subclasses[MAX_CLASSES];
5468 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
5472 for (i = 0; i < words; i++)
5473 classes[i] = X86_64_NO_CLASS;
5475 /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
5476 signal memory class, so handle it as a special case.  */
5479 classes[0] = X86_64_NO_CLASS;
5483 /* Classify each field of record and merge classes. */
5484 switch (TREE_CODE (type))
5487 /* And now merge the fields of structure. */
5488 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5490 if (TREE_CODE (field) == FIELD_DECL)
5494 if (TREE_TYPE (field) == error_mark_node)
5497 /* Bitfields are always classified as integer. Handle them
5498 early, since later code would consider them to be
5499 misaligned integers. */
5500 if (DECL_BIT_FIELD (field))
5502 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5503 i < ((int_bit_position (field) + (bit_offset % 64))
5504 + tree_low_cst (DECL_SIZE (field), 0)
5507 merge_classes (X86_64_INTEGER_CLASS,
5514 type = TREE_TYPE (field);
5516 /* Flexible array member is ignored. */
5517 if (TYPE_MODE (type) == BLKmode
5518 && TREE_CODE (type) == ARRAY_TYPE
5519 && TYPE_SIZE (type) == NULL_TREE
5520 && TYPE_DOMAIN (type) != NULL_TREE
5521 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5526 if (!warned && warn_psabi)
5529 inform (input_location,
5530 "The ABI of passing struct with"
5531 " a flexible array member has"
5532 " changed in GCC 4.4");
5536 num = classify_argument (TYPE_MODE (type), type,
5538 (int_bit_position (field)
5539 + bit_offset) % 256);
5542 pos = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5543 for (i = 0; i < num && (i + pos) < words; i++)
5545 merge_classes (subclasses[i], classes[i + pos]);
5552 /* Arrays are handled as small records. */
5555 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5556 TREE_TYPE (type), subclasses, bit_offset);
5560 /* The partial classes are now full classes. */
5561 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5562 subclasses[0] = X86_64_SSE_CLASS;
5563 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5564 && !((bit_offset % 64) == 0 && bytes == 4))
5565 subclasses[0] = X86_64_INTEGER_CLASS;
5567 for (i = 0; i < words; i++)
5568 classes[i] = subclasses[i % num];
5573 case QUAL_UNION_TYPE:
5574 /* Unions are similar to RECORD_TYPE but offset is always 0.
5576 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5578 if (TREE_CODE (field) == FIELD_DECL)
5582 if (TREE_TYPE (field) == error_mark_node)
5585 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5586 TREE_TYPE (field), subclasses,
5590 for (i = 0; i < num; i++)
5591 classes[i] = merge_classes (subclasses[i], classes[i]);
5602 /* When size > 16 bytes, if the first one isn't
5603 X86_64_SSE_CLASS or any other ones aren't
5604 X86_64_SSEUP_CLASS, everything should be passed in
5606 if (classes[0] != X86_64_SSE_CLASS)
5609 for (i = 1; i < words; i++)
5610 if (classes[i] != X86_64_SSEUP_CLASS)
5614 /* Final merger cleanup. */
5615 for (i = 0; i < words; i++)
5617 /* If one class is MEMORY, everything should be passed in
5619 if (classes[i] == X86_64_MEMORY_CLASS)
5622 /* The X86_64_SSEUP_CLASS should be always preceded by
5623 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5624 if (classes[i] == X86_64_SSEUP_CLASS
5625 && classes[i - 1] != X86_64_SSE_CLASS
5626 && classes[i - 1] != X86_64_SSEUP_CLASS)
5628 /* The first one should never be X86_64_SSEUP_CLASS. */
5629 gcc_assert (i != 0);
5630 classes[i] = X86_64_SSE_CLASS;
5633 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5634 everything should be passed in memory. */
5635 if (classes[i] == X86_64_X87UP_CLASS
5636 && (classes[i - 1] != X86_64_X87_CLASS))
5640 /* The first one should never be X86_64_X87UP_CLASS. */
5641 gcc_assert (i != 0);
5642 if (!warned && warn_psabi)
5645 inform (input_location,
5646 "The ABI of passing union with long double"
5647 " has changed in GCC 4.4");
5655 /* Compute alignment needed. We align all types to natural boundaries with
5656 exception of XFmode that is aligned to 64bits. */
5657 if (mode != VOIDmode && mode != BLKmode)
5659 int mode_alignment = GET_MODE_BITSIZE (mode);
5662 mode_alignment = 128;
5663 else if (mode == XCmode)
5664 mode_alignment = 256;
5665 if (COMPLEX_MODE_P (mode))
5666 mode_alignment /= 2;
5667 /* Misaligned fields are always returned in memory. */
5668 if (bit_offset % mode_alignment)
5672 /* for V1xx modes, just use the base mode */
5673 if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
5674 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5675 mode = GET_MODE_INNER (mode);
5677 /* Classification of atomic types. */
5682 classes[0] = X86_64_SSE_CLASS;
5685 classes[0] = X86_64_SSE_CLASS;
5686 classes[1] = X86_64_SSEUP_CLASS;
5696 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5700 classes[0] = X86_64_INTEGERSI_CLASS;
5703 else if (size <= 64)
5705 classes[0] = X86_64_INTEGER_CLASS;
5708 else if (size <= 64+32)
5710 classes[0] = X86_64_INTEGER_CLASS;
5711 classes[1] = X86_64_INTEGERSI_CLASS;
5714 else if (size <= 64+64)
5716 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5724 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5728 /* OImode shouldn't be used directly. */
5733 if (!(bit_offset % 64))
5734 classes[0] = X86_64_SSESF_CLASS;
5736 classes[0] = X86_64_SSE_CLASS;
5739 classes[0] = X86_64_SSEDF_CLASS;
5742 classes[0] = X86_64_X87_CLASS;
5743 classes[1] = X86_64_X87UP_CLASS;
5746 classes[0] = X86_64_SSE_CLASS;
5747 classes[1] = X86_64_SSEUP_CLASS;
5750 classes[0] = X86_64_SSE_CLASS;
5751 if (!(bit_offset % 64))
5757 if (!warned && warn_psabi)
5760 inform (input_location,
5761 "The ABI of passing structure with complex float"
5762 " member has changed in GCC 4.4");
5764 classes[1] = X86_64_SSESF_CLASS;
5768 classes[0] = X86_64_SSEDF_CLASS;
5769 classes[1] = X86_64_SSEDF_CLASS;
5772 classes[0] = X86_64_COMPLEX_X87_CLASS;
5775 /* This modes is larger than 16 bytes. */
5783 classes[0] = X86_64_SSE_CLASS;
5784 classes[1] = X86_64_SSEUP_CLASS;
5785 classes[2] = X86_64_SSEUP_CLASS;
5786 classes[3] = X86_64_SSEUP_CLASS;
5794 classes[0] = X86_64_SSE_CLASS;
5795 classes[1] = X86_64_SSEUP_CLASS;
5803 classes[0] = X86_64_SSE_CLASS;
5809 gcc_assert (VECTOR_MODE_P (mode));
5814 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5816 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5817 classes[0] = X86_64_INTEGERSI_CLASS;
5819 classes[0] = X86_64_INTEGER_CLASS;
5820 classes[1] = X86_64_INTEGER_CLASS;
5821 return 1 + (bytes > 8);
5825 /* Examine the argument and return set number of register required in each
5826 class. Return 0 iff parameter should be passed in memory. */
5828 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5829 int *int_nregs, int *sse_nregs)
5831 enum x86_64_reg_class regclass[MAX_CLASSES];
5832 int n = classify_argument (mode, type, regclass, 0);
5838 for (n--; n >= 0; n--)
5839 switch (regclass[n])
5841 case X86_64_INTEGER_CLASS:
5842 case X86_64_INTEGERSI_CLASS:
5845 case X86_64_SSE_CLASS:
5846 case X86_64_SSESF_CLASS:
5847 case X86_64_SSEDF_CLASS:
5850 case X86_64_NO_CLASS:
5851 case X86_64_SSEUP_CLASS:
5853 case X86_64_X87_CLASS:
5854 case X86_64_X87UP_CLASS:
5858 case X86_64_COMPLEX_X87_CLASS:
5859 return in_return ? 2 : 0;
5860 case X86_64_MEMORY_CLASS:
5866 /* Construct container for the argument used by GCC interface. See
5867 FUNCTION_ARG for the detailed description. */
5870 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5871 const_tree type, int in_return, int nintregs, int nsseregs,
5872 const int *intreg, int sse_regno)
5874 /* The following variables hold the static issued_error state. */
5875 static bool issued_sse_arg_error;
5876 static bool issued_sse_ret_error;
5877 static bool issued_x87_ret_error;
5879 enum machine_mode tmpmode;
5881 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5882 enum x86_64_reg_class regclass[MAX_CLASSES];
5886 int needed_sseregs, needed_intregs;
5887 rtx exp[MAX_CLASSES];
5890 n = classify_argument (mode, type, regclass, 0);
5893 if (!examine_argument (mode, type, in_return, &needed_intregs,
5896 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5899 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5900 some less clueful developer tries to use floating-point anyway. */
5901 if (needed_sseregs && !TARGET_SSE)
5905 if (!issued_sse_ret_error)
5907 error ("SSE register return with SSE disabled");
5908 issued_sse_ret_error = true;
5911 else if (!issued_sse_arg_error)
5913 error ("SSE register argument with SSE disabled");
5914 issued_sse_arg_error = true;
5919 /* Likewise, error if the ABI requires us to return values in the
5920 x87 registers and the user specified -mno-80387. */
5921 if (!TARGET_80387 && in_return)
5922 for (i = 0; i < n; i++)
5923 if (regclass[i] == X86_64_X87_CLASS
5924 || regclass[i] == X86_64_X87UP_CLASS
5925 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5927 if (!issued_x87_ret_error)
5929 error ("x87 register return with x87 disabled");
5930 issued_x87_ret_error = true;
5935 /* First construct simple cases. Avoid SCmode, since we want to use
5936 single register to pass this type. */
5937 if (n == 1 && mode != SCmode)
5938 switch (regclass[0])
5940 case X86_64_INTEGER_CLASS:
5941 case X86_64_INTEGERSI_CLASS:
5942 return gen_rtx_REG (mode, intreg[0]);
5943 case X86_64_SSE_CLASS:
5944 case X86_64_SSESF_CLASS:
5945 case X86_64_SSEDF_CLASS:
5946 if (mode != BLKmode)
5947 return gen_reg_or_parallel (mode, orig_mode,
5948 SSE_REGNO (sse_regno));
5950 case X86_64_X87_CLASS:
5951 case X86_64_COMPLEX_X87_CLASS:
5952 return gen_rtx_REG (mode, FIRST_STACK_REG);
5953 case X86_64_NO_CLASS:
5954 /* Zero sized array, struct or class. */
5959 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5960 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5961 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5963 && regclass[0] == X86_64_SSE_CLASS
5964 && regclass[1] == X86_64_SSEUP_CLASS
5965 && regclass[2] == X86_64_SSEUP_CLASS
5966 && regclass[3] == X86_64_SSEUP_CLASS
5968 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5971 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5972 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5973 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5974 && regclass[1] == X86_64_INTEGER_CLASS
5975 && (mode == CDImode || mode == TImode || mode == TFmode)
5976 && intreg[0] + 1 == intreg[1])
5977 return gen_rtx_REG (mode, intreg[0]);
5979 /* Otherwise figure out the entries of the PARALLEL. */
5980 for (i = 0; i < n; i++)
5984 switch (regclass[i])
5986 case X86_64_NO_CLASS:
5988 case X86_64_INTEGER_CLASS:
5989 case X86_64_INTEGERSI_CLASS:
5990 /* Merge TImodes on aligned occasions here too. */
5991 if (i * 8 + 8 > bytes)
5992 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5993 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5997 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5998 if (tmpmode == BLKmode)
6000 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6001 gen_rtx_REG (tmpmode, *intreg),
6005 case X86_64_SSESF_CLASS:
6006 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6007 gen_rtx_REG (SFmode,
6008 SSE_REGNO (sse_regno)),
6012 case X86_64_SSEDF_CLASS:
6013 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6014 gen_rtx_REG (DFmode,
6015 SSE_REGNO (sse_regno)),
6019 case X86_64_SSE_CLASS:
6027 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
6037 && regclass[1] == X86_64_SSEUP_CLASS
6038 && regclass[2] == X86_64_SSEUP_CLASS
6039 && regclass[3] == X86_64_SSEUP_CLASS);
6046 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
6047 gen_rtx_REG (tmpmode,
6048 SSE_REGNO (sse_regno)),
6057 /* Empty aligned struct, union or class. */
6061 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
6062 for (i = 0; i < nexps; i++)
6063 XVECEXP (ret, 0, i) = exp [i];
6067 /* Update the data in CUM to advance over an argument of mode MODE
6068 and data type TYPE. (TYPE is null for libcalls where that information
6069 may not be available.) */
6072 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6073 const_tree type, HOST_WIDE_INT bytes,
6074 HOST_WIDE_INT words)
6090 cum->words += words;
6091 cum->nregs -= words;
6092 cum->regno += words;
6094 if (cum->nregs <= 0)
6102 /* OImode shouldn't be used directly. */
6106 if (cum->float_in_sse < 2)
6109 if (cum->float_in_sse < 1)
6126 if (!type || !AGGREGATE_TYPE_P (type))
6128 cum->sse_words += words;
6129 cum->sse_nregs -= 1;
6130 cum->sse_regno += 1;
6131 if (cum->sse_nregs <= 0)
6145 if (!type || !AGGREGATE_TYPE_P (type))
6147 cum->mmx_words += words;
6148 cum->mmx_nregs -= 1;
6149 cum->mmx_regno += 1;
6150 if (cum->mmx_nregs <= 0)
6161 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6162 const_tree type, HOST_WIDE_INT words, bool named)
6164 int int_nregs, sse_nregs;
6166 /* Unnamed 256bit vector mode parameters are passed on stack. */
6167 if (!named && VALID_AVX256_REG_MODE (mode))
6170 if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs)
6171 && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
6173 cum->nregs -= int_nregs;
6174 cum->sse_nregs -= sse_nregs;
6175 cum->regno += int_nregs;
6176 cum->sse_regno += sse_nregs;
6180 int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD;
6181 cum->words = (cum->words + align - 1) & ~(align - 1);
6182 cum->words += words;
6187 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
6188 HOST_WIDE_INT words)
6190 /* Otherwise, this should be passed indirect. */
6191 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
6193 cum->words += words;
6201 /* Update the data in CUM to advance over an argument of mode MODE and
6202 data type TYPE. (TYPE is null for libcalls where that information
6203 may not be available.) */
6206 ix86_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6207 const_tree type, bool named)
6209 HOST_WIDE_INT bytes, words;
6211 if (mode == BLKmode)
6212 bytes = int_size_in_bytes (type);
6214 bytes = GET_MODE_SIZE (mode);
6215 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6218 mode = type_natural_mode (type, NULL);
6220 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6221 function_arg_advance_ms_64 (cum, bytes, words);
6222 else if (TARGET_64BIT)
6223 function_arg_advance_64 (cum, mode, type, words, named);
6225 function_arg_advance_32 (cum, mode, type, bytes, words);
6228 /* Define where to put the arguments to a function.
6229 Value is zero to push the argument on the stack,
6230 or a hard register in which to store the argument.
6232 MODE is the argument's machine mode.
6233 TYPE is the data type of the argument (as a tree).
6234 This is null for libcalls where that information may
6236 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6237 the preceding args and about the function being called.
6238 NAMED is nonzero if this argument is a named parameter
6239 (otherwise it is an extra parameter matching an ellipsis). */
6242 function_arg_32 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6243 enum machine_mode orig_mode, const_tree type,
6244 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
6246 static bool warnedsse, warnedmmx;
6248 /* Avoid the AL settings for the Unix64 ABI. */
6249 if (mode == VOIDmode)
6265 if (words <= cum->nregs)
6267 int regno = cum->regno;
6269 /* Fastcall allocates the first two DWORD (SImode) or
6270 smaller arguments to ECX and EDX if it isn't an
6276 || (type && AGGREGATE_TYPE_P (type)))
6279 /* ECX not EAX is the first allocated register. */
6280 if (regno == AX_REG)
6283 return gen_rtx_REG (mode, regno);
6288 if (cum->float_in_sse < 2)
6291 if (cum->float_in_sse < 1)
6295 /* In 32bit, we pass TImode in xmm registers. */
6302 if (!type || !AGGREGATE_TYPE_P (type))
6304 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
6307 warning (0, "SSE vector argument without SSE enabled "
6311 return gen_reg_or_parallel (mode, orig_mode,
6312 cum->sse_regno + FIRST_SSE_REG);
6317 /* OImode shouldn't be used directly. */
6326 if (!type || !AGGREGATE_TYPE_P (type))
6329 return gen_reg_or_parallel (mode, orig_mode,
6330 cum->sse_regno + FIRST_SSE_REG);
6340 if (!type || !AGGREGATE_TYPE_P (type))
6342 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
6345 warning (0, "MMX vector argument without MMX enabled "
6349 return gen_reg_or_parallel (mode, orig_mode,
6350 cum->mmx_regno + FIRST_MMX_REG);
6359 function_arg_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6360 enum machine_mode orig_mode, const_tree type, bool named)
6362 /* Handle a hidden AL argument containing number of registers
6363 for varargs x86-64 functions. */
6364 if (mode == VOIDmode)
6365 return GEN_INT (cum->maybe_vaarg
6366 ? (cum->sse_nregs < 0
6367 ? X86_64_SSE_REGPARM_MAX
6382 /* Unnamed 256bit vector mode parameters are passed on stack. */
6388 return construct_container (mode, orig_mode, type, 0, cum->nregs,
6390 &x86_64_int_parameter_registers [cum->regno],
6395 function_arg_ms_64 (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
6396 enum machine_mode orig_mode, bool named,
6397 HOST_WIDE_INT bytes)
6401 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
6402 We use value of -2 to specify that current function call is MSABI. */
6403 if (mode == VOIDmode)
6404 return GEN_INT (-2);
6406 /* If we've run out of registers, it goes on the stack. */
6407 if (cum->nregs == 0)
6410 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
6412 /* Only floating point modes are passed in anything but integer regs. */
6413 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
6416 regno = cum->regno + FIRST_SSE_REG;
6421 /* Unnamed floating parameters are passed in both the
6422 SSE and integer registers. */
6423 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
6424 t2 = gen_rtx_REG (mode, regno);
6425 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
6426 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
6427 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
6430 /* Handle aggregated types passed in register. */
6431 if (orig_mode == BLKmode)
6433 if (bytes > 0 && bytes <= 8)
6434 mode = (bytes > 4 ? DImode : SImode);
6435 if (mode == BLKmode)
6439 return gen_reg_or_parallel (mode, orig_mode, regno);
6442 /* Return where to put the arguments to a function.
6443 Return zero to push the argument on the stack, or a hard register in which to store the argument.
6445 MODE is the argument's machine mode. TYPE is the data type of the
6446 argument. It is null for libcalls where that information may not be
6447 available. CUM gives information about the preceding args and about
6448 the function being called. NAMED is nonzero if this argument is a
6449 named parameter (otherwise it is an extra parameter matching an
6453 ix86_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode omode,
6454 const_tree type, bool named)
6456 enum machine_mode mode = omode;
6457 HOST_WIDE_INT bytes, words;
6459 if (mode == BLKmode)
6460 bytes = int_size_in_bytes (type);
6462 bytes = GET_MODE_SIZE (mode);
6463 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6465 /* To simplify the code below, represent vector types with a vector mode
6466 even if MMX/SSE are not active. */
6467 if (type && TREE_CODE (type) == VECTOR_TYPE)
6468 mode = type_natural_mode (type, cum);
6470 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6471 return function_arg_ms_64 (cum, mode, omode, named, bytes);
6472 else if (TARGET_64BIT)
6473 return function_arg_64 (cum, mode, omode, type, named);
6475 return function_arg_32 (cum, mode, omode, type, bytes, words);
6478 /* A C expression that indicates when an argument must be passed by
6479 reference. If nonzero for an argument, a copy of that argument is
6480 made in memory and a pointer to the argument is passed instead of
6481 the argument itself. The pointer is passed in whatever way is
6482 appropriate for passing a pointer to that type. */
6485 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
6486 enum machine_mode mode ATTRIBUTE_UNUSED,
6487 const_tree type, bool named ATTRIBUTE_UNUSED)
6489 /* See Windows x64 Software Convention. */
6490 if (TARGET_64BIT && (cum ? cum->call_abi : ix86_abi) == MS_ABI)
6492 int msize = (int) GET_MODE_SIZE (mode);
6495 /* Arrays are passed by reference. */
6496 if (TREE_CODE (type) == ARRAY_TYPE)
6499 if (AGGREGATE_TYPE_P (type))
6501 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
6502 are passed by reference. */
6503 msize = int_size_in_bytes (type);
6507 /* __m128 is passed by reference. */
6509 case 1: case 2: case 4: case 8:
6515 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6521 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6524 contains_aligned_value_p (const_tree type)
6526 enum machine_mode mode = TYPE_MODE (type);
6527 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6531 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6533 if (TYPE_ALIGN (type) < 128)
6536 if (AGGREGATE_TYPE_P (type))
6538 /* Walk the aggregates recursively. */
6539 switch (TREE_CODE (type))
6543 case QUAL_UNION_TYPE:
6547 /* Walk all the structure fields. */
6548 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6550 if (TREE_CODE (field) == FIELD_DECL
6551 && contains_aligned_value_p (TREE_TYPE (field)))
6558 /* Just for use if some languages passes arrays by value. */
6559 if (contains_aligned_value_p (TREE_TYPE (type)))
6570 /* Gives the alignment boundary, in bits, of an argument with the
6571 specified mode and type. */
6574 ix86_function_arg_boundary (enum machine_mode mode, const_tree type)
6579 /* Since the main variant type is used for call, we convert it to
6580 the main variant type. */
6581 type = TYPE_MAIN_VARIANT (type);
6582 align = TYPE_ALIGN (type);
6585 align = GET_MODE_ALIGNMENT (mode);
6586 if (align < PARM_BOUNDARY)
6587 align = PARM_BOUNDARY;
6588 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6589 natural boundaries. */
6590 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6592 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6593 make an exception for SSE modes since these require 128bit
6596 The handling here differs from field_alignment. ICC aligns MMX
6597 arguments to 4 byte boundaries, while structure fields are aligned
6598 to 8 byte boundaries. */
6601 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6602 align = PARM_BOUNDARY;
6606 if (!contains_aligned_value_p (type))
6607 align = PARM_BOUNDARY;
6610 if (align > BIGGEST_ALIGNMENT)
6611 align = BIGGEST_ALIGNMENT;
6615 /* Return true if N is a possible register number of function value. */
6618 ix86_function_value_regno_p (const unsigned int regno)
6625 case FIRST_FLOAT_REG:
6626 /* TODO: The function should depend on current function ABI but
6627 builtins.c would need updating then. Therefore we use the
6629 if (TARGET_64BIT && ix86_abi == MS_ABI)
6631 return TARGET_FLOAT_RETURNS_IN_80387;
6637 if (TARGET_MACHO || TARGET_64BIT)
6645 /* Define how to find the value returned by a function.
6646 VALTYPE is the data type of the value (as a tree).
6647 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6648 otherwise, FUNC is 0. */
6651 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6652 const_tree fntype, const_tree fn)
6656 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6657 we normally prevent this case when mmx is not available. However
6658 some ABIs may require the result to be returned like DImode. */
6659 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6660 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6662 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6663 we prevent this case when sse is not available. However some ABIs
6664 may require the result to be returned like integer TImode. */
6665 else if (mode == TImode
6666 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6667 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6669 /* 32-byte vector modes in %ymm0. */
6670 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6671 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6673 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6674 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6675 regno = FIRST_FLOAT_REG;
6677 /* Most things go in %eax. */
6680 /* Override FP return register with %xmm0 for local functions when
6681 SSE math is enabled or for functions with sseregparm attribute. */
6682 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6684 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6685 if ((sse_level >= 1 && mode == SFmode)
6686 || (sse_level == 2 && mode == DFmode))
6687 regno = FIRST_SSE_REG;
6690 /* OImode shouldn't be used directly. */
6691 gcc_assert (mode != OImode);
6693 return gen_rtx_REG (orig_mode, regno);
6697 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6702 /* Handle libcalls, which don't provide a type node. */
6703 if (valtype == NULL)
6715 return gen_rtx_REG (mode, FIRST_SSE_REG);
6718 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6722 return gen_rtx_REG (mode, AX_REG);
6726 ret = construct_container (mode, orig_mode, valtype, 1,
6727 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6728 x86_64_int_return_registers, 0);
6730 /* For zero sized structures, construct_container returns NULL, but we
6731 need to keep rest of compiler happy by returning meaningful value. */
6733 ret = gen_rtx_REG (orig_mode, AX_REG);
6739 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6741 unsigned int regno = AX_REG;
6745 switch (GET_MODE_SIZE (mode))
6748 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6749 && !COMPLEX_MODE_P (mode))
6750 regno = FIRST_SSE_REG;
6754 if (mode == SFmode || mode == DFmode)
6755 regno = FIRST_SSE_REG;
6761 return gen_rtx_REG (orig_mode, regno);
6765 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6766 enum machine_mode orig_mode, enum machine_mode mode)
6768 const_tree fn, fntype;
6771 if (fntype_or_decl && DECL_P (fntype_or_decl))
6772 fn = fntype_or_decl;
6773 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6775 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6776 return function_value_ms_64 (orig_mode, mode);
6777 else if (TARGET_64BIT)
6778 return function_value_64 (orig_mode, mode, valtype);
6780 return function_value_32 (orig_mode, mode, fntype, fn);
6784 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6785 bool outgoing ATTRIBUTE_UNUSED)
6787 enum machine_mode mode, orig_mode;
6789 orig_mode = TYPE_MODE (valtype);
6790 mode = type_natural_mode (valtype, NULL);
6791 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6795 ix86_libcall_value (enum machine_mode mode)
6797 return ix86_function_value_1 (NULL, NULL, mode, mode);
6800 /* Return true iff type is returned in memory. */
6802 static int ATTRIBUTE_UNUSED
6803 return_in_memory_32 (const_tree type, enum machine_mode mode)
6807 if (mode == BLKmode)
6810 size = int_size_in_bytes (type);
6812 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6815 if (VECTOR_MODE_P (mode) || mode == TImode)
6817 /* User-created vectors small enough to fit in EAX. */
6821 /* MMX/3dNow values are returned in MM0,
6822 except when it doesn't exits. */
6824 return (TARGET_MMX ? 0 : 1);
6826 /* SSE values are returned in XMM0, except when it doesn't exist. */
6828 return (TARGET_SSE ? 0 : 1);
6830 /* AVX values are returned in YMM0, except when it doesn't exist. */
6832 return TARGET_AVX ? 0 : 1;
6841 /* OImode shouldn't be used directly. */
6842 gcc_assert (mode != OImode);
6847 static int ATTRIBUTE_UNUSED
6848 return_in_memory_64 (const_tree type, enum machine_mode mode)
6850 int needed_intregs, needed_sseregs;
6851 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6854 static int ATTRIBUTE_UNUSED
6855 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6857 HOST_WIDE_INT size = int_size_in_bytes (type);
6859 /* __m128 is returned in xmm0. */
6860 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6861 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6864 /* Otherwise, the size must be exactly in [1248]. */
6865 return (size != 1 && size != 2 && size != 4 && size != 8);
6869 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6871 #ifdef SUBTARGET_RETURN_IN_MEMORY
6872 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6874 const enum machine_mode mode = type_natural_mode (type, NULL);
6878 if (ix86_function_type_abi (fntype) == MS_ABI)
6879 return return_in_memory_ms_64 (type, mode);
6881 return return_in_memory_64 (type, mode);
6884 return return_in_memory_32 (type, mode);
6888 /* Return false iff TYPE is returned in memory. This version is used
6889 on Solaris 2. It is similar to the generic ix86_return_in_memory,
6890 but differs notably in that when MMX is available, 8-byte vectors
6891 are returned in memory, rather than in MMX registers. */
6894 ix86_solaris_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6897 enum machine_mode mode = type_natural_mode (type, NULL);
6900 return return_in_memory_64 (type, mode);
6902 if (mode == BLKmode)
6905 size = int_size_in_bytes (type);
6907 if (VECTOR_MODE_P (mode))
6909 /* Return in memory only if MMX registers *are* available. This
6910 seems backwards, but it is consistent with the existing
6917 else if (mode == TImode)
6919 else if (mode == XFmode)
6925 /* When returning SSE vector types, we have a choice of either
6926 (1) being abi incompatible with a -march switch, or
6927 (2) generating an error.
6928 Given no good solution, I think the safest thing is one warning.
6929 The user won't be able to use -Werror, but....
6931 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6932 called in response to actually generating a caller or callee that
6933 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6934 via aggregate_value_p for general type probing from tree-ssa. */
6937 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6939 static bool warnedsse, warnedmmx;
6941 if (!TARGET_64BIT && type)
6943 /* Look at the return type of the function, not the function type. */
6944 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6946 if (!TARGET_SSE && !warnedsse)
6949 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6952 warning (0, "SSE vector return without SSE enabled "
6957 if (!TARGET_MMX && !warnedmmx)
6959 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6962 warning (0, "MMX vector return without MMX enabled "
6972 /* Create the va_list data type. */
6974 /* Returns the calling convention specific va_list date type.
6975 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6978 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6980 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6982 /* For i386 we use plain pointer to argument area. */
6983 if (!TARGET_64BIT || abi == MS_ABI)
6984 return build_pointer_type (char_type_node);
6986 record = lang_hooks.types.make_type (RECORD_TYPE);
6987 type_decl = build_decl (BUILTINS_LOCATION,
6988 TYPE_DECL, get_identifier ("__va_list_tag"), record);
6990 f_gpr = build_decl (BUILTINS_LOCATION,
6991 FIELD_DECL, get_identifier ("gp_offset"),
6992 unsigned_type_node);
6993 f_fpr = build_decl (BUILTINS_LOCATION,
6994 FIELD_DECL, get_identifier ("fp_offset"),
6995 unsigned_type_node);
6996 f_ovf = build_decl (BUILTINS_LOCATION,
6997 FIELD_DECL, get_identifier ("overflow_arg_area"),
6999 f_sav = build_decl (BUILTINS_LOCATION,
7000 FIELD_DECL, get_identifier ("reg_save_area"),
7003 va_list_gpr_counter_field = f_gpr;
7004 va_list_fpr_counter_field = f_fpr;
7006 DECL_FIELD_CONTEXT (f_gpr) = record;
7007 DECL_FIELD_CONTEXT (f_fpr) = record;
7008 DECL_FIELD_CONTEXT (f_ovf) = record;
7009 DECL_FIELD_CONTEXT (f_sav) = record;
7011 TREE_CHAIN (record) = type_decl;
7012 TYPE_NAME (record) = type_decl;
7013 TYPE_FIELDS (record) = f_gpr;
7014 DECL_CHAIN (f_gpr) = f_fpr;
7015 DECL_CHAIN (f_fpr) = f_ovf;
7016 DECL_CHAIN (f_ovf) = f_sav;
7018 layout_type (record);
7020 /* The correct type is an array type of one element. */
7021 return build_array_type (record, build_index_type (size_zero_node));
7024 /* Setup the builtin va_list data type and for 64-bit the additional
7025 calling convention specific va_list data types. */
7028 ix86_build_builtin_va_list (void)
7030 tree ret = ix86_build_builtin_va_list_abi (ix86_abi);
7032 /* Initialize abi specific va_list builtin types. */
7036 if (ix86_abi == MS_ABI)
7038 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
7039 if (TREE_CODE (t) != RECORD_TYPE)
7040 t = build_variant_type_copy (t);
7041 sysv_va_list_type_node = t;
7046 if (TREE_CODE (t) != RECORD_TYPE)
7047 t = build_variant_type_copy (t);
7048 sysv_va_list_type_node = t;
7050 if (ix86_abi != MS_ABI)
7052 t = ix86_build_builtin_va_list_abi (MS_ABI);
7053 if (TREE_CODE (t) != RECORD_TYPE)
7054 t = build_variant_type_copy (t);
7055 ms_va_list_type_node = t;
7060 if (TREE_CODE (t) != RECORD_TYPE)
7061 t = build_variant_type_copy (t);
7062 ms_va_list_type_node = t;
7069 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
7072 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
7078 /* GPR size of varargs save area. */
7079 if (cfun->va_list_gpr_size)
7080 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
7082 ix86_varargs_gpr_size = 0;
7084 /* FPR size of varargs save area. We don't need it if we don't pass
7085 anything in SSE registers. */
7086 if (TARGET_SSE && cfun->va_list_fpr_size)
7087 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
7089 ix86_varargs_fpr_size = 0;
7091 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
7094 save_area = frame_pointer_rtx;
7095 set = get_varargs_alias_set ();
7097 max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
7098 if (max > X86_64_REGPARM_MAX)
7099 max = X86_64_REGPARM_MAX;
7101 for (i = cum->regno; i < max; i++)
7103 mem = gen_rtx_MEM (Pmode,
7104 plus_constant (save_area, i * UNITS_PER_WORD));
7105 MEM_NOTRAP_P (mem) = 1;
7106 set_mem_alias_set (mem, set);
7107 emit_move_insn (mem, gen_rtx_REG (Pmode,
7108 x86_64_int_parameter_registers[i]));
7111 if (ix86_varargs_fpr_size)
7113 enum machine_mode smode;
7116 /* Now emit code to save SSE registers. The AX parameter contains number
7117 of SSE parameter registers used to call this function, though all we
7118 actually check here is the zero/non-zero status. */
7120 label = gen_label_rtx ();
7121 test = gen_rtx_EQ (VOIDmode, gen_rtx_REG (QImode, AX_REG), const0_rtx);
7122 emit_jump_insn (gen_cbranchqi4 (test, XEXP (test, 0), XEXP (test, 1),
7125 /* ??? If !TARGET_SSE_TYPELESS_STORES, would we perform better if
7126 we used movdqa (i.e. TImode) instead? Perhaps even better would
7127 be if we could determine the real mode of the data, via a hook
7128 into pass_stdarg. Ignore all that for now. */
7130 if (crtl->stack_alignment_needed < GET_MODE_ALIGNMENT (smode))
7131 crtl->stack_alignment_needed = GET_MODE_ALIGNMENT (smode);
7133 max = cum->sse_regno + cfun->va_list_fpr_size / 16;
7134 if (max > X86_64_SSE_REGPARM_MAX)
7135 max = X86_64_SSE_REGPARM_MAX;
7137 for (i = cum->sse_regno; i < max; ++i)
7139 mem = plus_constant (save_area, i * 16 + ix86_varargs_gpr_size);
7140 mem = gen_rtx_MEM (smode, mem);
7141 MEM_NOTRAP_P (mem) = 1;
7142 set_mem_alias_set (mem, set);
7143 set_mem_align (mem, GET_MODE_ALIGNMENT (smode));
7145 emit_move_insn (mem, gen_rtx_REG (smode, SSE_REGNO (i)));
7153 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
7155 alias_set_type set = get_varargs_alias_set ();
7158 for (i = cum->regno; i < X86_64_MS_REGPARM_MAX; i++)
7162 mem = gen_rtx_MEM (Pmode,
7163 plus_constant (virtual_incoming_args_rtx,
7164 i * UNITS_PER_WORD));
7165 MEM_NOTRAP_P (mem) = 1;
7166 set_mem_alias_set (mem, set);
7168 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
7169 emit_move_insn (mem, reg);
7174 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7175 tree type, int *pretend_size ATTRIBUTE_UNUSED,
7178 CUMULATIVE_ARGS next_cum;
7181 /* This argument doesn't appear to be used anymore. Which is good,
7182 because the old code here didn't suppress rtl generation. */
7183 gcc_assert (!no_rtl);
7188 fntype = TREE_TYPE (current_function_decl);
7190 /* For varargs, we do not want to skip the dummy va_dcl argument.
7191 For stdargs, we do want to skip the last named argument. */
7193 if (stdarg_p (fntype))
7194 ix86_function_arg_advance (&next_cum, mode, type, true);
7196 if (cum->call_abi == MS_ABI)
7197 setup_incoming_varargs_ms_64 (&next_cum);
7199 setup_incoming_varargs_64 (&next_cum);
7202 /* Checks if TYPE is of kind va_list char *. */
7205 is_va_list_char_pointer (tree type)
7209 /* For 32-bit it is always true. */
7212 canonic = ix86_canonical_va_list_type (type);
7213 return (canonic == ms_va_list_type_node
7214 || (ix86_abi == MS_ABI && canonic == va_list_type_node));
7217 /* Implement va_start. */
7220 ix86_va_start (tree valist, rtx nextarg)
7222 HOST_WIDE_INT words, n_gpr, n_fpr;
7223 tree f_gpr, f_fpr, f_ovf, f_sav;
7224 tree gpr, fpr, ovf, sav, t;
7227 /* Only 64bit target needs something special. */
7228 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7230 std_expand_builtin_va_start (valist, nextarg);
7234 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7235 f_fpr = DECL_CHAIN (f_gpr);
7236 f_ovf = DECL_CHAIN (f_fpr);
7237 f_sav = DECL_CHAIN (f_ovf);
7239 valist = build_simple_mem_ref (valist);
7240 TREE_TYPE (valist) = TREE_TYPE (sysv_va_list_type_node);
7241 /* The following should be folded into the MEM_REF offset. */
7242 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), unshare_expr (valist),
7244 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
7246 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
7248 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
7251 /* Count number of gp and fp argument registers used. */
7252 words = crtl->args.info.words;
7253 n_gpr = crtl->args.info.regno;
7254 n_fpr = crtl->args.info.sse_regno;
7256 if (cfun->va_list_gpr_size)
7258 type = TREE_TYPE (gpr);
7259 t = build2 (MODIFY_EXPR, type,
7260 gpr, build_int_cst (type, n_gpr * 8));
7261 TREE_SIDE_EFFECTS (t) = 1;
7262 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7265 if (TARGET_SSE && cfun->va_list_fpr_size)
7267 type = TREE_TYPE (fpr);
7268 t = build2 (MODIFY_EXPR, type, fpr,
7269 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
7270 TREE_SIDE_EFFECTS (t) = 1;
7271 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7274 /* Find the overflow area. */
7275 type = TREE_TYPE (ovf);
7276 t = make_tree (type, crtl->args.internal_arg_pointer);
7278 t = build2 (POINTER_PLUS_EXPR, type, t,
7279 size_int (words * UNITS_PER_WORD));
7280 t = build2 (MODIFY_EXPR, type, ovf, t);
7281 TREE_SIDE_EFFECTS (t) = 1;
7282 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7284 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
7286 /* Find the register save area.
7287 Prologue of the function save it right above stack frame. */
7288 type = TREE_TYPE (sav);
7289 t = make_tree (type, frame_pointer_rtx);
7290 if (!ix86_varargs_gpr_size)
7291 t = build2 (POINTER_PLUS_EXPR, type, t,
7292 size_int (-8 * X86_64_REGPARM_MAX));
7293 t = build2 (MODIFY_EXPR, type, sav, t);
7294 TREE_SIDE_EFFECTS (t) = 1;
7295 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7299 /* Implement va_arg. */
7302 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7305 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
7306 tree f_gpr, f_fpr, f_ovf, f_sav;
7307 tree gpr, fpr, ovf, sav, t;
7309 tree lab_false, lab_over = NULL_TREE;
7314 enum machine_mode nat_mode;
7315 unsigned int arg_boundary;
7317 /* Only 64bit target needs something special. */
7318 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
7319 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
7321 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
7322 f_fpr = DECL_CHAIN (f_gpr);
7323 f_ovf = DECL_CHAIN (f_fpr);
7324 f_sav = DECL_CHAIN (f_ovf);
7326 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
7327 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
7328 valist = build_va_arg_indirect_ref (valist);
7329 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
7330 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
7331 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
7333 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
7335 type = build_pointer_type (type);
7336 size = int_size_in_bytes (type);
7337 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7339 nat_mode = type_natural_mode (type, NULL);
7348 /* Unnamed 256bit vector mode parameters are passed on stack. */
7349 if (ix86_cfun_abi () == SYSV_ABI)
7356 container = construct_container (nat_mode, TYPE_MODE (type),
7357 type, 0, X86_64_REGPARM_MAX,
7358 X86_64_SSE_REGPARM_MAX, intreg,
7363 /* Pull the value out of the saved registers. */
7365 addr = create_tmp_var (ptr_type_node, "addr");
7369 int needed_intregs, needed_sseregs;
7371 tree int_addr, sse_addr;
7373 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7374 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7376 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
7378 need_temp = (!REG_P (container)
7379 && ((needed_intregs && TYPE_ALIGN (type) > 64)
7380 || TYPE_ALIGN (type) > 128));
7382 /* In case we are passing structure, verify that it is consecutive block
7383 on the register save area. If not we need to do moves. */
7384 if (!need_temp && !REG_P (container))
7386 /* Verify that all registers are strictly consecutive */
7387 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
7391 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7393 rtx slot = XVECEXP (container, 0, i);
7394 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
7395 || INTVAL (XEXP (slot, 1)) != i * 16)
7403 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
7405 rtx slot = XVECEXP (container, 0, i);
7406 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
7407 || INTVAL (XEXP (slot, 1)) != i * 8)
7419 int_addr = create_tmp_var (ptr_type_node, "int_addr");
7420 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
7423 /* First ensure that we fit completely in registers. */
7426 t = build_int_cst (TREE_TYPE (gpr),
7427 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
7428 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
7429 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7430 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7431 gimplify_and_add (t, pre_p);
7435 t = build_int_cst (TREE_TYPE (fpr),
7436 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
7437 + X86_64_REGPARM_MAX * 8);
7438 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
7439 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
7440 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
7441 gimplify_and_add (t, pre_p);
7444 /* Compute index to start of area used for integer regs. */
7447 /* int_addr = gpr + sav; */
7448 t = fold_convert (sizetype, gpr);
7449 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7450 gimplify_assign (int_addr, t, pre_p);
7454 /* sse_addr = fpr + sav; */
7455 t = fold_convert (sizetype, fpr);
7456 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
7457 gimplify_assign (sse_addr, t, pre_p);
7461 int i, prev_size = 0;
7462 tree temp = create_tmp_var (type, "va_arg_tmp");
7465 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
7466 gimplify_assign (addr, t, pre_p);
7468 for (i = 0; i < XVECLEN (container, 0); i++)
7470 rtx slot = XVECEXP (container, 0, i);
7471 rtx reg = XEXP (slot, 0);
7472 enum machine_mode mode = GET_MODE (reg);
7478 tree dest_addr, dest;
7479 int cur_size = GET_MODE_SIZE (mode);
7481 if (prev_size + cur_size > size)
7483 cur_size = size - prev_size;
7484 mode = mode_for_size (cur_size * BITS_PER_UNIT, MODE_INT, 1);
7485 if (mode == BLKmode)
7488 piece_type = lang_hooks.types.type_for_mode (mode, 1);
7489 if (mode == GET_MODE (reg))
7490 addr_type = build_pointer_type (piece_type);
7492 addr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7494 daddr_type = build_pointer_type_for_mode (piece_type, ptr_mode,
7497 if (SSE_REGNO_P (REGNO (reg)))
7499 src_addr = sse_addr;
7500 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
7504 src_addr = int_addr;
7505 src_offset = REGNO (reg) * 8;
7507 src_addr = fold_convert (addr_type, src_addr);
7508 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7509 size_int (src_offset));
7511 dest_addr = fold_convert (daddr_type, addr);
7512 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7513 size_int (INTVAL (XEXP (slot, 1))));
7514 if (cur_size == GET_MODE_SIZE (mode))
7516 src = build_va_arg_indirect_ref (src_addr);
7517 dest = build_va_arg_indirect_ref (dest_addr);
7519 gimplify_assign (dest, src, pre_p);
7524 = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
7525 3, dest_addr, src_addr,
7526 size_int (cur_size));
7527 gimplify_and_add (copy, pre_p);
7529 prev_size += cur_size;
7535 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7536 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7537 gimplify_assign (gpr, t, pre_p);
7542 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7543 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7544 gimplify_assign (fpr, t, pre_p);
7547 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7549 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7552 /* ... otherwise out of the overflow area. */
7554 /* When we align parameter on stack for caller, if the parameter
7555 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7556 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee
7557 here with caller. */
7558 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7559 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7560 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7562 /* Care for on-stack alignment if needed. */
7563 if (arg_boundary <= 64 || size == 0)
7567 HOST_WIDE_INT align = arg_boundary / 8;
7568 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7569 size_int (align - 1));
7570 t = fold_convert (sizetype, t);
7571 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7573 t = fold_convert (TREE_TYPE (ovf), t);
7576 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7577 gimplify_assign (addr, t, pre_p);
7579 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7580 size_int (rsize * UNITS_PER_WORD));
7581 gimplify_assign (unshare_expr (ovf), t, pre_p);
7584 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7586 ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
7587 addr = fold_convert (ptrtype, addr);
7590 addr = build_va_arg_indirect_ref (addr);
7591 return build_va_arg_indirect_ref (addr);
7594 /* Return nonzero if OPNUM's MEM should be matched
7595 in movabs* patterns. */
7598 ix86_check_movabs (rtx insn, int opnum)
7602 set = PATTERN (insn);
7603 if (GET_CODE (set) == PARALLEL)
7604 set = XVECEXP (set, 0, 0);
7605 gcc_assert (GET_CODE (set) == SET);
7606 mem = XEXP (set, opnum);
7607 while (GET_CODE (mem) == SUBREG)
7608 mem = SUBREG_REG (mem);
7609 gcc_assert (MEM_P (mem));
7610 return (volatile_ok || !MEM_VOLATILE_P (mem));
7613 /* Initialize the table of extra 80387 mathematical constants. */
7616 init_ext_80387_constants (void)
7618 static const char * cst[5] =
7620 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7621 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7622 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7623 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7624 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7628 for (i = 0; i < 5; i++)
7630 real_from_string (&ext_80387_constants_table[i], cst[i]);
7631 /* Ensure each constant is rounded to XFmode precision. */
7632 real_convert (&ext_80387_constants_table[i],
7633 XFmode, &ext_80387_constants_table[i]);
7636 ext_80387_constants_init = 1;
7639 /* Return true if the constant is something that can be loaded with
7640 a special instruction. */
7643 standard_80387_constant_p (rtx x)
7645 enum machine_mode mode = GET_MODE (x);
7649 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7652 if (x == CONST0_RTX (mode))
7654 if (x == CONST1_RTX (mode))
7657 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7659 /* For XFmode constants, try to find a special 80387 instruction when
7660 optimizing for size or on those CPUs that benefit from them. */
7662 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7666 if (! ext_80387_constants_init)
7667 init_ext_80387_constants ();
7669 for (i = 0; i < 5; i++)
7670 if (real_identical (&r, &ext_80387_constants_table[i]))
7674 /* Load of the constant -0.0 or -1.0 will be split as
7675 fldz;fchs or fld1;fchs sequence. */
7676 if (real_isnegzero (&r))
7678 if (real_identical (&r, &dconstm1))
7684 /* Return the opcode of the special instruction to be used to load
7688 standard_80387_constant_opcode (rtx x)
7690 switch (standard_80387_constant_p (x))
7714 /* Return the CONST_DOUBLE representing the 80387 constant that is
7715 loaded by the specified special instruction. The argument IDX
7716 matches the return value from standard_80387_constant_p. */
7719 standard_80387_constant_rtx (int idx)
7723 if (! ext_80387_constants_init)
7724 init_ext_80387_constants ();
7740 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7744 /* Return 1 if X is all 0s and 2 if x is all 1s
7745 in supported SSE vector mode. */
7748 standard_sse_constant_p (rtx x)
7750 enum machine_mode mode = GET_MODE (x);
7752 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7754 if (vector_all_ones_operand (x, mode))
7770 /* Return the opcode of the special instruction to be used to load
7774 standard_sse_constant_opcode (rtx insn, rtx x)
7776 switch (standard_sse_constant_p (x))
7779 switch (get_attr_mode (insn))
7782 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7784 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7785 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7787 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7789 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7790 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7792 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7794 return "vxorps\t%x0, %x0, %x0";
7796 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7797 return "vxorps\t%x0, %x0, %x0";
7799 return "vxorpd\t%x0, %x0, %x0";
7801 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
7802 return "vxorps\t%x0, %x0, %x0";
7804 return "vpxor\t%x0, %x0, %x0";
7809 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0";
7816 /* Returns 1 if OP contains a symbol reference */
7819 symbolic_reference_mentioned_p (rtx op)
7824 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7827 fmt = GET_RTX_FORMAT (GET_CODE (op));
7828 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7834 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7835 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7839 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7846 /* Return 1 if it is appropriate to emit `ret' instructions in the
7847 body of a function. Do this only if the epilogue is simple, needing a
7848 couple of insns. Prior to reloading, we can't tell how many registers
7849 must be saved, so return 0 then. Return 0 if there is no frame
7850 marker to de-allocate. */
7853 ix86_can_use_return_insn_p (void)
7855 struct ix86_frame frame;
7857 if (! reload_completed || frame_pointer_needed)
7860 /* Don't allow more than 32 pop, since that's all we can do
7861 with one instruction. */
7862 if (crtl->args.pops_args
7863 && crtl->args.size >= 32768)
7866 ix86_compute_frame_layout (&frame);
7867 return frame.to_allocate == 0 && frame.padding0 == 0
7868 && (frame.nregs + frame.nsseregs) == 0;
7871 /* Value should be nonzero if functions must have frame pointers.
7872 Zero means the frame pointer need not be set up (and parms may
7873 be accessed via the stack pointer) in functions that seem suitable. */
7876 ix86_frame_pointer_required (void)
7878 /* If we accessed previous frames, then the generated code expects
7879 to be able to access the saved ebp value in our frame. */
7880 if (cfun->machine->accesses_prev_frame)
7883 /* Several x86 os'es need a frame pointer for other reasons,
7884 usually pertaining to setjmp. */
7885 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7888 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7889 the frame pointer by default. Turn it back on now if we've not
7890 got a leaf function. */
7891 if (TARGET_OMIT_LEAF_FRAME_POINTER
7892 && (!current_function_is_leaf
7893 || ix86_current_function_calls_tls_descriptor))
7896 if (crtl->profile && !flag_fentry)
7902 /* Record that the current function accesses previous call frames. */
7905 ix86_setup_frame_addresses (void)
7907 cfun->machine->accesses_prev_frame = 1;
7910 #ifndef USE_HIDDEN_LINKONCE
7911 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7912 # define USE_HIDDEN_LINKONCE 1
7914 # define USE_HIDDEN_LINKONCE 0
7918 static int pic_labels_used;
7920 /* Fills in the label name that should be used for a pc thunk for
7921 the given register. */
7924 get_pc_thunk_name (char name[32], unsigned int regno)
7926 gcc_assert (!TARGET_64BIT);
7928 if (USE_HIDDEN_LINKONCE)
7929 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7931 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7935 /* This function generates code for -fpic that loads %ebx with
7936 the return address of the caller and then returns. */
7939 ix86_code_end (void)
7944 for (regno = 0; regno < 8; ++regno)
7949 if (! ((pic_labels_used >> regno) & 1))
7952 get_pc_thunk_name (name, regno);
7954 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
7955 get_identifier (name),
7956 build_function_type (void_type_node, void_list_node));
7957 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
7958 NULL_TREE, void_type_node);
7959 TREE_PUBLIC (decl) = 1;
7960 TREE_STATIC (decl) = 1;
7965 switch_to_section (darwin_sections[text_coal_section]);
7966 fputs ("\t.weak_definition\t", asm_out_file);
7967 assemble_name (asm_out_file, name);
7968 fputs ("\n\t.private_extern\t", asm_out_file);
7969 assemble_name (asm_out_file, name);
7970 putc ('\n', asm_out_file);
7971 ASM_OUTPUT_LABEL (asm_out_file, name);
7972 DECL_WEAK (decl) = 1;
7976 if (USE_HIDDEN_LINKONCE)
7978 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
7980 targetm.asm_out.unique_section (decl, 0);
7981 switch_to_section (get_named_section (decl, NULL, 0));
7983 targetm.asm_out.globalize_label (asm_out_file, name);
7984 fputs ("\t.hidden\t", asm_out_file);
7985 assemble_name (asm_out_file, name);
7986 putc ('\n', asm_out_file);
7987 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7991 switch_to_section (text_section);
7992 ASM_OUTPUT_LABEL (asm_out_file, name);
7995 DECL_INITIAL (decl) = make_node (BLOCK);
7996 current_function_decl = decl;
7997 init_function_start (decl);
7998 first_function_block_is_cold = false;
7999 /* Make sure unwind info is emitted for the thunk if needed. */
8000 final_start_function (emit_barrier (), asm_out_file, 1);
8002 xops[0] = gen_rtx_REG (Pmode, regno);
8003 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8004 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
8005 output_asm_insn ("ret", xops);
8006 final_end_function ();
8007 init_insn_lengths ();
8008 free_after_compilation (cfun);
8010 current_function_decl = NULL;
8014 /* Emit code for the SET_GOT patterns. */
8017 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
8023 if (TARGET_VXWORKS_RTP && flag_pic)
8025 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
8026 xops[2] = gen_rtx_MEM (Pmode,
8027 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
8028 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
8030 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
8031 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
8032 an unadorned address. */
8033 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
8034 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
8035 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
8039 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
8041 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
8043 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
8046 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
8049 output_asm_insn ("call\t%a2", xops);
8050 #ifdef DWARF2_UNWIND_INFO
8051 /* The call to next label acts as a push. */
8052 if (dwarf2out_do_frame ())
8056 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8057 gen_rtx_PLUS (Pmode,
8060 RTX_FRAME_RELATED_P (insn) = 1;
8061 dwarf2out_frame_debug (insn, true);
8068 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8069 is what will be referenced by the Mach-O PIC subsystem. */
8071 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8074 targetm.asm_out.internal_label (asm_out_file, "L",
8075 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
8079 output_asm_insn ("pop%z0\t%0", xops);
8080 #ifdef DWARF2_UNWIND_INFO
8081 /* The pop is a pop and clobbers dest, but doesn't restore it
8082 for unwind info purposes. */
8083 if (dwarf2out_do_frame ())
8087 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
8088 dwarf2out_frame_debug (insn, true);
8089 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8090 gen_rtx_PLUS (Pmode,
8093 RTX_FRAME_RELATED_P (insn) = 1;
8094 dwarf2out_frame_debug (insn, true);
8103 get_pc_thunk_name (name, REGNO (dest));
8104 pic_labels_used |= 1 << REGNO (dest);
8106 #ifdef DWARF2_UNWIND_INFO
8107 /* Ensure all queued register saves are flushed before the
8108 call. */
8109 if (dwarf2out_do_frame ())
8113 insn = emit_barrier ();
8115 dwarf2out_frame_debug (insn, false);
8118 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
8119 xops[2] = gen_rtx_MEM (QImode, xops[2]);
8120 output_asm_insn ("call\t%X2", xops);
8121 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
8122 is what will be referenced by the Mach-O PIC subsystem. */
8125 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
8127 targetm.asm_out.internal_label (asm_out_file, "L",
8128 CODE_LABEL_NUMBER (label));
8135 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
8136 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
8138 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
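/* For reference, without deep branch prediction the sequence emitted above
   materializes the GOT pointer roughly as follows for %ebx (illustrative
   sketch):

	call	.L2			# pushes the address of .L2
	.L2:	popl	%ebx		# %ebx = address of .L2
		addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction, a call to a get_pc_thunk is used instead so
   that the processor's return-address predictor stays balanced.  */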
8143 /* Generate a "push" pattern for input ARG. */
8148 if (ix86_cfa_state->reg == stack_pointer_rtx)
8149 ix86_cfa_state->offset += UNITS_PER_WORD;
8151 return gen_rtx_SET (VOIDmode,
8152 gen_rtx_MEM (Pmode,
8153 gen_rtx_PRE_DEC (Pmode,
8154 stack_pointer_rtx)),
8155 arg);
8158 /* Return >= 0 if there is an unused call-clobbered register available
8159 for the entire function. */
8162 ix86_select_alt_pic_regnum (void)
8164 if (current_function_is_leaf
8166 && !ix86_current_function_calls_tls_descriptor)
8169 /* Can't use the same register for both PIC and DRAP. */
8171 drap = REGNO (crtl->drap_reg);
8174 for (i = 2; i >= 0; --i)
8175 if (i != drap && !df_regs_ever_live_p (i))
8179 return INVALID_REGNUM;
8182 /* Return 1 if we need to save REGNO. */
8184 ix86_save_reg (unsigned int regno, int maybe_eh_return)
8186 if (pic_offset_table_rtx
8187 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
8188 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8190 || crtl->calls_eh_return
8191 || crtl->uses_const_pool))
8193 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
8198 if (crtl->calls_eh_return && maybe_eh_return)
8203 unsigned test = EH_RETURN_DATA_REGNO (i);
8204 if (test == INVALID_REGNUM)
8211 if (crtl->drap_reg && regno == REGNO (crtl->drap_reg))
8214 return (df_regs_ever_live_p (regno)
8215 && !call_used_regs[regno]
8216 && !fixed_regs[regno]
8217 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
8220 /* Return number of saved general purpose registers. */
8223 ix86_nsaved_regs (void)
8228 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8229 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8234 /* Return number of saved SSE registers. */
8237 ix86_nsaved_sseregs (void)
8242 if (ix86_cfun_abi () != MS_ABI)
8244 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8245 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8250 /* Given FROM and TO register numbers, say whether this elimination is
8251 allowed. If stack alignment is needed, we can only replace argument
8252 pointer with hard frame pointer, or replace frame pointer with stack
8253 pointer. Otherwise, frame pointer elimination is automatically
8254 handled and all other eliminations are valid. */
8257 ix86_can_eliminate (const int from, const int to)
8259 if (stack_realign_fp)
8260 return ((from == ARG_POINTER_REGNUM
8261 && to == HARD_FRAME_POINTER_REGNUM)
8262 || (from == FRAME_POINTER_REGNUM
8263 && to == STACK_POINTER_REGNUM));
8265 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : true;
8268 /* Return the offset between two registers, one to be eliminated, and the other
8269 its replacement, at the start of a routine. */
8272 ix86_initial_elimination_offset (int from, int to)
8274 struct ix86_frame frame;
8275 ix86_compute_frame_layout (&frame);
8277 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8278 return frame.hard_frame_pointer_offset;
8279 else if (from == FRAME_POINTER_REGNUM
8280 && to == HARD_FRAME_POINTER_REGNUM)
8281 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
8284 gcc_assert (to == STACK_POINTER_REGNUM);
8286 if (from == ARG_POINTER_REGNUM)
8287 return frame.stack_pointer_offset;
8289 gcc_assert (from == FRAME_POINTER_REGNUM);
8290 return frame.stack_pointer_offset - frame.frame_pointer_offset;
8294 /* In a dynamically-aligned function, we can't know the offset from
8295 stack pointer to frame pointer, so we must ensure that setjmp
8296 eliminates fp against the hard fp (%ebp) rather than trying to
8297 index from %esp up to the top of the frame across a gap that is
8298 of unknown (at compile-time) size. */
8300 ix86_builtin_setjmp_frame_value (void)
8302 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
8305 /* Fill structure ix86_frame about frame of currently computed function. */
8308 ix86_compute_frame_layout (struct ix86_frame *frame)
8310 unsigned int stack_alignment_needed;
8311 HOST_WIDE_INT offset;
8312 unsigned int preferred_alignment;
8313 HOST_WIDE_INT size = get_frame_size ();
8315 frame->nregs = ix86_nsaved_regs ();
8316 frame->nsseregs = ix86_nsaved_sseregs ();
8318 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
8319 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
8321 /* The MS ABI seems to require stack alignment to always be 16, except for
8322 function prologues and leaf functions. */
8323 if ((ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
8324 && (!current_function_is_leaf || cfun->calls_alloca != 0
8325 || ix86_current_function_calls_tls_descriptor))
8327 preferred_alignment = 16;
8328 stack_alignment_needed = 16;
8329 crtl->preferred_stack_boundary = 128;
8330 crtl->stack_alignment_needed = 128;
8333 gcc_assert (!size || stack_alignment_needed);
8334 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
8335 gcc_assert (preferred_alignment <= stack_alignment_needed);
8337 /* During reload iteration the number of registers saved can change.
8338 Recompute the value as needed. Do not recompute when the number of registers
8339 didn't change, as reload does multiple calls to the function and does not
8340 expect the decision to change within a single iteration. */
8341 if (!optimize_function_for_size_p (cfun)
8342 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
8344 int count = frame->nregs;
8345 struct cgraph_node *node = cgraph_node (current_function_decl);
8347 cfun->machine->use_fast_prologue_epilogue_nregs = count;
8348 /* The fast prologue uses move instead of push to save registers. This
8349 is significantly longer, but also executes faster as modern hardware
8350 can execute the moves in parallel, but can't do that for push/pop.
8352 Be careful about choosing what prologue to emit: when the function takes
8353 many instructions to execute, we may use the slow version, as well as
8354 when the function is known to be outside a hot spot (this is known with
8355 feedback only). Weight the size of the function by the number of registers
8356 to save, as it is cheap to use one or two push instructions but very
8357 slow to use many of them. */
8359 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
8360 if (node->frequency < NODE_FREQUENCY_NORMAL
8361 || (flag_branch_probabilities
8362 && node->frequency < NODE_FREQUENCY_HOT))
8363 cfun->machine->use_fast_prologue_epilogue = false;
8365 cfun->machine->use_fast_prologue_epilogue
8366 = !expensive_function_p (count);
8368 if (TARGET_PROLOGUE_USING_MOVE
8369 && cfun->machine->use_fast_prologue_epilogue)
8370 frame->save_regs_using_mov = true;
8372 frame->save_regs_using_mov = false;
8374 /* If static stack checking is enabled and done with probes, the registers
8375 need to be saved before allocating the frame. */
8376 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
8377 frame->save_regs_using_mov = false;
8379 /* Skip return address. */
8380 offset = UNITS_PER_WORD;
8382 /* Skip pushed static chain. */
8383 if (ix86_static_chain_on_stack)
8384 offset += UNITS_PER_WORD;
8386 /* Skip saved base pointer. */
8387 if (frame_pointer_needed)
8388 offset += UNITS_PER_WORD;
8390 frame->hard_frame_pointer_offset = offset;
8392 /* Set offset to aligned because the realigned frame starts from
8393 that. */
8394 if (stack_realign_fp)
8395 offset = (offset + stack_alignment_needed - 1) & -stack_alignment_needed;
8397 /* Register save area */
8398 offset += frame->nregs * UNITS_PER_WORD;
8400 /* Align SSE reg save area. */
8401 if (frame->nsseregs)
8402 frame->padding0 = ((offset + 16 - 1) & -16) - offset;
8404 frame->padding0 = 0;
8406 /* SSE register save area. */
8407 offset += frame->padding0 + frame->nsseregs * 16;
8410 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
8411 offset += frame->va_arg_size;
8413 /* Align start of frame for local function. */
8414 frame->padding1 = ((offset + stack_alignment_needed - 1)
8415 & -stack_alignment_needed) - offset;
8417 offset += frame->padding1;
8419 /* Frame pointer points here. */
8420 frame->frame_pointer_offset = offset;
8424 /* Add outgoing arguments area. Can be skipped if we eliminated
8425 all the function calls as dead code.
8426 Skipping is however impossible when the function calls alloca. The alloca
8427 expander assumes that the last crtl->outgoing_args_size bytes
8428 of the stack frame are unused. */
8429 if (ACCUMULATE_OUTGOING_ARGS
8430 && (!current_function_is_leaf || cfun->calls_alloca
8431 || ix86_current_function_calls_tls_descriptor))
8433 offset += crtl->outgoing_args_size;
8434 frame->outgoing_arguments_size = crtl->outgoing_args_size;
8437 frame->outgoing_arguments_size = 0;
8439 /* Align stack boundary. Only needed if we're calling another function
8440 or using alloca. */
8441 if (!current_function_is_leaf || cfun->calls_alloca
8442 || ix86_current_function_calls_tls_descriptor)
8443 frame->padding2 = ((offset + preferred_alignment - 1)
8444 & -preferred_alignment) - offset;
8446 frame->padding2 = 0;
8448 offset += frame->padding2;
8450 /* We've reached end of stack frame. */
8451 frame->stack_pointer_offset = offset;
8453 /* Size prologue needs to allocate. */
8454 frame->to_allocate =
8455 (size + frame->padding1 + frame->padding2
8456 + frame->outgoing_arguments_size + frame->va_arg_size);
8458 if ((!frame->to_allocate && frame->nregs <= 1)
8459 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
8460 frame->save_regs_using_mov = false;
8462 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8463 && current_function_sp_is_unchanging
8464 && current_function_is_leaf
8465 && !ix86_current_function_calls_tls_descriptor)
8467 frame->red_zone_size = frame->to_allocate;
8468 if (frame->save_regs_using_mov)
8469 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
8470 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
8471 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
8474 frame->red_zone_size = 0;
8475 frame->to_allocate -= frame->red_zone_size;
8476 frame->stack_pointer_offset -= frame->red_zone_size;
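/* Aside: a minimal self-contained sketch (illustrative only, not part of
   GCC) of the round-up-to-alignment idiom used repeatedly above; ALIGN is
   assumed to be a power of two.  */
#if 0
#include <assert.h>

static long
round_up (long offset, long align)
{
  /* Overshoot by align - 1, then mask the low bits off; -align is a mask
     of all ones except the low log2(align) bits.  */
  return (offset + align - 1) & -align;
}

int
main (void)
{
  assert (round_up (0, 16) == 0);	/* already aligned */
  assert (round_up (1, 16) == 16);	/* rounds up */
  assert (round_up (24, 16) == 32);
  return 0;
}
#endif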
8479 /* Emit code to save registers in the prologue. */
8482 ix86_emit_save_regs (void)
8487 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
8488 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8490 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
8491 RTX_FRAME_RELATED_P (insn) = 1;
8495 /* Emit code to save registers using MOV insns. First register
8496 is stored at POINTER + OFFSET. */
8498 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8503 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8504 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8506 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8508 gen_rtx_REG (Pmode, regno));
8509 RTX_FRAME_RELATED_P (insn) = 1;
8510 offset += UNITS_PER_WORD;
8514 /* Emit code to save SSE registers using MOV insns. First register
8515 is stored at POINTER + OFFSET. */
8517 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8523 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8524 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8526 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8527 set_mem_align (mem, 128);
8528 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8529 RTX_FRAME_RELATED_P (insn) = 1;
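/* Note: because set_mem_align marks the slots above as 128-bit aligned, the
   TImode moves can typically be emitted as aligned SSE stores
   (movaps/movdqa) rather than slower unaligned forms.  */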
8534 static GTY(()) rtx queued_cfa_restores;
8536 /* Add a REG_CFA_RESTORE REG note to INSN, or queue it until the next stack
8537 manipulation insn. Don't add it if the previously
8538 saved value will be left untouched within the stack red zone till return,
8539 as unwinders can find the same value in the register and
8540 on the stack. */
8543 ix86_add_cfa_restore_note (rtx insn, rtx reg, HOST_WIDE_INT red_offset)
8546 && !TARGET_64BIT_MS_ABI
8547 && red_offset + RED_ZONE_SIZE >= 0
8548 && crtl->args.pops_args < 65536)
8553 add_reg_note (insn, REG_CFA_RESTORE, reg);
8554 RTX_FRAME_RELATED_P (insn) = 1;
8558 = alloc_reg_note (REG_CFA_RESTORE, reg, queued_cfa_restores);
8561 /* Add queued REG_CFA_RESTORE notes if any to INSN. */
8564 ix86_add_queued_cfa_restore_notes (rtx insn)
8567 if (!queued_cfa_restores)
8569 for (last = queued_cfa_restores; XEXP (last, 1); last = XEXP (last, 1))
8571 XEXP (last, 1) = REG_NOTES (insn);
8572 REG_NOTES (insn) = queued_cfa_restores;
8573 queued_cfa_restores = NULL_RTX;
8574 RTX_FRAME_RELATED_P (insn) = 1;
8577 /* Expand prologue or epilogue stack adjustment.
8578 The pattern exists to put a dependency on all ebp-based memory accesses.
8579 STYLE should be negative if instructions should be marked as frame related,
8580 zero if the %r11 register is live and cannot be freely used, and positive
8581 otherwise. */
8584 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset,
8585 int style, bool set_cfa)
8590 insn = emit_insn (gen_pro_epilogue_adjust_stack_si_1 (dest, src, offset));
8591 else if (x86_64_immediate_operand (offset, DImode))
8592 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_1 (dest, src, offset));
8596 /* r11 is used by indirect sibcall return as well, set before the
8597 epilogue and used after the epilogue. */
8599 tmp = gen_rtx_REG (DImode, R11_REG);
8602 gcc_assert (src != hard_frame_pointer_rtx
8603 && dest != hard_frame_pointer_rtx);
8604 tmp = hard_frame_pointer_rtx;
8606 insn = emit_insn (gen_rtx_SET (DImode, tmp, offset));
8608 RTX_FRAME_RELATED_P (insn) = 1;
8609 insn = emit_insn (gen_pro_epilogue_adjust_stack_di_2 (dest, src, tmp,
8614 ix86_add_queued_cfa_restore_notes (insn);
8620 gcc_assert (ix86_cfa_state->reg == src);
8621 ix86_cfa_state->offset += INTVAL (offset);
8622 ix86_cfa_state->reg = dest;
8624 r = gen_rtx_PLUS (Pmode, src, offset);
8625 r = gen_rtx_SET (VOIDmode, dest, r);
8626 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
8627 RTX_FRAME_RELATED_P (insn) = 1;
8630 RTX_FRAME_RELATED_P (insn) = 1;
8633 /* Find an available register to be used as a dynamic realign argument
8634 pointer register. Such a register will be written in the prologue and
8635 used at the beginning of the body, so it must not be
8636 1. a parameter passing register.
8637 2. the GOT pointer.
8638 We reuse the static-chain register if it is available. Otherwise, we
8639 use DI for i386 and R13 for x86-64. We chose R13 since it has
8640 longer encoding.
8642 Return: the regno of the chosen register. */
8645 find_drap_reg (void)
8647 tree decl = cfun->decl;
8651 /* Use R13 for a nested function or a function that needs a static chain.
8652 Since a function with a tail call may use any caller-saved
8653 register in the epilogue, DRAP must not use a caller-saved
8654 register in such a case. */
8655 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8662 /* Use DI for a nested function or a function that needs a static chain.
8663 Since a function with a tail call may use any caller-saved
8664 register in the epilogue, DRAP must not use a caller-saved
8665 register in such a case. */
8666 if (DECL_STATIC_CHAIN (decl) || crtl->tail_call_emit)
8669 /* Reuse the static chain register if it isn't used for parameter
8670 passing. */
8671 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8672 && !lookup_attribute ("fastcall",
8673 TYPE_ATTRIBUTES (TREE_TYPE (decl)))
8674 && !lookup_attribute ("thiscall",
8675 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8682 /* Return minimum incoming stack alignment. */
8685 ix86_minimum_incoming_stack_boundary (bool sibcall)
8687 unsigned int incoming_stack_boundary;
8689 /* Prefer the one specified at command line. */
8690 if (ix86_user_incoming_stack_boundary)
8691 incoming_stack_boundary = ix86_user_incoming_stack_boundary;
8692 /* In 32-bit mode, use MIN_STACK_BOUNDARY for the incoming stack boundary
8693 if -mstackrealign is used, this isn't for a sibcall check, and the
8694 estimated stack alignment is 128 bits. */
8697 && ix86_force_align_arg_pointer
8698 && crtl->stack_alignment_estimated == 128)
8699 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8701 incoming_stack_boundary = ix86_default_incoming_stack_boundary;
8703 /* Incoming stack alignment can be changed on individual functions
8704 via force_align_arg_pointer attribute. We use the smallest
8705 incoming stack boundary. */
8706 if (incoming_stack_boundary > MIN_STACK_BOUNDARY
8707 && lookup_attribute (ix86_force_align_arg_pointer_string,
8708 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8709 incoming_stack_boundary = MIN_STACK_BOUNDARY;
8711 /* The incoming stack frame has to be aligned at least at
8712 parm_stack_boundary. */
8713 if (incoming_stack_boundary < crtl->parm_stack_boundary)
8714 incoming_stack_boundary = crtl->parm_stack_boundary;
8716 /* The stack at the entry of main is aligned by the runtime. We use the
8717 smallest incoming stack boundary. */
8718 if (incoming_stack_boundary > MAIN_STACK_BOUNDARY
8719 && DECL_NAME (current_function_decl)
8720 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8721 && DECL_FILE_SCOPE_P (current_function_decl))
8722 incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8724 return incoming_stack_boundary;
8727 /* Update incoming stack boundary and estimated stack alignment. */
8730 ix86_update_stack_boundary (void)
8732 ix86_incoming_stack_boundary
8733 = ix86_minimum_incoming_stack_boundary (false);
8735 /* x86_64 varargs needs 16-byte stack alignment for the register save
8736 area. */
8737 if (TARGET_64BIT
8738 && cfun->stdarg
8739 && crtl->stack_alignment_estimated < 128)
8740 crtl->stack_alignment_estimated = 128;
8743 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8744 needed or an rtx for DRAP otherwise. */
8747 ix86_get_drap_rtx (void)
8749 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8750 crtl->need_drap = true;
8752 if (stack_realign_drap)
8754 /* Assign DRAP to vDRAP and return vDRAP. */
8755 unsigned int regno = find_drap_reg ();
8760 arg_ptr = gen_rtx_REG (Pmode, regno);
8761 crtl->drap_reg = arg_ptr;
8764 drap_vreg = copy_to_reg (arg_ptr);
8768 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8771 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg);
8772 RTX_FRAME_RELATED_P (insn) = 1;
8780 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8783 ix86_internal_arg_pointer (void)
8785 return virtual_incoming_args_rtx;
8788 struct scratch_reg {
8793 /* Return a short-lived scratch register for use on function entry.
8794 In 32-bit mode, it is valid only after the registers are saved
8795 in the prologue. This register must be released by means of
8796 release_scratch_register_on_entry once it is dead. */
8799 get_scratch_register_on_entry (struct scratch_reg *sr)
8807 /* We always use R11 in 64-bit mode. */
8812 tree decl = current_function_decl, fntype = TREE_TYPE (decl);
8814 = lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)) != NULL_TREE;
8815 bool static_chain_p = DECL_STATIC_CHAIN (decl);
8816 int regparm = ix86_function_regparm (fntype, decl);
8818 = crtl->drap_reg ? REGNO (crtl->drap_reg) : INVALID_REGNUM;
8820 /* 'fastcall' sets regparm to 2, uses ecx/edx for arguments and eax
8821 for the static chain register. */
8822 if ((regparm < 1 || (fastcall_p && !static_chain_p))
8823 && drap_regno != AX_REG)
8825 else if (regparm < 2 && drap_regno != DX_REG)
8827 /* ecx is the static chain register. */
8828 else if (regparm < 3 && !fastcall_p && !static_chain_p
8829 && drap_regno != CX_REG)
8831 else if (ix86_save_reg (BX_REG, true))
8833 /* esi is the static chain register. */
8834 else if (!(regparm == 3 && static_chain_p)
8835 && ix86_save_reg (SI_REG, true))
8837 else if (ix86_save_reg (DI_REG, true))
8841 regno = (drap_regno == AX_REG ? DX_REG : AX_REG);
8846 sr->reg = gen_rtx_REG (Pmode, regno);
8849 rtx insn = emit_insn (gen_push (sr->reg));
8850 RTX_FRAME_RELATED_P (insn) = 1;
8854 /* Release a scratch register obtained from the preceding function. */
8857 release_scratch_register_on_entry (struct scratch_reg *sr)
8861 rtx x, insn = emit_insn (ix86_gen_pop1 (sr->reg));
8863 /* The RTX_FRAME_RELATED_P mechanism doesn't know about pop. */
8864 RTX_FRAME_RELATED_P (insn) = 1;
8865 x = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (UNITS_PER_WORD));
8866 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
8867 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
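/* Aside: an illustrative usage sketch (not actual GCC code) of the scratch
   register protocol above, mirroring its use in ix86_adjust_stack_and_probe
   below; the hypothetical emit_insns_using () stands in for whatever insns
   need the temporary.  */
#if 0
static void
example_scratch_use (void)
{
  struct scratch_reg sr;

  get_scratch_register_on_entry (&sr);	/* may push a live register */
  emit_insns_using (sr.reg);		/* use sr.reg as a temporary */
  release_scratch_register_on_entry (&sr); /* pop it back if it was saved */
}
#endif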
8871 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
8873 /* Emit code to adjust the stack pointer by SIZE bytes while probing it. */
8876 ix86_adjust_stack_and_probe (HOST_WIDE_INT size)
8878 /* We skip the probe for the first interval + a small dope of 4 words and
8879 probe that many bytes past the specified size to maintain a protection
8880 area at the bottom of the stack. */
8881 const int dope = 4 * UNITS_PER_WORD;
8882 rtx size_rtx = GEN_INT (size);
8884 /* See if we have a constant small number of probes to generate. If so,
8885 that's the easy case. The run-time loop is made up of 11 insns in the
8886 generic case while the compile-time loop is made up of 3+2*(n-1) insns
8887 for n # of intervals. */
8888 if (size <= 5 * PROBE_INTERVAL)
8890 HOST_WIDE_INT i, adjust;
8891 bool first_probe = true;
8893 /* Adjust SP and probe at PROBE_INTERVAL + N * PROBE_INTERVAL for
8894 values of N from 1 until it exceeds SIZE. If only one probe is
8895 needed, this will not generate any code. Then adjust and probe
8896 to PROBE_INTERVAL + SIZE. */
8897 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
8901 adjust = 2 * PROBE_INTERVAL + dope;
8902 first_probe = false;
8905 adjust = PROBE_INTERVAL;
8907 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8908 plus_constant (stack_pointer_rtx, -adjust)));
8909 emit_stack_probe (stack_pointer_rtx);
8913 adjust = size + PROBE_INTERVAL + dope;
8915 adjust = size + PROBE_INTERVAL - i;
8917 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8918 plus_constant (stack_pointer_rtx, -adjust)));
8919 emit_stack_probe (stack_pointer_rtx);
8921 /* Adjust back to account for the additional first interval. */
8922 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8923 plus_constant (stack_pointer_rtx,
8924 PROBE_INTERVAL + dope)));
8927 /* Otherwise, do the same as above, but in a loop. Note that we must be
8928 extra careful with variables wrapping around because we might be at
8929 the very top (or the very bottom) of the address space and we have
8930 to be able to handle this case properly; in particular, we use an
8931 equality test for the loop condition. */
8934 HOST_WIDE_INT rounded_size;
8935 struct scratch_reg sr;
8937 get_scratch_register_on_entry (&sr);
8940 /* Step 1: round SIZE to the previous multiple of the interval. */
8942 rounded_size = size & -PROBE_INTERVAL;
8945 /* Step 2: compute initial and final value of the loop counter. */
8947 /* SP = SP_0 + PROBE_INTERVAL. */
8948 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8949 plus_constant (stack_pointer_rtx,
8950 - (PROBE_INTERVAL + dope))));
8952 /* LAST_ADDR = SP_0 + PROBE_INTERVAL + ROUNDED_SIZE. */
8953 emit_move_insn (sr.reg, GEN_INT (-rounded_size));
8954 emit_insn (gen_rtx_SET (VOIDmode, sr.reg,
8955 gen_rtx_PLUS (Pmode, sr.reg,
8956 stack_pointer_rtx)));
8958 /* Step 3: the loop
8961 while (SP != LAST_ADDR)
8962 {
8963 SP = SP + PROBE_INTERVAL
8964 probe at SP
8965 }
8967 adjusts SP and probes to PROBE_INTERVAL + N * PROBE_INTERVAL for
8968 values of N from 1 until it is equal to ROUNDED_SIZE. */
8970 emit_insn (ix86_gen_adjust_stack_and_probe (sr.reg, sr.reg, size_rtx));
8973 /* Step 4: adjust SP and probe at PROBE_INTERVAL + SIZE if we cannot
8974 assert at compile-time that SIZE is equal to ROUNDED_SIZE. */
8976 if (size != rounded_size)
8978 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8979 plus_constant (stack_pointer_rtx,
8980 rounded_size - size)));
8981 emit_stack_probe (stack_pointer_rtx);
8984 /* Adjust back to account for the additional first interval. */
8985 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8986 plus_constant (stack_pointer_rtx,
8987 PROBE_INTERVAL + dope)));
8989 release_scratch_register_on_entry (&sr);
8992 gcc_assert (ix86_cfa_state->reg != stack_pointer_rtx);
8994 /* Make sure nothing is scheduled before we are done. */
8995 emit_insn (gen_blockage ());
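/* Aside: an illustrative C rendering (not part of GCC) of the runtime
   behavior of the moving-SP probe loop generated above.  The generated code
   decrements the stack pointer one PROBE_INTERVAL at a time and touches the
   new bottom so the OS can grow the stack page by page.  */
#if 0
static void
moving_sp_probe_model (char *sp, char *last_addr, long probe_interval)
{
  /* An equality test, not an ordered compare, because the addresses may
     wrap around at the ends of the address space.  */
  while (sp != last_addr)
    {
      sp -= probe_interval;
      *(volatile char *) sp |= 0;	/* the "orl $0, (%esp)"-style probe */
    }
}
#endif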
8998 /* Adjust the stack pointer up to REG while probing it. */
9001 output_adjust_stack_and_probe (rtx reg)
9003 static int labelno = 0;
9004 char loop_lab[32], end_lab[32];
9007 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9008 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9010 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9012 /* Jump to END_LAB if SP == LAST_ADDR. */
9013 xops[0] = stack_pointer_rtx;
9015 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9016 fputs ("\tje\t", asm_out_file);
9017 assemble_name_raw (asm_out_file, end_lab);
9018 fputc ('\n', asm_out_file);
9020 /* SP = SP + PROBE_INTERVAL. */
9021 xops[1] = GEN_INT (PROBE_INTERVAL);
9022 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9025 xops[1] = const0_rtx;
9026 output_asm_insn ("or%z0\t{%1, (%0)|DWORD PTR [%0], %1}", xops);
9028 fprintf (asm_out_file, "\tjmp\t");
9029 assemble_name_raw (asm_out_file, loop_lab);
9030 fputc ('\n', asm_out_file);
9032 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
9037 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
9038 inclusive. These are offsets from the current stack pointer. */
9041 ix86_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
9043 /* See if we have a constant small number of probes to generate. If so,
9044 that's the easy case. The run-time loop is made up of 7 insns in the
9045 generic case while the compile-time loop is made up of n insns for n #
9046 of intervals. */
9047 if (size <= 7 * PROBE_INTERVAL)
9051 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
9052 it exceeds SIZE. If only one probe is needed, this will not
9053 generate any code. Then probe at FIRST + SIZE. */
9054 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
9055 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + i)));
9057 emit_stack_probe (plus_constant (stack_pointer_rtx, -(first + size)));
9060 /* Otherwise, do the same as above, but in a loop. Note that we must be
9061 extra careful with variables wrapping around because we might be at
9062 the very top (or the very bottom) of the address space and we have
9063 to be able to handle this case properly; in particular, we use an
9064 equality test for the loop condition. */
9067 HOST_WIDE_INT rounded_size, last;
9068 struct scratch_reg sr;
9070 get_scratch_register_on_entry (&sr);
9073 /* Step 1: round SIZE to the previous multiple of the interval. */
9075 rounded_size = size & -PROBE_INTERVAL;
9078 /* Step 2: compute initial and final value of the loop counter. */
9080 /* TEST_OFFSET = FIRST. */
9081 emit_move_insn (sr.reg, GEN_INT (-first));
9083 /* LAST_OFFSET = FIRST + ROUNDED_SIZE. */
9084 last = first + rounded_size;
9086 /* Step 3: the loop
9089 while (TEST_ADDR != LAST_ADDR)
9090 {
9091 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
9092 probe at TEST_ADDR
9093 }
9095 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
9096 until it is equal to ROUNDED_SIZE. */
9098 emit_insn (ix86_gen_probe_stack_range (sr.reg, sr.reg, GEN_INT (-last)));
9101 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
9102 that SIZE is equal to ROUNDED_SIZE. */
9104 if (size != rounded_size)
9105 emit_stack_probe (plus_constant (gen_rtx_PLUS (Pmode,
9106 stack_pointer_rtx,
9107 sr.reg),
9108 rounded_size - size));
9110 release_scratch_register_on_entry (&sr);
9113 /* Make sure nothing is scheduled before we are done. */
9114 emit_insn (gen_blockage ());
9117 /* Probe a range of stack addresses from REG to END, inclusive. These are
9118 offsets from the current stack pointer. */
9121 output_probe_stack_range (rtx reg, rtx end)
9123 static int labelno = 0;
9124 char loop_lab[32], end_lab[32];
9127 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
9128 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
9130 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
9132 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
9135 output_asm_insn ("cmp%z0\t{%1, %0|%0, %1}", xops);
9136 fputs ("\tje\t", asm_out_file);
9137 assemble_name_raw (asm_out_file, end_lab);
9138 fputc ('\n', asm_out_file);
9140 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
9141 xops[1] = GEN_INT (PROBE_INTERVAL);
9142 output_asm_insn ("sub%z0\t{%1, %0|%0, %1}", xops);
9144 /* Probe at TEST_ADDR. */
9145 xops[0] = stack_pointer_rtx;
9147 xops[2] = const0_rtx;
9148 output_asm_insn ("or%z0\t{%2, (%0,%1)|DWORD PTR [%0+%1], %2}", xops);
9150 fprintf (asm_out_file, "\tjmp\t");
9151 assemble_name_raw (asm_out_file, loop_lab);
9152 fputc ('\n', asm_out_file);
9154 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
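/* For reference, the loop emitted by the routine above looks roughly like
   this in AT&T syntax (illustrative sketch, assuming a 4096-byte
   PROBE_INTERVAL and %ecx/%edx as TEST_ADDR/LAST_ADDR):

	.LPSRL0:
		cmpl	%edx, %ecx	# stop once TEST_ADDR == LAST_ADDR
		je	.LPSRE0
		subl	$4096, %ecx	# TEST_ADDR += PROBE_INTERVAL
		orl	$0, (%esp,%ecx)	# probe at TEST_ADDR
		jmp	.LPSRL0
	.LPSRE0:
*/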
9159 /* Finalize the stack_realign_needed flag, which guides generation of the
9160 prologue/epilogue in the correct form. */
9162 ix86_finalize_stack_realign_flags (void)
9164 /* Check if stack realignment is really needed after reload, and
9165 store the result in cfun. */
9166 unsigned int incoming_stack_boundary
9167 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
9168 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
9169 unsigned int stack_realign = (incoming_stack_boundary
9170 < (current_function_is_leaf
9171 ? crtl->max_used_stack_slot_alignment
9172 : crtl->stack_alignment_needed));
9174 if (crtl->stack_realign_finalized)
9176 /* After stack_realign_needed is finalized, we can no longer
9177 update it. */
9178 gcc_assert (crtl->stack_realign_needed == stack_realign);
9182 crtl->stack_realign_needed = stack_realign;
9183 crtl->stack_realign_finalized = true;
9187 /* Expand the prologue into a bunch of separate insns. */
9190 ix86_expand_prologue (void)
9194 struct ix86_frame frame;
9195 HOST_WIDE_INT allocate;
9196 int gen_frame_pointer = frame_pointer_needed;
9198 ix86_finalize_stack_realign_flags ();
9200 /* DRAP should not coexist with stack_realign_fp */
9201 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
9203 /* Initialize CFA state for before the prologue. */
9204 ix86_cfa_state->reg = stack_pointer_rtx;
9205 ix86_cfa_state->offset = INCOMING_FRAME_SP_OFFSET;
9207 ix86_compute_frame_layout (&frame);
9209 if (!TARGET_64BIT && ix86_function_ms_hook_prologue (current_function_decl))
9213 /* Check if profiling is active and we shall use the profiling-before-
9214 prologue variant. If so, issue a sorry diagnostic. */
9215 if (crtl->profile && flag_fentry != 0)
9216 sorry ("ms_hook_prologue attribute isn't compatible with -mfentry for 32-bit");
9218 /* Make sure the function starts with
9219 8b ff movl.s %edi,%edi (emitted by ix86_asm_output_function_label)
9220 55 push %ebp
9221 8b ec movl.s %esp,%ebp
9223 This matches the hookable function prologue in Win32 API
9224 functions in Microsoft Windows XP Service Pack 2 and newer.
9225 Wine uses this to enable Windows apps to hook the Win32 API
9226 functions provided by Wine. */
9227 push = emit_insn (gen_push (hard_frame_pointer_rtx));
9228 mov = emit_insn (gen_vswapmov (hard_frame_pointer_rtx,
9229 stack_pointer_rtx));
9231 if (frame_pointer_needed && !(crtl->drap_reg
9232 && crtl->stack_realign_needed))
9234 /* The push %ebp and movl.s %esp, %ebp already set up
9235 the frame pointer. No need to do this again. */
9236 gen_frame_pointer = 0;
9237 RTX_FRAME_RELATED_P (push) = 1;
9238 RTX_FRAME_RELATED_P (mov) = 1;
9239 if (ix86_cfa_state->reg == stack_pointer_rtx)
9240 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9243 /* If the frame pointer is not needed, pop %ebp again. This
9244 could be optimized for cases where ebp needs to be backed up
9245 for some other reason. If stack realignment is needed, pop
9246 the base pointer again, align the stack, and later regenerate
9247 the frame pointer setup. The frame pointer generated by the
9248 hook prologue is not aligned, so it can't be used. */
9249 insn = emit_insn (ix86_gen_pop1 (hard_frame_pointer_rtx));
9252 /* The first insn of a function that accepts its static chain on the
9253 stack is to push the register that would be filled in by a direct
9254 call. This insn will be skipped by the trampoline. */
9255 if (ix86_static_chain_on_stack)
9259 insn = emit_insn (gen_push (ix86_static_chain (cfun->decl, false)));
9260 emit_insn (gen_blockage ());
9262 /* We don't want to interpret this push insn as a register save,
9263 only as a stack adjustment. The real copy of the register as
9264 a save will be done later, if needed. */
9265 t = plus_constant (stack_pointer_rtx, -UNITS_PER_WORD);
9266 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9267 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9268 RTX_FRAME_RELATED_P (insn) = 1;
9271 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
9272 DRAP is needed and stack realignment is really needed after reload. */
9273 if (crtl->drap_reg && crtl->stack_realign_needed)
9276 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9277 int param_ptr_offset = UNITS_PER_WORD;
9279 if (ix86_static_chain_on_stack)
9280 param_ptr_offset += UNITS_PER_WORD;
9281 if (!call_used_regs[REGNO (crtl->drap_reg)])
9282 param_ptr_offset += UNITS_PER_WORD;
9284 gcc_assert (stack_realign_drap);
9286 /* Grab the argument pointer. */
9287 x = plus_constant (stack_pointer_rtx, param_ptr_offset);
9290 /* Only need to push the parameter pointer reg if it is caller
9291 saved. */
9292 if (!call_used_regs[REGNO (crtl->drap_reg)])
9294 /* Push the arg pointer reg. */
9295 insn = emit_insn (gen_push (y));
9296 RTX_FRAME_RELATED_P (insn) = 1;
9299 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
9300 RTX_FRAME_RELATED_P (insn) = 1;
9301 ix86_cfa_state->reg = crtl->drap_reg;
9303 /* Align the stack. */
9304 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9305 stack_pointer_rtx,
9306 GEN_INT (-align_bytes)));
9307 RTX_FRAME_RELATED_P (insn) = 1;
9309 /* Replicate the return address on the stack so that return
9310 address can be reached via (argp - 1) slot. This is needed
9311 to implement macro RETURN_ADDR_RTX and intrinsic function
9312 expand_builtin_return_addr etc. */
9314 x = gen_frame_mem (Pmode,
9315 plus_constant (x, -UNITS_PER_WORD));
9316 insn = emit_insn (gen_push (x));
9317 RTX_FRAME_RELATED_P (insn) = 1;
9320 /* Note: AT&T enter does NOT have reversed args. Enter is probably
9321 slower on all targets. Also sdb doesn't like it. */
9323 if (gen_frame_pointer)
9325 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
9326 RTX_FRAME_RELATED_P (insn) = 1;
9328 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
9329 RTX_FRAME_RELATED_P (insn) = 1;
9331 if (ix86_cfa_state->reg == stack_pointer_rtx)
9332 ix86_cfa_state->reg = hard_frame_pointer_rtx;
9335 if (stack_realign_fp)
9337 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
9338 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
9340 /* Align the stack. */
9341 insn = emit_insn (ix86_gen_andsp (stack_pointer_rtx,
9342 stack_pointer_rtx,
9343 GEN_INT (-align_bytes)));
9344 RTX_FRAME_RELATED_P (insn) = 1;
9347 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding0;
9349 if (!frame.save_regs_using_mov)
9350 ix86_emit_save_regs ();
9352 allocate += frame.nregs * UNITS_PER_WORD;
9354 /* The stack has already been decremented by the instruction calling us
9355 so we need to probe unconditionally to preserve the protection area. */
9356 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
9358 /* We expect the registers to be saved when probes are used. */
9359 gcc_assert (!frame.save_regs_using_mov);
9361 if (STACK_CHECK_MOVING_SP)
9363 ix86_adjust_stack_and_probe (allocate);
9368 HOST_WIDE_INT size = allocate;
9370 if (TARGET_64BIT && size >= (HOST_WIDE_INT) 0x80000000)
9371 size = 0x80000000 - STACK_CHECK_PROTECT - 1;
9373 if (TARGET_STACK_PROBE)
9374 ix86_emit_probe_stack_range (0, size + STACK_CHECK_PROTECT);
9376 ix86_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
9380 /* When using the red zone we may start register saving before allocating
9381 the stack frame, saving one cycle of the prologue. However, we
9382 avoid doing this if we will have to probe the stack, since
9383 at least on x86_64 the stack probe can turn into a call that clobbers
9384 a red zone location. */
9385 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
9386 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
9387 ix86_emit_save_regs_using_mov ((frame_pointer_needed
9388 && !crtl->stack_realign_needed)
9389 ? hard_frame_pointer_rtx
9390 : stack_pointer_rtx,
9391 -frame.nregs * UNITS_PER_WORD);
9395 else if (!ix86_target_stack_probe () || allocate < CHECK_STACK_LIMIT)
9396 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9397 GEN_INT (-allocate), -1,
9398 ix86_cfa_state->reg == stack_pointer_rtx);
9401 rtx eax = gen_rtx_REG (Pmode, AX_REG);
9405 if (cfun->machine->call_abi == MS_ABI)
9408 eax_live = ix86_eax_live_at_start_p ();
9412 emit_insn (gen_push (eax));
9413 allocate -= UNITS_PER_WORD;
9416 emit_move_insn (eax, GEN_INT (allocate));
9418 insn = emit_insn (ix86_gen_allocate_stack_worker (eax, eax));
9420 if (ix86_cfa_state->reg == stack_pointer_rtx)
9422 ix86_cfa_state->offset += allocate;
9423 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
9424 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
9425 add_reg_note (insn, REG_CFA_ADJUST_CFA, t);
9426 RTX_FRAME_RELATED_P (insn) = 1;
9431 if (frame_pointer_needed)
9432 t = plus_constant (hard_frame_pointer_rtx,
9435 - frame.nregs * UNITS_PER_WORD);
9437 t = plus_constant (stack_pointer_rtx, allocate);
9438 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
9442 if (frame.save_regs_using_mov
9443 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
9444 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
9446 if (!frame_pointer_needed
9447 || !(frame.to_allocate + frame.padding0)
9448 || crtl->stack_realign_needed)
9449 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
9451 + frame.nsseregs * 16 + frame.padding0);
9453 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
9454 -frame.nregs * UNITS_PER_WORD);
9456 if (!frame_pointer_needed
9457 || !(frame.to_allocate + frame.padding0)
9458 || crtl->stack_realign_needed)
9459 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
9462 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
9463 - frame.nregs * UNITS_PER_WORD
9464 - frame.nsseregs * 16
9467 pic_reg_used = false;
9468 if (pic_offset_table_rtx
9469 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
9472 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
9474 if (alt_pic_reg_used != INVALID_REGNUM)
9475 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
9477 pic_reg_used = true;
9484 if (ix86_cmodel == CM_LARGE_PIC)
9486 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
9487 rtx label = gen_label_rtx ();
9489 LABEL_PRESERVE_P (label) = 1;
9490 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
9491 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
9492 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
9493 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
9494 pic_offset_table_rtx, tmp_reg));
9497 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
9500 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
9503 /* In the pic_reg_used case, make sure that the got load isn't deleted
9504 when mcount needs it. Blockage to avoid call movement across the mcount
9505 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
9506 note. */
9507 if (crtl->profile && !flag_fentry && pic_reg_used)
9508 emit_insn (gen_prologue_use (pic_offset_table_rtx));
9510 if (crtl->drap_reg && !crtl->stack_realign_needed)
9512 /* vDRAP is set up, but after reload it turns out stack realignment
9513 isn't necessary; here we emit a prologue to set up DRAP
9514 without the stack realignment adjustment. */
9516 int drap_bp_offset = UNITS_PER_WORD * 2;
9518 if (ix86_static_chain_on_stack)
9519 drap_bp_offset += UNITS_PER_WORD;
9520 x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
9521 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
9524 /* Prevent instructions from being scheduled into register save push
9525 sequence when access to the redzone area is done through frame pointer.
9526 The offset between the frame pointer and the stack pointer is calculated
9527 relative to the value of the stack pointer at the end of the function
9528 prologue, and moving instructions that access redzone area via frame
9529 pointer inside push sequence violates this assumption. */
9530 if (frame_pointer_needed && frame.red_zone_size)
9531 emit_insn (gen_memory_blockage ());
9533 /* Emit cld instruction if stringops are used in the function. */
9534 if (TARGET_CLD && ix86_current_function_needs_cld)
9535 emit_insn (gen_cld ());
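/* For orientation, a typical 32-bit frame-pointer prologue produced by the
   function above looks roughly like this (illustrative sketch; the exact
   sequence depends on saved registers, realignment, and stack probing):

	pushl	%ebp		# save the old frame pointer
	movl	%esp, %ebp	# establish the new frame pointer
	pushl	%ebx		# call-saved registers (or movl-based saves
				# when save_regs_using_mov is in effect)
	subl	$N, %esp	# allocate the remainder of the frame
*/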
9538 /* Emit code to restore REG using a POP insn. */
9541 ix86_emit_restore_reg_using_pop (rtx reg, HOST_WIDE_INT red_offset)
9543 rtx insn = emit_insn (ix86_gen_pop1 (reg));
9545 if (ix86_cfa_state->reg == crtl->drap_reg
9546 && REGNO (reg) == REGNO (crtl->drap_reg))
9548 /* Previously we'd represented the CFA as an expression
9549 like *(%ebp - 8). We've just popped that value from
9550 the stack, which means we need to reset the CFA to
9551 the drap register. This will remain until we restore
9552 the stack pointer. */
9553 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9554 RTX_FRAME_RELATED_P (insn) = 1;
9558 if (ix86_cfa_state->reg == stack_pointer_rtx)
9560 ix86_cfa_state->offset -= UNITS_PER_WORD;
9561 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9562 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
9563 RTX_FRAME_RELATED_P (insn) = 1;
9566 /* When the frame pointer is the CFA, and we pop it, we are
9567 swapping back to the stack pointer as the CFA. This happens
9568 for stack frames that don't allocate other data, so we assume
9569 the stack pointer is now pointing at the return address, i.e.
9570 the function entry state, which makes the offset be 1 word. */
9571 else if (ix86_cfa_state->reg == hard_frame_pointer_rtx
9572 && reg == hard_frame_pointer_rtx)
9574 ix86_cfa_state->reg = stack_pointer_rtx;
9575 ix86_cfa_state->offset -= UNITS_PER_WORD;
9577 add_reg_note (insn, REG_CFA_DEF_CFA,
9578 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
9579 GEN_INT (ix86_cfa_state->offset)));
9580 RTX_FRAME_RELATED_P (insn) = 1;
9583 ix86_add_cfa_restore_note (insn, reg, red_offset);
9586 /* Emit code to restore saved registers using POP insns. */
9589 ix86_emit_restore_regs_using_pop (HOST_WIDE_INT red_offset)
9593 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9594 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
9596 ix86_emit_restore_reg_using_pop (gen_rtx_REG (Pmode, regno),
9598 red_offset += UNITS_PER_WORD;
9602 /* Emit code and notes for the LEAVE instruction. */
9605 ix86_emit_leave (HOST_WIDE_INT red_offset)
9607 rtx insn = emit_insn (ix86_gen_leave ());
9609 ix86_add_queued_cfa_restore_notes (insn);
9611 if (ix86_cfa_state->reg == hard_frame_pointer_rtx)
9613 ix86_cfa_state->reg = stack_pointer_rtx;
9614 ix86_cfa_state->offset -= UNITS_PER_WORD;
9616 add_reg_note (insn, REG_CFA_ADJUST_CFA,
9617 copy_rtx (XVECEXP (PATTERN (insn), 0, 0)));
9618 RTX_FRAME_RELATED_P (insn) = 1;
9619 ix86_add_cfa_restore_note (insn, hard_frame_pointer_rtx, red_offset);
9623 /* Emit code to restore saved registers using MOV insns. First register
9624 is restored from POINTER + OFFSET. */
9626 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9627 HOST_WIDE_INT red_offset,
9628 int maybe_eh_return)
9631 rtx base_address = gen_rtx_MEM (Pmode, pointer);
9634 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9635 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9637 rtx reg = gen_rtx_REG (Pmode, regno);
9639 /* Ensure that adjust_address won't be forced to produce pointer
9640 out of range allowed by x86-64 instruction set. */
9641 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9645 r11 = gen_rtx_REG (DImode, R11_REG);
9646 emit_move_insn (r11, GEN_INT (offset));
9647 emit_insn (gen_adddi3 (r11, r11, pointer));
9648 base_address = gen_rtx_MEM (Pmode, r11);
9651 insn = emit_move_insn (reg,
9652 adjust_address (base_address, Pmode, offset));
9653 offset += UNITS_PER_WORD;
9655 if (ix86_cfa_state->reg == crtl->drap_reg
9656 && regno == REGNO (crtl->drap_reg))
9658 /* Previously we'd represented the CFA as an expression
9659 like *(%ebp - 8). We've just popped that value from
9660 the stack, which means we need to reset the CFA to
9661 the drap register. This will remain until we restore
9662 the stack pointer. */
9663 add_reg_note (insn, REG_CFA_DEF_CFA, reg);
9664 RTX_FRAME_RELATED_P (insn) = 1;
9667 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9669 red_offset += UNITS_PER_WORD;
9673 /* Emit code to restore saved SSE registers using MOV insns. First register
9674 is restored from POINTER + OFFSET. */
9676 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
9677 HOST_WIDE_INT red_offset,
9678 int maybe_eh_return)
9681 rtx base_address = gen_rtx_MEM (TImode, pointer);
9684 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
9685 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
9687 rtx reg = gen_rtx_REG (TImode, regno);
9689 /* Ensure that adjust_address won't be forced to produce pointer
9690 out of range allowed by x86-64 instruction set. */
9691 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
9695 r11 = gen_rtx_REG (DImode, R11_REG);
9696 emit_move_insn (r11, GEN_INT (offset));
9697 emit_insn (gen_adddi3 (r11, r11, pointer));
9698 base_address = gen_rtx_MEM (TImode, r11);
9701 mem = adjust_address (base_address, TImode, offset);
9702 set_mem_align (mem, 128);
9703 emit_move_insn (reg, mem);
9706 ix86_add_cfa_restore_note (NULL_RTX, reg, red_offset);
9712 /* Restore function stack, frame, and registers. */
9715 ix86_expand_epilogue (int style)
9718 struct ix86_frame frame;
9719 HOST_WIDE_INT offset, red_offset;
9720 struct machine_cfa_state cfa_state_save = *ix86_cfa_state;
9723 ix86_finalize_stack_realign_flags ();
9725 /* When stack is realigned, SP must be valid. */
9726 sp_valid = (!frame_pointer_needed
9727 || current_function_sp_is_unchanging
9728 || stack_realign_fp);
9730 ix86_compute_frame_layout (&frame);
9732 /* See the comment about red zone and frame
9733 pointer usage in ix86_expand_prologue. */
9734 if (frame_pointer_needed && frame.red_zone_size)
9735 emit_insn (gen_memory_blockage ());
9737 using_drap = crtl->drap_reg && crtl->stack_realign_needed;
9738 gcc_assert (!using_drap || ix86_cfa_state->reg == crtl->drap_reg);
9740 /* Calculate start of saved registers relative to ebp. Special care
9741 must be taken for the normal return case of a function using
9742 eh_return: the eax and edx registers are marked as saved, but not
9743 restored along this path. */
9744 offset = frame.nregs;
9745 if (crtl->calls_eh_return && style != 2)
9747 offset *= -UNITS_PER_WORD;
9748 offset -= frame.nsseregs * 16 + frame.padding0;
9750 /* Calculate start of saved registers relative to esp on entry of the
9751 function. When realigning stack, this needs to be the most negative
9752 value possible at runtime. */
9753 red_offset = offset;
9755 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9757 else if (stack_realign_fp)
9758 red_offset -= crtl->stack_alignment_needed / BITS_PER_UNIT
9760 if (ix86_static_chain_on_stack)
9761 red_offset -= UNITS_PER_WORD;
9762 if (frame_pointer_needed)
9763 red_offset -= UNITS_PER_WORD;
9765 /* If we're only restoring one register and sp is not valid, then
9766 we use a move instruction to restore the register, since it's
9767 less work than reloading sp and popping the register.
9769 The default code results in a stack adjustment using an add/lea instruction,
9770 while this code results in a LEAVE instruction (or discrete equivalent),
9771 so it is profitable in some other cases as well. Especially when there
9772 are no registers to restore. We also use this code when TARGET_USE_LEAVE
9773 and there is exactly one register to pop. This heuristic may need some
9774 tuning in future. */
9775 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
9776 || (TARGET_EPILOGUE_USING_MOVE
9777 && cfun->machine->use_fast_prologue_epilogue
9778 && ((frame.nregs + frame.nsseregs) > 1
9779 || (frame.to_allocate + frame.padding0) != 0))
9780 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
9781 && (frame.to_allocate + frame.padding0) != 0)
9782 || (frame_pointer_needed && TARGET_USE_LEAVE
9783 && cfun->machine->use_fast_prologue_epilogue
9784 && (frame.nregs + frame.nsseregs) == 1)
9785 || crtl->calls_eh_return)
9787 /* Restore registers. We can use ebp or esp to address the memory
9788 locations. If both are available, default to ebp, since offsets
9789 are known to be small. The only exception is esp pointing directly
9790 to the end of the block of saved registers, where we may simplify
9791 the addressing mode.
9793 If we are realigning the stack with bp and sp, the register restores
9794 can't be addressed by bp; sp must be used instead. */
9796 if (!frame_pointer_needed
9797 || (sp_valid && !(frame.to_allocate + frame.padding0))
9798 || stack_realign_fp)
9800 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9801 frame.to_allocate, red_offset,
9803 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
9805 + frame.nsseregs * 16
9808 + frame.nsseregs * 16
9809 + frame.padding0, style == 2);
9813 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
9816 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
9818 + frame.nsseregs * 16
9821 + frame.nsseregs * 16
9822 + frame.padding0, style == 2);
9825 red_offset -= offset;
9827 /* eh_return epilogues need %ecx added to the stack pointer. */
9830 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
9832 /* Stack align doesn't work with eh_return. */
9833 gcc_assert (!crtl->stack_realign_needed);
9834 /* Neither do regparm nested functions. */
9835 gcc_assert (!ix86_static_chain_on_stack);
9837 if (frame_pointer_needed)
9839 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
9840 tmp = plus_constant (tmp, UNITS_PER_WORD);
9841 tmp = emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
9843 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
9844 tmp = emit_move_insn (hard_frame_pointer_rtx, tmp);
9846 /* Note that we use SA as a temporary CFA, as the return
9847 address is at the proper place relative to it. We
9848 pretend this happens at the FP restore insn because
9849 prior to this insn the FP would be stored at the wrong
9850 offset relative to SA, and after this insn we have no
9851 other reasonable register to use for the CFA. We don't
9852 bother resetting the CFA to the SP for the duration of
9853 the return insn. */
9854 add_reg_note (tmp, REG_CFA_DEF_CFA,
9855 plus_constant (sa, UNITS_PER_WORD));
9856 ix86_add_queued_cfa_restore_notes (tmp);
9857 add_reg_note (tmp, REG_CFA_RESTORE, hard_frame_pointer_rtx);
9858 RTX_FRAME_RELATED_P (tmp) = 1;
9859 ix86_cfa_state->reg = sa;
9860 ix86_cfa_state->offset = UNITS_PER_WORD;
9862 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
9863 const0_rtx, style, false);
9867 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
9868 tmp = plus_constant (tmp, (frame.to_allocate
9869 + frame.nregs * UNITS_PER_WORD
9870 + frame.nsseregs * 16
9872 tmp = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
9873 ix86_add_queued_cfa_restore_notes (tmp);
9875 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
9876 if (ix86_cfa_state->offset != UNITS_PER_WORD)
9878 ix86_cfa_state->offset = UNITS_PER_WORD;
9879 add_reg_note (tmp, REG_CFA_DEF_CFA,
9880 plus_constant (stack_pointer_rtx,
9882 RTX_FRAME_RELATED_P (tmp) = 1;
9886 else if (!frame_pointer_needed)
9887 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9888 GEN_INT (frame.to_allocate
9889 + frame.nregs * UNITS_PER_WORD
9890 + frame.nsseregs * 16
9892 style, !using_drap);
9893 /* If not an i386, mov & pop is faster than "leave". */
9894 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
9895 || !cfun->machine->use_fast_prologue_epilogue)
9896 ix86_emit_leave (red_offset);
9899 pro_epilogue_adjust_stack (stack_pointer_rtx,
9900 hard_frame_pointer_rtx,
9901 const0_rtx, style, !using_drap);
9903 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx, red_offset);
9908 /* The first step is to deallocate the stack frame so that we can
9909 restore the registers.
9911 If we realign the stack with the frame pointer, then the stack pointer
9912 won't be able to recover via lea $offset(%bp), %sp, because
9913 there is a padding area between bp and sp for the realignment.
9914 "add $to_allocate, %sp" must be used instead. */
9917 gcc_assert (frame_pointer_needed);
9918 gcc_assert (!stack_realign_fp);
9919 pro_epilogue_adjust_stack (stack_pointer_rtx,
9920 hard_frame_pointer_rtx,
9921 GEN_INT (offset), style, false);
9922 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9925 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9926 GEN_INT (frame.nsseregs * 16
9930 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
9932 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
9933 frame.to_allocate, red_offset,
9935 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
9936 GEN_INT (frame.to_allocate
9937 + frame.nsseregs * 16
9938 + frame.padding0), style,
9939 !using_drap && !frame_pointer_needed);
9942 ix86_emit_restore_regs_using_pop (red_offset + frame.nsseregs * 16
9944 red_offset -= offset;
9946 if (frame_pointer_needed)
9948 /* Leave results in shorter dependency chains on CPUs that are
9949 able to grok it fast. */
9950 if (TARGET_USE_LEAVE)
9951 ix86_emit_leave (red_offset);
9954 /* When stack realignment really happens, recovering the stack
9955 pointer from the hard frame pointer is a must, if not using
9956 leave. */
9957 if (stack_realign_fp)
9958 pro_epilogue_adjust_stack (stack_pointer_rtx,
9959 hard_frame_pointer_rtx,
9960 const0_rtx, style, !using_drap);
9961 ix86_emit_restore_reg_using_pop (hard_frame_pointer_rtx,
9969 int param_ptr_offset = UNITS_PER_WORD;
9972 gcc_assert (stack_realign_drap);
9974 if (ix86_static_chain_on_stack)
9975 param_ptr_offset += UNITS_PER_WORD;
9976 if (!call_used_regs[REGNO (crtl->drap_reg)])
9977 param_ptr_offset += UNITS_PER_WORD;
9979 insn = emit_insn (gen_rtx_SET
9980 (VOIDmode, stack_pointer_rtx,
9981 gen_rtx_PLUS (Pmode,
9983 GEN_INT (-param_ptr_offset))));
9984 ix86_cfa_state->reg = stack_pointer_rtx;
9985 ix86_cfa_state->offset = param_ptr_offset;
9987 add_reg_note (insn, REG_CFA_DEF_CFA,
9988 gen_rtx_PLUS (Pmode, ix86_cfa_state->reg,
9989 GEN_INT (ix86_cfa_state->offset)));
9990 RTX_FRAME_RELATED_P (insn) = 1;
9992 if (!call_used_regs[REGNO (crtl->drap_reg)])
9993 ix86_emit_restore_reg_using_pop (crtl->drap_reg, -UNITS_PER_WORD);
9996 /* Remove the saved static chain from the stack. The use of ECX is
9997 merely as a scratch register, not as the actual static chain. */
9998 if (ix86_static_chain_on_stack)
10002 gcc_assert (ix86_cfa_state->reg == stack_pointer_rtx);
10003 ix86_cfa_state->offset += UNITS_PER_WORD;
10005 r = gen_rtx_REG (Pmode, CX_REG);
10006 insn = emit_insn (ix86_gen_pop1 (r));
10008 r = plus_constant (stack_pointer_rtx, UNITS_PER_WORD);
10009 r = gen_rtx_SET (VOIDmode, stack_pointer_rtx, r);
10010 add_reg_note (insn, REG_CFA_ADJUST_CFA, r);
10011 RTX_FRAME_RELATED_P (insn) = 1;
10014 /* Sibcall epilogues don't want a return instruction. */
10017 *ix86_cfa_state = cfa_state_save;
10021 if (crtl->args.pops_args && crtl->args.size)
10023 rtx popc = GEN_INT (crtl->args.pops_args);
10025 /* i386 can only pop 64K bytes. If asked to pop more, pop the return
10026 address, do an explicit add, and jump indirectly to the caller. */
10028 if (crtl->args.pops_args >= 65536)
10030 rtx ecx = gen_rtx_REG (SImode, CX_REG);
10033 /* There is no "pascal" calling convention in any 64bit ABI. */
10034 gcc_assert (!TARGET_64BIT);
10036 insn = emit_insn (gen_popsi1 (ecx));
10037 ix86_cfa_state->offset -= UNITS_PER_WORD;
10039 add_reg_note (insn, REG_CFA_ADJUST_CFA,
10040 copy_rtx (XVECEXP (PATTERN (insn), 0, 1)));
10041 add_reg_note (insn, REG_CFA_REGISTER,
10042 gen_rtx_SET (VOIDmode, ecx, pc_rtx));
10043 RTX_FRAME_RELATED_P (insn) = 1;
10045 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
10047 emit_jump_insn (gen_return_indirect_internal (ecx));
10050 emit_jump_insn (gen_return_pop_internal (popc));
10053 emit_jump_insn (gen_return_internal ());
10055 /* Restore the state back to the state from the prologue,
10056 so that it's correct for the next epilogue. */
10057 *ix86_cfa_state = cfa_state_save;
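/* For reference, when crtl->args.pops_args >= 65536 the epilogue above ends
   with a sequence roughly like the following, since "ret $imm16" can only
   pop 64K bytes (illustrative sketch):

	popl	%ecx		# pop the return address into a scratch reg
	addl	$N, %esp	# explicitly pop the N bytes of arguments
	jmp	*%ecx		# return to the caller indirectly
*/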
10060 /* Reset from the function's potential modifications. */
10063 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
10064 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
10066 if (pic_offset_table_rtx)
10067 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
10069 /* Mach-O doesn't support labels at the end of objects, so if
10070 it looks like we might want one, insert a NOP. */
10072 rtx insn = get_last_insn ();
10075 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
10076 insn = PREV_INSN (insn);
10080 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
10081 fputs ("\tnop\n", file);
10087 /* Extract the parts of an RTL expression that is a valid memory address
10088 for an instruction. Return 0 if the structure of the address is
10089 grossly off. Return -1 if the address contains ASHIFT, so it is not
10090 strictly valid, but is still used for computing the length of the lea instruction. */
10093 ix86_decompose_address (rtx addr, struct ix86_address *out)
10095 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
10096 rtx base_reg, index_reg;
10097 HOST_WIDE_INT scale = 1;
10098 rtx scale_rtx = NULL_RTX;
10101 enum ix86_address_seg seg = SEG_DEFAULT;
10103 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
10105 else if (GET_CODE (addr) == PLUS)
10107 rtx addends[4], op;
10115 addends[n++] = XEXP (op, 1);
10118 while (GET_CODE (op) == PLUS);
10123 for (i = n; i >= 0; --i)
10126 switch (GET_CODE (op))
10131 index = XEXP (op, 0);
10132 scale_rtx = XEXP (op, 1);
10138 index = XEXP (op, 0);
10139 tmp = XEXP (op, 1);
10140 if (!CONST_INT_P (tmp))
10142 scale = INTVAL (tmp);
10143 if ((unsigned HOST_WIDE_INT) scale > 3)
10145 scale = 1 << scale;
10149 if (XINT (op, 1) == UNSPEC_TP
10150 && TARGET_TLS_DIRECT_SEG_REFS
10151 && seg == SEG_DEFAULT)
10152 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
10181 else if (GET_CODE (addr) == MULT)
10183 index = XEXP (addr, 0); /* index*scale */
10184 scale_rtx = XEXP (addr, 1);
10186 else if (GET_CODE (addr) == ASHIFT)
10188 /* We're called for lea too, which implements ashift on occasion. */
10189 index = XEXP (addr, 0);
10190 tmp = XEXP (addr, 1);
10191 if (!CONST_INT_P (tmp))
10193 scale = INTVAL (tmp);
10194 if ((unsigned HOST_WIDE_INT) scale > 3)
10196 scale = 1 << scale;
10200 disp = addr; /* displacement */
10202 /* Extract the integral value of scale. */
10205 if (!CONST_INT_P (scale_rtx))
10207 scale = INTVAL (scale_rtx);
10210 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
10211 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
10213 /* Avoid useless 0 displacement. */
10214 if (disp == const0_rtx && (base || index))
10217 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
10218 if (base_reg && index_reg && scale == 1
10219 && (index_reg == arg_pointer_rtx
10220 || index_reg == frame_pointer_rtx
10221 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
10224 tmp = base, base = index, index = tmp;
10225 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
10228 /* Special case: %ebp cannot be encoded as a base without a displacement. Similarly %r13. */
10232 && (base_reg == hard_frame_pointer_rtx
10233 || base_reg == frame_pointer_rtx
10234 || base_reg == arg_pointer_rtx
10235 || (REG_P (base_reg)
10236 && (REGNO (base_reg) == HARD_FRAME_POINTER_REGNUM
10237 || REGNO (base_reg) == R13_REG))))
10240 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
10241 Avoid this by transforming to [%esi+0].
10242 Reload calls address legitimization without cfun defined, so we need
10243 to test cfun for being non-NULL. */
10244 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
10245 && base_reg && !index_reg && !disp
10246 && REG_P (base_reg)
10247 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
10250 /* Special case: encode reg+reg instead of reg*2. */
10251 if (!base && index && scale == 2)
10252 base = index, base_reg = index_reg, scale = 1;
10254 /* Special case: scaling cannot be encoded without base or displacement. */
10255 if (!base && !disp && index && scale != 1)
10259 out->index = index;
10261 out->scale = scale;
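/* A minimal usage sketch (an assumption, not part of the original
   sources): decompose the 32-bit address 8(%ebx,%ecx,4) and read back
   the parts.  Kept under #if 0 since it is illustrative only.  */
#if 0
static void
decompose_address_example (void)
{
  struct ix86_address parts;
  rtx addr
    = gen_rtx_PLUS (SImode,
		    gen_rtx_PLUS (SImode,
				  gen_rtx_REG (SImode, BX_REG),
				  gen_rtx_MULT (SImode,
						gen_rtx_REG (SImode, CX_REG),
						GEN_INT (4))),
		    GEN_INT (8));

  if (ix86_decompose_address (addr, &parts) > 0)
    {
      /* Here parts.base is %ebx, parts.index is %ecx, parts.scale
	 is 4, and parts.disp is (const_int 8).  */
    }
}
#endif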
10267 /* Return the cost of the memory address x.
10268 For i386, it is better to use a complex address than let gcc copy
10269 the address into a reg and make a new pseudo. But not if the address
10270 requires two regs - that would mean more pseudos with longer lifetimes. */
10273 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
10275 struct ix86_address parts;
10277 int ok = ix86_decompose_address (x, &parts);
10281 if (parts.base && GET_CODE (parts.base) == SUBREG)
10282 parts.base = SUBREG_REG (parts.base);
10283 if (parts.index && GET_CODE (parts.index) == SUBREG)
10284 parts.index = SUBREG_REG (parts.index);
10286 /* Attempt to minimize the number of registers in the address. */
10288 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
10290 && (!REG_P (parts.index)
10291 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
10295 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
10297 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
10298 && parts.base != parts.index)
10301 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
10302 since its predecode logic can't detect the length of instructions
10303 and it degenerates to vector decoded. Increase the cost of such
10304 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
10305 to split such addresses or even refuse such addresses at all.
10307 The following addressing modes are affected:
10308 [base+scale*index]
10309 [scale*index+disp]
10310 [base+index]
10312 The first and last case may be avoidable by explicitly coding the zero in
10313 the memory address, but I don't have an AMD-K6 machine handy to check this
10314 theory. */
10317 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
10318 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
10319 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
10325 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
10326 this is used to form addresses to local data when -fPIC is in effect. */
10330 darwin_local_data_pic (rtx disp)
10332 return (GET_CODE (disp) == UNSPEC
10333 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
10336 /* Determine if a given RTX is a valid constant. We already know this
10337 satisfies CONSTANT_P. */
10340 legitimate_constant_p (rtx x)
10342 switch (GET_CODE (x))
10347 if (GET_CODE (x) == PLUS)
10349 if (!CONST_INT_P (XEXP (x, 1)))
10354 if (TARGET_MACHO && darwin_local_data_pic (x))
10357 /* Only some unspecs are valid as "constants". */
10358 if (GET_CODE (x) == UNSPEC)
10359 switch (XINT (x, 1))
10362 case UNSPEC_GOTOFF:
10363 case UNSPEC_PLTOFF:
10364 return TARGET_64BIT;
10366 case UNSPEC_NTPOFF:
10367 x = XVECEXP (x, 0, 0);
10368 return (GET_CODE (x) == SYMBOL_REF
10369 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10370 case UNSPEC_DTPOFF:
10371 x = XVECEXP (x, 0, 0);
10372 return (GET_CODE (x) == SYMBOL_REF
10373 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
10378 /* We must have drilled down to a symbol. */
10379 if (GET_CODE (x) == LABEL_REF)
10381 if (GET_CODE (x) != SYMBOL_REF)
10386 /* TLS symbols are never valid. */
10387 if (SYMBOL_REF_TLS_MODEL (x))
10390 /* DLLIMPORT symbols are never valid. */
10391 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
10392 && SYMBOL_REF_DLLIMPORT_P (x))
10397 if (GET_MODE (x) == TImode
10398 && x != CONST0_RTX (TImode)
10404 if (!standard_sse_constant_p (x))
10411 /* Otherwise we handle everything else in the move patterns. */
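/* Illustrative examples (an assumption, not from the original file):
   (const_int 42), a (symbol_ref "foo") that is neither TLS nor
   dllimport, and (const (plus (symbol_ref "foo") (const_int 4))) are
   all accepted above, while any symbol_ref with a nonzero
   SYMBOL_REF_TLS_MODEL is rejected.  */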
10415 /* Determine if it's legal to put X into the constant pool. This
10416 is not possible for the address of thread-local symbols, which
10417 is checked above. */
10420 ix86_cannot_force_const_mem (rtx x)
10422 /* We can always put integral constants and vectors in memory. */
10423 switch (GET_CODE (x))
10433 return !legitimate_constant_p (x);
10437 /* Nonzero if the constant value X is a legitimate general operand
10438 when generating PIC code. It is given that flag_pic is on and
10439 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
10442 legitimate_pic_operand_p (rtx x)
10446 switch (GET_CODE (x))
10449 inner = XEXP (x, 0);
10450 if (GET_CODE (inner) == PLUS
10451 && CONST_INT_P (XEXP (inner, 1)))
10452 inner = XEXP (inner, 0);
10454 /* Only some unspecs are valid as "constants". */
10455 if (GET_CODE (inner) == UNSPEC)
10456 switch (XINT (inner, 1))
10459 case UNSPEC_GOTOFF:
10460 case UNSPEC_PLTOFF:
10461 return TARGET_64BIT;
10463 x = XVECEXP (inner, 0, 0);
10464 return (GET_CODE (x) == SYMBOL_REF
10465 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
10466 case UNSPEC_MACHOPIC_OFFSET:
10467 return legitimate_pic_address_disp_p (x);
10475 return legitimate_pic_address_disp_p (x);
10482 /* Determine if a given CONST RTX is a valid memory displacement in PIC mode. */
10486 legitimate_pic_address_disp_p (rtx disp)
10490 /* In 64bit mode we can allow direct addresses of symbols and labels
10491 when they are not dynamic symbols. */
10494 rtx op0 = disp, op1;
10496 switch (GET_CODE (disp))
10502 if (GET_CODE (XEXP (disp, 0)) != PLUS)
10504 op0 = XEXP (XEXP (disp, 0), 0);
10505 op1 = XEXP (XEXP (disp, 0), 1);
10506 if (!CONST_INT_P (op1)
10507 || INTVAL (op1) >= 16*1024*1024
10508 || INTVAL (op1) < -16*1024*1024)
10510 if (GET_CODE (op0) == LABEL_REF)
10512 if (GET_CODE (op0) != SYMBOL_REF)
10517 /* TLS references should always be enclosed in UNSPEC. */
10518 if (SYMBOL_REF_TLS_MODEL (op0))
10520 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
10521 && ix86_cmodel != CM_LARGE_PIC)
10529 if (GET_CODE (disp) != CONST)
10531 disp = XEXP (disp, 0);
10535 /* It is not safe to allow PLUS expressions here. This limits the
10536 allowed distance of GOT table references, but we should not need those anyway. */
10537 if (GET_CODE (disp) != UNSPEC
10538 || (XINT (disp, 1) != UNSPEC_GOTPCREL
10539 && XINT (disp, 1) != UNSPEC_GOTOFF
10540 && XINT (disp, 1) != UNSPEC_PLTOFF))
10543 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
10544 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
10550 if (GET_CODE (disp) == PLUS)
10552 if (!CONST_INT_P (XEXP (disp, 1)))
10554 disp = XEXP (disp, 0);
10558 if (TARGET_MACHO && darwin_local_data_pic (disp))
10561 if (GET_CODE (disp) != UNSPEC)
10564 switch (XINT (disp, 1))
10569 /* We need to check for both symbols and labels because VxWorks loads
10570 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
10571 details. */
10574 case UNSPEC_GOTOFF:
10575 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
10576 While the ABI also specifies a 32bit relocation, we don't produce it in
10577 the small PIC model at all. */
10578 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
10579 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
10581 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
10583 case UNSPEC_GOTTPOFF:
10584 case UNSPEC_GOTNTPOFF:
10585 case UNSPEC_INDNTPOFF:
10588 disp = XVECEXP (disp, 0, 0);
10589 return (GET_CODE (disp) == SYMBOL_REF
10590 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
10591 case UNSPEC_NTPOFF:
10592 disp = XVECEXP (disp, 0, 0);
10593 return (GET_CODE (disp) == SYMBOL_REF
10594 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
10595 case UNSPEC_DTPOFF:
10596 disp = XVECEXP (disp, 0, 0);
10597 return (GET_CODE (disp) == SYMBOL_REF
10598 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
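/* Worked examples (illustrative, not part of the original sources) of
   displacements this predicate accepts in 32-bit PIC code:

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))	foo@GOTOFF
     (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)
		  (const_int 4)))				foo@GOTOFF+4
     (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))		foo@GOT

   A bare (symbol_ref "foo") is rejected; such references must first be
   rewritten by legitimize_pic_address.  */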
10604 /* Recognizes RTL expressions that are valid memory addresses for an
10605 instruction. The MODE argument is the machine mode for the MEM
10606 expression that wants to use this address.
10608 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
10609 convert common non-canonical forms to canonical form so that they will
10610 be recognized. */
10613 ix86_legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
10614 rtx addr, bool strict)
10616 struct ix86_address parts;
10617 rtx base, index, disp;
10618 HOST_WIDE_INT scale;
10620 if (ix86_decompose_address (addr, &parts) <= 0)
10621 /* Decomposition failed. */
10625 index = parts.index;
10627 scale = parts.scale;
10629 /* Validate base register.
10631 Don't allow SUBREG's that span more than a word here. It can lead to spill
10632 failures when the base is one word out of a two word structure, which is
10633 represented internally as a DImode int. */
10641 else if (GET_CODE (base) == SUBREG
10642 && REG_P (SUBREG_REG (base))
10643 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
10645 reg = SUBREG_REG (base);
10647 /* Base is not a register. */
10650 if (GET_MODE (base) != Pmode)
10651 /* Base is not in Pmode. */
10654 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
10655 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
10656 /* Base is not valid. */
10660 /* Validate index register.
10662 Don't allow SUBREG's that span more than a word here -- same as above. */
10670 else if (GET_CODE (index) == SUBREG
10671 && REG_P (SUBREG_REG (index))
10672 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
10674 reg = SUBREG_REG (index);
10676 /* Index is not a register. */
10679 if (GET_MODE (index) != Pmode)
10680 /* Index is not in Pmode. */
10683 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
10684 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
10685 /* Index is not valid. */
10689 /* Validate scale factor. */
10693 /* Scale without index. */
10696 if (scale != 2 && scale != 4 && scale != 8)
10697 /* Scale is not a valid multiplier. */
10701 /* Validate displacement. */
10704 if (GET_CODE (disp) == CONST
10705 && GET_CODE (XEXP (disp, 0)) == UNSPEC
10706 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
10707 switch (XINT (XEXP (disp, 0), 1))
10709 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
10710 used. While the ABI also specifies 32bit relocations, we don't produce
10711 them at all and use IP-relative addressing instead. */
10713 case UNSPEC_GOTOFF:
10714 gcc_assert (flag_pic);
10716 goto is_legitimate_pic;
10718 /* 64bit address unspec. */
10721 case UNSPEC_GOTPCREL:
10722 gcc_assert (flag_pic);
10723 goto is_legitimate_pic;
10725 case UNSPEC_GOTTPOFF:
10726 case UNSPEC_GOTNTPOFF:
10727 case UNSPEC_INDNTPOFF:
10728 case UNSPEC_NTPOFF:
10729 case UNSPEC_DTPOFF:
10733 /* Invalid address unspec. */
10737 else if (SYMBOLIC_CONST (disp)
10741 && MACHOPIC_INDIRECT
10742 && !machopic_operand_p (disp)
10748 if (TARGET_64BIT && (index || base))
10750 /* foo@dtpoff(%rX) is ok. */
10751 if (GET_CODE (disp) != CONST
10752 || GET_CODE (XEXP (disp, 0)) != PLUS
10753 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
10754 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
10755 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
10756 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
10757 /* Non-constant pic memory reference. */
10760 else if (! legitimate_pic_address_disp_p (disp))
10761 /* Displacement is an invalid pic construct. */
10764 /* This code used to verify that a symbolic pic displacement
10765 includes the pic_offset_table_rtx register.
10767 While this is a good idea, unfortunately these constructs may
10768 be created by the "adds using lea" optimization for incorrect
10769 code. Such code is nonsensical, but results in addressing the
10778 GOT table with a pic_offset_table_rtx base. We can't
10779 just refuse it easily, since it gets matched by the
10780 "addsi3" pattern, which later gets split to lea in the
10781 case the output register differs from the input. While this
10782 could be handled by a separate addsi pattern for this case
10783 that never results in lea, disabling this test seems to be the
10784 easier and correct fix for the crash. */
10786 else if (GET_CODE (disp) != LABEL_REF
10787 && !CONST_INT_P (disp)
10788 && (GET_CODE (disp) != CONST
10789 || !legitimate_constant_p (disp))
10790 && (GET_CODE (disp) != SYMBOL_REF
10791 || !legitimate_constant_p (disp)))
10792 /* Displacement is not constant. */
10794 else if (TARGET_64BIT
10795 && !x86_64_immediate_operand (disp, VOIDmode))
10796 /* Displacement is out of range. */
10800 /* Everything looks valid. */
10804 /* Determine if a given RTX is a valid constant address. */
10807 constant_address_p (rtx x)
10809 return CONSTANT_P (x) && ix86_legitimate_address_p (Pmode, x, 1);
10812 /* Return a unique alias set for the GOT. */
10814 static alias_set_type
10815 ix86_GOT_alias_set (void)
10817 static alias_set_type set = -1;
10819 set = new_alias_set ();
10823 /* Return a legitimate reference for ORIG (an address) using the
10824 register REG. If REG is 0, a new pseudo is generated.
10826 There are two types of references that must be handled:
10828 1. Global data references must load the address from the GOT, via
10829 the PIC reg. An insn is emitted to do this load, and the reg is
10832 2. Static data references, constant pool addresses, and code labels
10833 compute the address as an offset from the GOT, whose base is in
10834 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
10835 differentiate them from global data objects. The returned
10836 address is the PIC reg + an unspec constant.
10838 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
10839 reg also appears in the address. */
10842 legitimize_pic_address (rtx orig, rtx reg)
10845 rtx new_rtx = orig;
10849 if (TARGET_MACHO && !TARGET_64BIT)
10852 reg = gen_reg_rtx (Pmode);
10853 /* Use the generic Mach-O PIC machinery. */
10854 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
10858 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
10860 else if (TARGET_64BIT
10861 && ix86_cmodel != CM_SMALL_PIC
10862 && gotoff_operand (addr, Pmode))
10865 /* This symbol may be referenced via a displacement from the PIC
10866 base address (@GOTOFF). */
10868 if (reload_in_progress)
10869 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10870 if (GET_CODE (addr) == CONST)
10871 addr = XEXP (addr, 0);
10872 if (GET_CODE (addr) == PLUS)
10874 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10876 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10879 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10880 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10882 tmpreg = gen_reg_rtx (Pmode);
10885 emit_move_insn (tmpreg, new_rtx);
10889 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
10890 tmpreg, 1, OPTAB_DIRECT);
10893 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
10895 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
10897 /* This symbol may be referenced via a displacement from the PIC
10898 base address (@GOTOFF). */
10900 if (reload_in_progress)
10901 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10902 if (GET_CODE (addr) == CONST)
10903 addr = XEXP (addr, 0);
10904 if (GET_CODE (addr) == PLUS)
10906 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
10908 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
10911 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
10912 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10913 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10917 emit_move_insn (reg, new_rtx);
10921 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
10922 /* We can't use @GOTOFF for text labels on VxWorks;
10923 see gotoff_operand. */
10924 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
10926 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10928 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
10929 return legitimize_dllimport_symbol (addr, true);
10930 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
10931 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
10932 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
10934 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
10935 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
10939 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
10941 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
10942 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10943 new_rtx = gen_const_mem (Pmode, new_rtx);
10944 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10947 reg = gen_reg_rtx (Pmode);
10948 /* Use gen_movsi directly, otherwise the address is loaded
10949 into a register for CSE. We don't want to CSE these addresses;
10950 instead we CSE addresses from the GOT table, so skip this. */
10951 emit_insn (gen_movsi (reg, new_rtx));
10956 /* This symbol must be referenced via a load from the
10957 Global Offset Table (@GOT). */
10959 if (reload_in_progress)
10960 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
10961 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
10962 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
10964 new_rtx = force_reg (Pmode, new_rtx);
10965 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
10966 new_rtx = gen_const_mem (Pmode, new_rtx);
10967 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
10970 reg = gen_reg_rtx (Pmode);
10971 emit_move_insn (reg, new_rtx);
10977 if (CONST_INT_P (addr)
10978 && !x86_64_immediate_operand (addr, VOIDmode))
10982 emit_move_insn (reg, addr);
10986 new_rtx = force_reg (Pmode, addr);
10988 else if (GET_CODE (addr) == CONST)
10990 addr = XEXP (addr, 0);
10992 /* We must match stuff we generate before. Assume the only
10993 unspecs that can get here are ours. Not that we could do
10994 anything with them anyway.... */
10995 if (GET_CODE (addr) == UNSPEC
10996 || (GET_CODE (addr) == PLUS
10997 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
10999 gcc_assert (GET_CODE (addr) == PLUS);
11001 if (GET_CODE (addr) == PLUS)
11003 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
11005 /* Check first to see if this is a constant offset from a @GOTOFF
11006 symbol reference. */
11007 if (gotoff_operand (op0, Pmode)
11008 && CONST_INT_P (op1))
11012 if (reload_in_progress)
11013 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11014 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
11016 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
11017 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
11018 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
11022 emit_move_insn (reg, new_rtx);
11028 if (INTVAL (op1) < -16*1024*1024
11029 || INTVAL (op1) >= 16*1024*1024)
11031 if (!x86_64_immediate_operand (op1, Pmode))
11032 op1 = force_reg (Pmode, op1);
11033 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
11039 base = legitimize_pic_address (XEXP (addr, 0), reg);
11040 new_rtx = legitimize_pic_address (XEXP (addr, 1),
11041 base == reg ? NULL_RTX : reg);
11043 if (CONST_INT_P (new_rtx))
11044 new_rtx = plus_constant (base, INTVAL (new_rtx));
11047 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
11049 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
11050 new_rtx = XEXP (new_rtx, 1);
11052 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
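/* A worked example (an assumption, not part of the original sources):
   in 32-bit PIC code a reference to a global symbol "foo" is rewritten
   by the routine above into a load from the GOT,

     (mem (plus (reg:SI ebx)
		(const (unspec [(symbol_ref "foo")] UNSPEC_GOT))))

   i.e. foo@GOT(%ebx), while a local symbol becomes the cheaper address
   computation

     (plus (reg:SI ebx)
	   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   i.e. foo@GOTOFF(%ebx), with no memory load.  */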
11060 /* Load the thread pointer. If TO_REG is true, force it into a register. */
11063 get_thread_pointer (int to_reg)
11067 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
11071 reg = gen_reg_rtx (Pmode);
11072 insn = gen_rtx_SET (VOIDmode, reg, tp);
11073 insn = emit_insn (insn);
11078 /* A subroutine of ix86_legitimize_address and ix86_expand_move. FOR_MOV is
11079 false if we expect this to be used for a memory address and true if
11080 we expect to load the address into a register. */
11083 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
11085 rtx dest, base, off, pic, tp;
11090 case TLS_MODEL_GLOBAL_DYNAMIC:
11091 dest = gen_reg_rtx (Pmode);
11092 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11094 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11096 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
11099 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
11100 insns = get_insns ();
11103 RTL_CONST_CALL_P (insns) = 1;
11104 emit_libcall_block (insns, dest, rax, x);
11106 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11107 emit_insn (gen_tls_global_dynamic_64 (dest, x));
11109 emit_insn (gen_tls_global_dynamic_32 (dest, x));
11111 if (TARGET_GNU2_TLS)
11113 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
11115 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11119 case TLS_MODEL_LOCAL_DYNAMIC:
11120 base = gen_reg_rtx (Pmode);
11121 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
11123 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
11125 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
11128 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
11129 insns = get_insns ();
11132 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
11133 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
11134 RTL_CONST_CALL_P (insns) = 1;
11135 emit_libcall_block (insns, base, rax, note);
11137 else if (TARGET_64BIT && TARGET_GNU2_TLS)
11138 emit_insn (gen_tls_local_dynamic_base_64 (base));
11140 emit_insn (gen_tls_local_dynamic_base_32 (base));
11142 if (TARGET_GNU2_TLS)
11144 rtx x = ix86_tls_module_base ();
11146 set_unique_reg_note (get_last_insn (), REG_EQUIV,
11147 gen_rtx_MINUS (Pmode, x, tp));
11150 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
11151 off = gen_rtx_CONST (Pmode, off);
11153 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
11155 if (TARGET_GNU2_TLS)
11157 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
11159 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
11164 case TLS_MODEL_INITIAL_EXEC:
11168 type = UNSPEC_GOTNTPOFF;
11172 if (reload_in_progress)
11173 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
11174 pic = pic_offset_table_rtx;
11175 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
11177 else if (!TARGET_ANY_GNU_TLS)
11179 pic = gen_reg_rtx (Pmode);
11180 emit_insn (gen_set_got (pic));
11181 type = UNSPEC_GOTTPOFF;
11186 type = UNSPEC_INDNTPOFF;
11189 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
11190 off = gen_rtx_CONST (Pmode, off);
11192 off = gen_rtx_PLUS (Pmode, pic, off);
11193 off = gen_const_mem (Pmode, off);
11194 set_mem_alias_set (off, ix86_GOT_alias_set ());
11196 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11198 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11199 off = force_reg (Pmode, off);
11200 return gen_rtx_PLUS (Pmode, base, off);
11204 base = get_thread_pointer (true);
11205 dest = gen_reg_rtx (Pmode);
11206 emit_insn (gen_subsi3 (dest, base, off));
11210 case TLS_MODEL_LOCAL_EXEC:
11211 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
11212 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11213 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
11214 off = gen_rtx_CONST (Pmode, off);
11216 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
11218 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
11219 return gen_rtx_PLUS (Pmode, base, off);
11223 base = get_thread_pointer (true);
11224 dest = gen_reg_rtx (Pmode);
11225 emit_insn (gen_subsi3 (dest, base, off));
11230 gcc_unreachable ();
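/* Hedged illustration (an assumption, not original text): for the
   local-exec model with GNU TLS the code above returns an address of
   the form

     (plus (reg)	; the thread pointer
	   (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF)))

   which later prints as x@ntpoff, giving accesses such as
   "movl %gs:x@ntpoff, %eax" on 32-bit GNU/Linux.  */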
11236 /* Create or return the unique __imp_DECL dllimport symbol corresponding to symbol DECL. */
11239 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
11240 htab_t dllimport_map;
11243 get_dllimport_decl (tree decl)
11245 struct tree_map *h, in;
11248 const char *prefix;
11249 size_t namelen, prefixlen;
11254 if (!dllimport_map)
11255 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
11257 in.hash = htab_hash_pointer (decl);
11258 in.base.from = decl;
11259 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
11260 h = (struct tree_map *) *loc;
11264 *loc = h = ggc_alloc_tree_map ();
11266 h->base.from = decl;
11267 h->to = to = build_decl (DECL_SOURCE_LOCATION (decl),
11268 VAR_DECL, NULL, ptr_type_node);
11269 DECL_ARTIFICIAL (to) = 1;
11270 DECL_IGNORED_P (to) = 1;
11271 DECL_EXTERNAL (to) = 1;
11272 TREE_READONLY (to) = 1;
11274 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
11275 name = targetm.strip_name_encoding (name);
11276 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
11277 ? "*__imp_" : "*__imp__";
11278 namelen = strlen (name);
11279 prefixlen = strlen (prefix);
11280 imp_name = (char *) alloca (namelen + prefixlen + 1);
11281 memcpy (imp_name, prefix, prefixlen);
11282 memcpy (imp_name + prefixlen, name, namelen + 1);
11284 name = ggc_alloc_string (imp_name, namelen + prefixlen);
11285 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
11286 SET_SYMBOL_REF_DECL (rtl, to);
11287 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
11289 rtl = gen_const_mem (Pmode, rtl);
11290 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
11292 SET_DECL_RTL (to, rtl);
11293 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
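/* Name-mangling sketch (an assumption, not from the original file):
   with the default i386 user label prefix "_" a dllimport variable foo
   gets the assembler name "*__imp__foo", so a load of foo goes through
   the import pointer:

	movl	__imp__foo, %eax	# address of foo
	movl	(%eax), %eax		# value of foo  */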
11298 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
11299 true if we require the result be a register. */
11302 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
11307 gcc_assert (SYMBOL_REF_DECL (symbol));
11308 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
11310 x = DECL_RTL (imp_decl);
11312 x = force_reg (Pmode, x);
11316 /* Try machine-dependent ways of modifying an illegitimate address
11317 to be legitimate. If we find one, return the new, valid address.
11318 This macro is used in only one place: `memory_address' in explow.c.
11320 OLDX is the address as it was before break_out_memory_refs was called.
11321 In some cases it is useful to look at this to decide what needs to be done.
11323 It is always safe for this macro to do nothing. It exists to recognize
11324 opportunities to optimize the output.
11326 For the 80386, we handle X+REG by loading X into a register R and
11327 using R+REG. R will go in a general reg and indexing will be used.
11328 However, if REG is a broken-out memory address or multiplication,
11329 nothing needs to be done because REG can certainly go in a general reg.
11331 When -fpic is used, special handling is needed for symbolic references.
11332 See comments by legitimize_pic_address in i386.c for details. */
11335 ix86_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
11336 enum machine_mode mode)
11341 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
11343 return legitimize_tls_address (x, (enum tls_model) log, false);
11344 if (GET_CODE (x) == CONST
11345 && GET_CODE (XEXP (x, 0)) == PLUS
11346 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11347 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
11349 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
11350 (enum tls_model) log, false);
11351 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11354 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
11356 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
11357 return legitimize_dllimport_symbol (x, true);
11358 if (GET_CODE (x) == CONST
11359 && GET_CODE (XEXP (x, 0)) == PLUS
11360 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
11361 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
11363 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
11364 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
11368 if (flag_pic && SYMBOLIC_CONST (x))
11369 return legitimize_pic_address (x, 0);
11371 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11372 if (GET_CODE (x) == ASHIFT
11373 && CONST_INT_P (XEXP (x, 1))
11374 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
11377 log = INTVAL (XEXP (x, 1));
11378 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
11379 GEN_INT (1 << log));
11382 if (GET_CODE (x) == PLUS)
11384 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
11386 if (GET_CODE (XEXP (x, 0)) == ASHIFT
11387 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
11388 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
11391 log = INTVAL (XEXP (XEXP (x, 0), 1));
11392 XEXP (x, 0) = gen_rtx_MULT (Pmode,
11393 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
11394 GEN_INT (1 << log));
11397 if (GET_CODE (XEXP (x, 1)) == ASHIFT
11398 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
11399 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
11402 log = INTVAL (XEXP (XEXP (x, 1), 1));
11403 XEXP (x, 1) = gen_rtx_MULT (Pmode,
11404 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
11405 GEN_INT (1 << log));
11408 /* Put multiply first if it isn't already. */
11409 if (GET_CODE (XEXP (x, 1)) == MULT)
11411 rtx tmp = XEXP (x, 0);
11412 XEXP (x, 0) = XEXP (x, 1);
11417 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
11418 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
11419 created by virtual register instantiation, register elimination, and
11420 similar optimizations. */
11421 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
11424 x = gen_rtx_PLUS (Pmode,
11425 gen_rtx_PLUS (Pmode, XEXP (x, 0),
11426 XEXP (XEXP (x, 1), 0)),
11427 XEXP (XEXP (x, 1), 1));
11431 /* Canonicalize (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
11432 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
11433 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
11434 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11435 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
11436 && CONSTANT_P (XEXP (x, 1)))
11439 rtx other = NULL_RTX;
11441 if (CONST_INT_P (XEXP (x, 1)))
11443 constant = XEXP (x, 1);
11444 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
11446 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
11448 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
11449 other = XEXP (x, 1);
11457 x = gen_rtx_PLUS (Pmode,
11458 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
11459 XEXP (XEXP (XEXP (x, 0), 1), 0)),
11460 plus_constant (other, INTVAL (constant)));
11464 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11467 if (GET_CODE (XEXP (x, 0)) == MULT)
11470 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
11473 if (GET_CODE (XEXP (x, 1)) == MULT)
11476 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
11480 && REG_P (XEXP (x, 1))
11481 && REG_P (XEXP (x, 0)))
11484 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
11487 x = legitimize_pic_address (x, 0);
11490 if (changed && ix86_legitimate_address_p (mode, x, FALSE))
11493 if (REG_P (XEXP (x, 0)))
11495 rtx temp = gen_reg_rtx (Pmode);
11496 rtx val = force_operand (XEXP (x, 1), temp);
11498 emit_move_insn (temp, val);
11500 XEXP (x, 1) = temp;
11504 else if (REG_P (XEXP (x, 1)))
11506 rtx temp = gen_reg_rtx (Pmode);
11507 rtx val = force_operand (XEXP (x, 0), temp);
11509 emit_move_insn (temp, val);
11511 XEXP (x, 0) = temp;
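/* Worked example (illustrative, not part of the original sources): the
   shift-to-multiply canonicalization above turns

     (plus (ashift (reg A) (const_int 2)) (reg B))

   into

     (plus (mult (reg A) (const_int 4)) (reg B))

   which ix86_decompose_address then recognizes as the lea-style
   address (%B,%A,4).  */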
11519 /* Print an integer constant expression in assembler syntax. Addition
11520 and subtraction are the only arithmetic that may appear in these
11521 expressions. FILE is the stdio stream to write to, X is the rtx, and
11522 CODE is the operand print code from the output string. */
11525 output_pic_addr_const (FILE *file, rtx x, int code)
11529 switch (GET_CODE (x))
11532 gcc_assert (flag_pic);
11537 if (TARGET_64BIT || ! TARGET_MACHO_BRANCH_ISLANDS)
11538 output_addr_const (file, x);
11541 const char *name = XSTR (x, 0);
11543 /* Mark the decl as referenced so that cgraph will
11544 output the function. */
11545 if (SYMBOL_REF_DECL (x))
11546 mark_decl_referenced (SYMBOL_REF_DECL (x));
11549 if (MACHOPIC_INDIRECT
11550 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
11551 name = machopic_indirection_name (x, /*stub_p=*/true);
11553 assemble_name (file, name);
11555 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
11556 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
11557 fputs ("@PLT", file);
11564 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
11565 assemble_name (asm_out_file, buf);
11569 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11573 /* This used to output parentheses around the expression,
11574 but that does not work on the 386 (either ATT or BSD assembler). */
11575 output_pic_addr_const (file, XEXP (x, 0), code);
11579 if (GET_MODE (x) == VOIDmode)
11581 /* We can use %d if the number is <32 bits and positive. */
11582 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
11583 fprintf (file, "0x%lx%08lx",
11584 (unsigned long) CONST_DOUBLE_HIGH (x),
11585 (unsigned long) CONST_DOUBLE_LOW (x));
11587 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
11590 /* We can't handle floating point constants;
11591 TARGET_PRINT_OPERAND must handle them. */
11592 output_operand_lossage ("floating constant misused");
11596 /* Some assemblers need integer constants to appear first. */
11597 if (CONST_INT_P (XEXP (x, 0)))
11599 output_pic_addr_const (file, XEXP (x, 0), code);
11601 output_pic_addr_const (file, XEXP (x, 1), code);
11605 gcc_assert (CONST_INT_P (XEXP (x, 1)));
11606 output_pic_addr_const (file, XEXP (x, 1), code);
11608 output_pic_addr_const (file, XEXP (x, 0), code);
11614 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
11615 output_pic_addr_const (file, XEXP (x, 0), code);
11617 output_pic_addr_const (file, XEXP (x, 1), code);
11619 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
11623 gcc_assert (XVECLEN (x, 0) == 1);
11624 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
11625 switch (XINT (x, 1))
11628 fputs ("@GOT", file);
11630 case UNSPEC_GOTOFF:
11631 fputs ("@GOTOFF", file);
11633 case UNSPEC_PLTOFF:
11634 fputs ("@PLTOFF", file);
11636 case UNSPEC_GOTPCREL:
11637 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11638 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
11640 case UNSPEC_GOTTPOFF:
11641 /* FIXME: This might be @TPOFF in Sun ld too. */
11642 fputs ("@gottpoff", file);
11645 fputs ("@tpoff", file);
11647 case UNSPEC_NTPOFF:
11649 fputs ("@tpoff", file);
11651 fputs ("@ntpoff", file);
11653 case UNSPEC_DTPOFF:
11654 fputs ("@dtpoff", file);
11656 case UNSPEC_GOTNTPOFF:
11658 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11659 "@gottpoff(%rip)": "@gottpoff[rip]", file);
11661 fputs ("@gotntpoff", file);
11663 case UNSPEC_INDNTPOFF:
11664 fputs ("@indntpoff", file);
11667 case UNSPEC_MACHOPIC_OFFSET:
11669 machopic_output_function_base_name (file);
11673 output_operand_lossage ("invalid UNSPEC as operand");
11679 output_operand_lossage ("invalid expression as operand");
11683 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
11684 We need to emit DTP-relative relocations. */
11686 static void ATTRIBUTE_UNUSED
11687 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
11689 fputs (ASM_LONG, file);
11690 output_addr_const (file, x);
11691 fputs ("@dtpoff", file);
11697 fputs (", 0", file);
11700 gcc_unreachable ();
11704 /* Return true if X is a representation of the PIC register. This copes
11705 with calls from ix86_find_base_term, where the register might have
11706 been replaced by a cselib value. */
11709 ix86_pic_register_p (rtx x)
11711 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
11712 return (pic_offset_table_rtx
11713 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
11715 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
11718 /* In the name of slightly smaller debug output, and to cater to
11719 general assembler lossage, recognize PIC+GOTOFF and turn it back
11720 into a direct symbol reference.
11722 On Darwin, this is necessary to avoid a crash, because Darwin
11723 has a different PIC label for each routine but the DWARF debugging
11724 information is not associated with any particular routine, so it's
11725 necessary to remove references to the PIC label from RTL stored by
11726 the DWARF output code. */
11729 ix86_delegitimize_address (rtx x)
11731 rtx orig_x = delegitimize_mem_from_attrs (x);
11732 /* addend is NULL or some rtx if x is something+GOTOFF where
11733 something doesn't include the PIC register. */
11734 rtx addend = NULL_RTX;
11735 /* reg_addend is NULL or a multiple of some register. */
11736 rtx reg_addend = NULL_RTX;
11737 /* const_addend is NULL or a const_int. */
11738 rtx const_addend = NULL_RTX;
11739 /* This is the result, or NULL. */
11740 rtx result = NULL_RTX;
11749 if (GET_CODE (x) != CONST
11750 || GET_CODE (XEXP (x, 0)) != UNSPEC
11751 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
11752 || !MEM_P (orig_x))
11754 x = XVECEXP (XEXP (x, 0), 0, 0);
11755 if (GET_MODE (orig_x) != Pmode)
11756 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0);
11760 if (GET_CODE (x) != PLUS
11761 || GET_CODE (XEXP (x, 1)) != CONST)
11764 if (ix86_pic_register_p (XEXP (x, 0)))
11765 /* %ebx + GOT/GOTOFF */
11767 else if (GET_CODE (XEXP (x, 0)) == PLUS)
11769 /* %ebx + %reg * scale + GOT/GOTOFF */
11770 reg_addend = XEXP (x, 0);
11771 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
11772 reg_addend = XEXP (reg_addend, 1);
11773 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
11774 reg_addend = XEXP (reg_addend, 0);
11777 reg_addend = NULL_RTX;
11778 addend = XEXP (x, 0);
11782 addend = XEXP (x, 0);
11784 x = XEXP (XEXP (x, 1), 0);
11785 if (GET_CODE (x) == PLUS
11786 && CONST_INT_P (XEXP (x, 1)))
11788 const_addend = XEXP (x, 1);
11792 if (GET_CODE (x) == UNSPEC
11793 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend)
11794 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
11795 result = XVECEXP (x, 0, 0);
11797 if (TARGET_MACHO && darwin_local_data_pic (x)
11798 && !MEM_P (orig_x))
11799 result = XVECEXP (x, 0, 0);
11805 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
11807 result = gen_rtx_PLUS (Pmode, reg_addend, result);
11810 /* If the rest of original X doesn't involve the PIC register, add
11811 addend and subtract pic_offset_table_rtx. This can happen e.g.
11813 leal (%ebx, %ecx, 4), %ecx
11815 movl foo@GOTOFF(%ecx), %edx
11816 in which case we return (%ecx - %ebx) + foo. */
11817 if (pic_offset_table_rtx)
11818 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend),
11819 pic_offset_table_rtx),
11824 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x))
11825 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0);
11829 /* If X is a machine specific address (i.e. a symbol or label being
11830 referenced as a displacement from the GOT implemented using an
11831 UNSPEC), then return the base term. Otherwise return X. */
11834 ix86_find_base_term (rtx x)
11840 if (GET_CODE (x) != CONST)
11842 term = XEXP (x, 0);
11843 if (GET_CODE (term) == PLUS
11844 && (CONST_INT_P (XEXP (term, 1))
11845 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
11846 term = XEXP (term, 0);
11847 if (GET_CODE (term) != UNSPEC
11848 || XINT (term, 1) != UNSPEC_GOTPCREL)
11851 return XVECEXP (term, 0, 0);
11854 return ix86_delegitimize_address (x);
11858 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
11859 int fp, FILE *file)
11861 const char *suffix;
11863 if (mode == CCFPmode || mode == CCFPUmode)
11865 code = ix86_fp_compare_code_to_integer (code);
11869 code = reverse_condition (code);
11920 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
11924 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
11925 Those same assemblers have the same but opposite lossage on cmov. */
11926 if (mode == CCmode)
11927 suffix = fp ? "nbe" : "a";
11928 else if (mode == CCCmode)
11931 gcc_unreachable ();
11947 gcc_unreachable ();
11951 gcc_assert (mode == CCmode || mode == CCCmode);
11968 gcc_unreachable ();
11972 /* ??? As above. */
11973 gcc_assert (mode == CCmode || mode == CCCmode);
11974 suffix = fp ? "nb" : "ae";
11977 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
11981 /* ??? As above. */
11982 if (mode == CCmode)
11984 else if (mode == CCCmode)
11985 suffix = fp ? "nb" : "ae";
11987 gcc_unreachable ();
11990 suffix = fp ? "u" : "p";
11993 suffix = fp ? "nu" : "np";
11996 gcc_unreachable ();
11998 fputs (suffix, file);
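/* Example (illustrative, not original text): for (gt (reg:CCGC flags)
   (const_int 0)) this routine emits the suffix "g", so "set%C0" in a
   template prints "setg"; with REVERSE nonzero the same comparison
   emits "le" instead.  */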
12001 /* Print the name of register X to FILE based on its machine mode and number.
12002 If CODE is 'w', pretend the mode is HImode.
12003 If CODE is 'b', pretend the mode is QImode.
12004 If CODE is 'k', pretend the mode is SImode.
12005 If CODE is 'q', pretend the mode is DImode.
12006 If CODE is 'x', pretend the mode is V4SFmode.
12007 If CODE is 't', pretend the mode is V8SFmode.
12008 If CODE is 'h', pretend the reg is the 'high' byte register.
12009 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op.
12010 If CODE is 'd', duplicate the operand for AVX instruction.
12014 print_reg (rtx x, int code, FILE *file)
12017 bool duplicated = code == 'd' && TARGET_AVX;
12019 gcc_assert (x == pc_rtx
12020 || (REGNO (x) != ARG_POINTER_REGNUM
12021 && REGNO (x) != FRAME_POINTER_REGNUM
12022 && REGNO (x) != FLAGS_REG
12023 && REGNO (x) != FPSR_REG
12024 && REGNO (x) != FPCR_REG));
12026 if (ASSEMBLER_DIALECT == ASM_ATT)
12031 gcc_assert (TARGET_64BIT);
12032 fputs ("rip", file);
12036 if (code == 'w' || MMX_REG_P (x))
12038 else if (code == 'b')
12040 else if (code == 'k')
12042 else if (code == 'q')
12044 else if (code == 'y')
12046 else if (code == 'h')
12048 else if (code == 'x')
12050 else if (code == 't')
12053 code = GET_MODE_SIZE (GET_MODE (x));
12055 /* Irritatingly, AMD extended registers use a different naming convention
12056 from the normal registers. */
12057 if (REX_INT_REG_P (x))
12059 gcc_assert (TARGET_64BIT);
12063 error ("extended registers have no high halves");
12066 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
12069 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
12072 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
12075 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
12078 error ("unsupported operand size for extended register");
12088 if (STACK_TOP_P (x))
12097 if (! ANY_FP_REG_P (x))
12098 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
12103 reg = hi_reg_name[REGNO (x)];
12106 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
12108 reg = qi_reg_name[REGNO (x)];
12111 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
12113 reg = qi_high_reg_name[REGNO (x)];
12118 gcc_assert (!duplicated);
12120 fputs (hi_reg_name[REGNO (x)] + 1, file);
12125 gcc_unreachable ();
12131 if (ASSEMBLER_DIALECT == ASM_ATT)
12132 fprintf (file, ", %%%s", reg);
12134 fprintf (file, ", %s", reg);
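/* Naming sketch (an assumption): on an AMD extended register such as
   r10, the codes 'b', 'w', 'k' and 'q' make this routine print "r10b",
   "r10w", "r10d" and "r10" respectively; the classic registers instead
   take their names from the qi/hi register name tables, e.g. "al" for
   code 'b' and "eax" for code 'k'.  */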
12138 /* Locate some local-dynamic symbol still in use by this function
12139 so that we can print its name in some tls_local_dynamic_base
12140 pattern. */
12147 if (GET_CODE (x) == SYMBOL_REF
12148 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
12150 cfun->machine->some_ld_name = XSTR (x, 0);
12157 static const char *
12158 get_some_local_dynamic_name (void)
12162 if (cfun->machine->some_ld_name)
12163 return cfun->machine->some_ld_name;
12165 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
12166 if (NONDEBUG_INSN_P (insn)
12167 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
12168 return cfun->machine->some_ld_name;
12173 /* Meaning of CODE:
12174 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
12175 C -- print opcode suffix for set/cmov insn.
12176 c -- like C, but print reversed condition
12177 F,f -- likewise, but for floating-point.
12178 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", otherwise nothing.
12180 R -- print the prefix for register names.
12181 z -- print the opcode suffix for the size of the current operand.
12182 Z -- likewise, with special suffixes for x87 instructions.
12183 * -- print a star (in certain assembler syntax)
12184 A -- print an absolute memory reference.
12185 w -- print the operand as if it's a "word" (HImode) even if it isn't.
12186 s -- print a shift double count, followed by the assembler's argument delimiter.
12188 b -- print the QImode name of the register for the indicated operand.
12189 %b0 would print %al if operands[0] is reg 0.
12190 w -- likewise, print the HImode name of the register.
12191 k -- likewise, print the SImode name of the register.
12192 q -- likewise, print the DImode name of the register.
12193 x -- likewise, print the V4SFmode name of the register.
12194 t -- likewise, print the V8SFmode name of the register.
12195 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
12196 y -- print "st(0)" instead of "st" as a register.
12197 d -- print duplicated register operand for AVX instruction.
12198 D -- print condition for SSE cmp instruction.
12199 P -- if PIC, print an @PLT suffix.
12200 X -- don't print any sort of PIC '@' suffix for a symbol.
12201 & -- print some in-use local-dynamic symbol name.
12202 H -- print a memory address offset by 8; used for sse high-parts
12203 Y -- print condition for XOP pcom* instruction.
12204 + -- print a branch hint as 'cs' or 'ds' prefix
12205 ; -- print a semicolon (after prefixes due to a bug in older gas).
12209 ix86_print_operand (FILE *file, rtx x, int code)
12216 if (ASSEMBLER_DIALECT == ASM_ATT)
12222 const char *name = get_some_local_dynamic_name ();
12224 output_operand_lossage ("'%%&' used without any "
12225 "local dynamic TLS references");
12227 assemble_name (file, name);
12232 switch (ASSEMBLER_DIALECT)
12239 /* Intel syntax. For absolute addresses, registers should not
12240 be surrounded by braces. */
12244 ix86_print_operand (file, x, 0);
12251 gcc_unreachable ();
12254 ix86_print_operand (file, x, 0);
12259 if (ASSEMBLER_DIALECT == ASM_ATT)
12264 if (ASSEMBLER_DIALECT == ASM_ATT)
12269 if (ASSEMBLER_DIALECT == ASM_ATT)
12274 if (ASSEMBLER_DIALECT == ASM_ATT)
12279 if (ASSEMBLER_DIALECT == ASM_ATT)
12284 if (ASSEMBLER_DIALECT == ASM_ATT)
12289 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12291 /* Opcodes don't get size suffixes if using Intel opcodes. */
12292 if (ASSEMBLER_DIALECT == ASM_INTEL)
12295 switch (GET_MODE_SIZE (GET_MODE (x)))
12314 output_operand_lossage
12315 ("invalid operand size for operand code '%c'", code);
12320 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12322 (0, "non-integer operand used with operand code '%c'", code);
12326 /* 387 opcodes don't get size suffixes if using Intel opcodes. */
12327 if (ASSEMBLER_DIALECT == ASM_INTEL)
12330 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
12332 switch (GET_MODE_SIZE (GET_MODE (x)))
12335 #ifdef HAVE_AS_IX86_FILDS
12345 #ifdef HAVE_AS_IX86_FILDQ
12348 fputs ("ll", file);
12356 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
12358 /* 387 opcodes don't get size suffixes
12359 if the operands are registers. */
12360 if (STACK_REG_P (x))
12363 switch (GET_MODE_SIZE (GET_MODE (x)))
12384 output_operand_lossage
12385 ("invalid operand type used with operand code '%c'", code);
12389 output_operand_lossage
12390 ("invalid operand size for operand code '%c'", code);
12407 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
12409 ix86_print_operand (file, x, 0);
12410 fputs (", ", file);
12415 /* A little bit of braindamage here. The SSE compare instructions
12416 use completely different names for the comparisons than the
12417 fp conditional moves do. */
12420 switch (GET_CODE (x))
12423 fputs ("eq", file);
12426 fputs ("eq_us", file);
12429 fputs ("lt", file);
12432 fputs ("nge", file);
12435 fputs ("le", file);
12438 fputs ("ngt", file);
12441 fputs ("unord", file);
12444 fputs ("neq", file);
12447 fputs ("neq_oq", file);
12450 fputs ("ge", file);
12453 fputs ("nlt", file);
12456 fputs ("gt", file);
12459 fputs ("nle", file);
12462 fputs ("ord", file);
12465 output_operand_lossage ("operand is not a condition code, "
12466 "invalid operand code 'D'");
12472 switch (GET_CODE (x))
12476 fputs ("eq", file);
12480 fputs ("lt", file);
12484 fputs ("le", file);
12487 fputs ("unord", file);
12491 fputs ("neq", file);
12495 fputs ("nlt", file);
12499 fputs ("nle", file);
12502 fputs ("ord", file);
12505 output_operand_lossage ("operand is not a condition code, "
12506 "invalid operand code 'D'");
12512 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12513 if (ASSEMBLER_DIALECT == ASM_ATT)
12515 switch (GET_MODE (x))
12517 case HImode: putc ('w', file); break;
12519 case SFmode: putc ('l', file); break;
12521 case DFmode: putc ('q', file); break;
12522 default: gcc_unreachable ();
12529 if (!COMPARISON_P (x))
12531 output_operand_lossage ("operand is neither a constant nor a "
12532 "condition code, invalid operand code "
12536 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
12539 if (!COMPARISON_P (x))
12541 output_operand_lossage ("operand is neither a constant nor a "
12542 "condition code, invalid operand code "
12546 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12547 if (ASSEMBLER_DIALECT == ASM_ATT)
12550 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
12553 /* Like above, but print the reversed condition. */
12555 /* Check to see if argument to %c is really a constant
12556 and not a condition code which needs to be reversed. */
12557 if (!COMPARISON_P (x))
12559 output_operand_lossage ("operand is neither a constant nor a "
12560 "condition code, invalid operand "
12564 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
12567 if (!COMPARISON_P (x))
12569 output_operand_lossage ("operand is neither a constant nor a "
12570 "condition code, invalid operand "
12574 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
12575 if (ASSEMBLER_DIALECT == ASM_ATT)
12578 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
12582 /* It doesn't actually matter what mode we use here, as we're
12583 only going to use this for printing. */
12584 x = adjust_address_nv (x, DImode, 8);
12592 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
12595 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
12598 int pred_val = INTVAL (XEXP (x, 0));
12600 if (pred_val < REG_BR_PROB_BASE * 45 / 100
12601 || pred_val > REG_BR_PROB_BASE * 55 / 100)
12603 int taken = pred_val > REG_BR_PROB_BASE / 2;
12604 int cputaken = final_forward_branch_p (current_output_insn) == 0;
12606 /* Emit hints only in the cases where the default branch prediction
12607 heuristics would fail. */
12608 if (taken != cputaken)
12610 /* We use 3e (DS) prefix for taken branches and
12611 2e (CS) prefix for not taken branches. */
12613 fputs ("ds ; ", file);
12615 fputs ("cs ; ", file);
12623 switch (GET_CODE (x))
12626 fputs ("neq", file);
12629 fputs ("eq", file);
12633 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
12637 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
12641 fputs ("le", file);
12645 fputs ("lt", file);
12648 fputs ("unord", file);
12651 fputs ("ord", file);
12654 fputs ("ueq", file);
12657 fputs ("nlt", file);
12660 fputs ("nle", file);
12663 fputs ("ule", file);
12666 fputs ("ult", file);
12669 fputs ("une", file);
12672 output_operand_lossage ("operand is not a condition code, "
12673 "invalid operand code 'Y'");
12679 #ifndef HAVE_AS_IX86_REP_LOCK_PREFIX
12685 output_operand_lossage ("invalid operand code '%c'", code);
12690 print_reg (x, code, file);
12692 else if (MEM_P (x))
12694 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
12695 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
12696 && GET_MODE (x) != BLKmode)
12699 switch (GET_MODE_SIZE (GET_MODE (x)))
12701 case 1: size = "BYTE"; break;
12702 case 2: size = "WORD"; break;
12703 case 4: size = "DWORD"; break;
12704 case 8: size = "QWORD"; break;
12705 case 12: size = "TBYTE"; break;
12707 if (GET_MODE (x) == XFmode)
12712 case 32: size = "YMMWORD"; break;
12714 gcc_unreachable ();
12717 /* Check for explicit size override (codes 'b', 'w' and 'k') */
12720 else if (code == 'w')
12722 else if (code == 'k')
12725 fputs (size, file);
12726 fputs (" PTR ", file);
12730 /* Avoid (%rip) for call operands. */
12731 if (CONSTANT_ADDRESS_P (x) && code == 'P'
12732 && !CONST_INT_P (x))
12733 output_addr_const (file, x);
12734 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
12735 output_operand_lossage ("invalid constraints for operand");
12737 output_address (x);
12740 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
12745 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12746 REAL_VALUE_TO_TARGET_SINGLE (r, l);
12748 if (ASSEMBLER_DIALECT == ASM_ATT)
12750 fprintf (file, "0x%08lx", (long unsigned int) l);
12753 /* These float cases don't actually occur as immediate operands. */
12754 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
12758 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12759 fputs (dstr, file);
12762 else if (GET_CODE (x) == CONST_DOUBLE
12763 && GET_MODE (x) == XFmode)
12767 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
12768 fputs (dstr, file);
12773 /* We have patterns that allow zero sets of memory, for instance.
12774 In 64-bit mode, we should probably support all 8-byte vectors,
12775 since we can in fact encode that into an immediate. */
12776 if (GET_CODE (x) == CONST_VECTOR)
12778 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
12784 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
12786 if (ASSEMBLER_DIALECT == ASM_ATT)
12789 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
12790 || GET_CODE (x) == LABEL_REF)
12792 if (ASSEMBLER_DIALECT == ASM_ATT)
12795 fputs ("OFFSET FLAT:", file);
12798 if (CONST_INT_P (x))
12799 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
12801 output_pic_addr_const (file, x, code);
12803 output_addr_const (file, x);
12808 ix86_print_operand_punct_valid_p (unsigned char code)
12810 return (code == '*' || code == '+' || code == '&' || code == ';');
12813 /* Print a memory operand whose address is ADDR. */
12816 ix86_print_operand_address (FILE *file, rtx addr)
12818 struct ix86_address parts;
12819 rtx base, index, disp;
12821 int ok = ix86_decompose_address (addr, &parts);
12826 index = parts.index;
12828 scale = parts.scale;
12836 if (ASSEMBLER_DIALECT == ASM_ATT)
12838 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
12841 gcc_unreachable ();
12844 /* Use one byte shorter RIP relative addressing for 64bit mode. */
12845 if (TARGET_64BIT && !base && !index)
12849 if (GET_CODE (disp) == CONST
12850 && GET_CODE (XEXP (disp, 0)) == PLUS
12851 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12852 symbol = XEXP (XEXP (disp, 0), 0);
12854 if (GET_CODE (symbol) == LABEL_REF
12855 || (GET_CODE (symbol) == SYMBOL_REF
12856 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
12859 if (!base && !index)
      /* A displacement-only address requires special attention.  */
12863 if (CONST_INT_P (disp))
12865 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
12866 fputs ("ds:", file);
12867 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
12870 output_pic_addr_const (file, disp, 0);
12872 output_addr_const (file, disp);
12876 if (ASSEMBLER_DIALECT == ASM_ATT)
12881 output_pic_addr_const (file, disp, 0);
12882 else if (GET_CODE (disp) == LABEL_REF)
12883 output_asm_label (disp);
12885 output_addr_const (file, disp);
12890 print_reg (base, 0, file);
12894 print_reg (index, 0, file);
12896 fprintf (file, ",%d", scale);
12902 rtx offset = NULL_RTX;
12906 /* Pull out the offset of a symbol; print any symbol itself. */
12907 if (GET_CODE (disp) == CONST
12908 && GET_CODE (XEXP (disp, 0)) == PLUS
12909 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
12911 offset = XEXP (XEXP (disp, 0), 1);
12912 disp = gen_rtx_CONST (VOIDmode,
12913 XEXP (XEXP (disp, 0), 0));
12917 output_pic_addr_const (file, disp, 0);
12918 else if (GET_CODE (disp) == LABEL_REF)
12919 output_asm_label (disp);
12920 else if (CONST_INT_P (disp))
12923 output_addr_const (file, disp);
12929 print_reg (base, 0, file);
12932 if (INTVAL (offset) >= 0)
12934 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12938 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
12945 print_reg (index, 0, file);
12947 fprintf (file, "*%d", scale);
12955 output_addr_const_extra (FILE *file, rtx x)
12959 if (GET_CODE (x) != UNSPEC)
12962 op = XVECEXP (x, 0, 0);
12963 switch (XINT (x, 1))
12965 case UNSPEC_GOTTPOFF:
12966 output_addr_const (file, op);
12967 /* FIXME: This might be @TPOFF in Sun ld. */
12968 fputs ("@gottpoff", file);
12971 output_addr_const (file, op);
12972 fputs ("@tpoff", file);
12974 case UNSPEC_NTPOFF:
12975 output_addr_const (file, op);
12977 fputs ("@tpoff", file);
12979 fputs ("@ntpoff", file);
12981 case UNSPEC_DTPOFF:
12982 output_addr_const (file, op);
12983 fputs ("@dtpoff", file);
12985 case UNSPEC_GOTNTPOFF:
12986 output_addr_const (file, op);
12988 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
12989 "@gottpoff(%rip)" : "@gottpoff[rip]", file);
12991 fputs ("@gotntpoff", file);
12993 case UNSPEC_INDNTPOFF:
12994 output_addr_const (file, op);
12995 fputs ("@indntpoff", file);
12998 case UNSPEC_MACHOPIC_OFFSET:
12999 output_addr_const (file, op);
13001 machopic_output_function_base_name (file);
13012 /* Split one or more DImode RTL references into pairs of SImode
13013 references. The RTL can be REG, offsettable MEM, integer constant, or
13014 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
13015 split and "num" is its length. lo_half and hi_half are output arrays
13016 that parallel "operands". */
13019 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13023 rtx op = operands[num];
      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle them.  */
13029 lo_half[num] = adjust_address (op, SImode, 0);
13030 hi_half[num] = adjust_address (op, SImode, 4);
13034 lo_half[num] = simplify_gen_subreg (SImode, op,
13035 GET_MODE (op) == VOIDmode
13036 ? DImode : GET_MODE (op), 0);
13037 hi_half[num] = simplify_gen_subreg (SImode, op,
13038 GET_MODE (op) == VOIDmode
13039 ? DImode : GET_MODE (op), 4);
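/* As an illustration: on little-endian x86, a DImode constant such as
   0x1122334455667788 splits into lo_half 0x55667788 and hi_half
   0x11223344, while a DImode MEM splits into SImode MEMs at byte
   offsets 0 and 4.  */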
/* Split one or more TImode RTL references into pairs of DImode
   references.  The RTL can be REG, offsettable MEM, integer constant, or
   CONST_DOUBLE.  "operands" is a pointer to an array of TImode RTL to
   split and "num" is its length.  lo_half and hi_half are output arrays
   that parallel "operands".  */
13050 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
13054 rtx op = operands[num];
      /* simplify_subreg refuses to split volatile memory addresses, but we
	 still have to handle them.  */
13060 lo_half[num] = adjust_address (op, DImode, 0);
13061 hi_half[num] = adjust_address (op, DImode, 8);
13065 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
13066 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
13071 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
13072 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
13073 is the expression of the binary operation. The output may either be
13074 emitted here, or returned to the caller, like all output_* functions.
13076 There is no guarantee that the operands are the same mode, as they
13077 might be within FLOAT or FLOAT_EXTEND expressions. */
13079 #ifndef SYSV386_COMPAT
13080 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
13081 wants to fix the assemblers because that causes incompatibility
13082 with gcc. No-one wants to fix gcc because that causes
13083 incompatibility with assemblers... You can use the option of
13084 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
13085 #define SYSV386_COMPAT 1
13089 output_387_binary_op (rtx insn, rtx *operands)
13091 static char buf[40];
13094 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
13096 #ifdef ENABLE_CHECKING
13097 /* Even if we do not want to check the inputs, this documents input
13098 constraints. Which helps in understanding the following code. */
13099 if (STACK_REG_P (operands[0])
13100 && ((REG_P (operands[1])
13101 && REGNO (operands[0]) == REGNO (operands[1])
13102 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
13103 || (REG_P (operands[2])
13104 && REGNO (operands[0]) == REGNO (operands[2])
13105 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
13106 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
13109 gcc_assert (is_sse);
13112 switch (GET_CODE (operands[3]))
13115 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13116 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13124 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13125 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13133 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13134 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13142 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
13143 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
13151 gcc_unreachable ();
13158 strcpy (buf, ssep);
13159 if (GET_MODE (operands[0]) == SFmode)
13160 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
13162 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
13166 strcpy (buf, ssep + 1);
13167 if (GET_MODE (operands[0]) == SFmode)
13168 strcat (buf, "ss\t{%2, %0|%0, %2}");
13170 strcat (buf, "sd\t{%2, %0|%0, %2}");
13176 switch (GET_CODE (operands[3]))
13180 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
13182 rtx temp = operands[2];
13183 operands[2] = operands[1];
13184 operands[1] = temp;
      /* Now we know operands[0] == operands[1].  */
13189 if (MEM_P (operands[2]))
13195 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13197 if (STACK_TOP_P (operands[0]))
13198 /* How is it that we are storing to a dead operand[2]?
13199 Well, presumably operands[1] is dead too. We can't
13200 store the result to st(0) as st(0) gets popped on this
13201 instruction. Instead store to operands[2] (which I
13202 think has to be st(1)). st(1) will be popped later.
13203 gcc <= 2.8.1 didn't have this check and generated
13204 assembly code that the Unixware assembler rejected. */
13205 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13207 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13211 if (STACK_TOP_P (operands[0]))
13212 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13214 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13219 if (MEM_P (operands[1]))
13225 if (MEM_P (operands[2]))
13231 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
13234 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
13235 derived assemblers, confusingly reverse the direction of
13236 the operation for fsub{r} and fdiv{r} when the
13237 destination register is not st(0). The Intel assembler
13238 doesn't have this brain damage. Read !SYSV386_COMPAT to
13239 figure out what the hardware really does. */
13240 if (STACK_TOP_P (operands[0]))
13241 p = "{p\t%0, %2|rp\t%2, %0}";
13243 p = "{rp\t%2, %0|p\t%0, %2}";
13245 if (STACK_TOP_P (operands[0]))
13246 /* As above for fmul/fadd, we can't store to st(0). */
13247 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
13249 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
13254 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
13257 if (STACK_TOP_P (operands[0]))
13258 p = "{rp\t%0, %1|p\t%1, %0}";
13260 p = "{p\t%1, %0|rp\t%0, %1}";
13262 if (STACK_TOP_P (operands[0]))
13263 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
13265 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
13270 if (STACK_TOP_P (operands[0]))
13272 if (STACK_TOP_P (operands[1]))
13273 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
13275 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
13278 else if (STACK_TOP_P (operands[1]))
13281 p = "{\t%1, %0|r\t%0, %1}";
13283 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
13289 p = "{r\t%2, %0|\t%0, %2}";
13291 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
13297 gcc_unreachable ();
13304 /* Return needed mode for entity in optimize_mode_switching pass. */
13307 ix86_mode_needed (int entity, rtx insn)
13309 enum attr_i387_cw mode;
  /* The mode UNINITIALIZED is used to store the control word after a
     function call or ASM pattern.  The mode ANY specifies that the
     function has no requirements on the control word and makes no
     changes in the bits we are interested in.  */
  if (CALL_P (insn)
13317 || (NONJUMP_INSN_P (insn)
13318 && (asm_noperands (PATTERN (insn)) >= 0
13319 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
13320 return I387_CW_UNINITIALIZED;
13322 if (recog_memoized (insn) < 0)
13323 return I387_CW_ANY;
13325 mode = get_attr_i387_cw (insn);
13330 if (mode == I387_CW_TRUNC)
13335 if (mode == I387_CW_FLOOR)
13340 if (mode == I387_CW_CEIL)
13345 if (mode == I387_CW_MASK_PM)
13350 gcc_unreachable ();
13353 return I387_CW_ANY;
13356 /* Output code to initialize control word copies used by trunc?f?i and
13357 rounding patterns. CURRENT_MODE is set to current control word,
13358 while NEW_MODE is set to new control word. */
13361 emit_i387_cw_initialization (int mode)
13363 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
13366 enum ix86_stack_slot slot;
13368 rtx reg = gen_reg_rtx (HImode);
13370 emit_insn (gen_x86_fnstcw_1 (stored_mode));
13371 emit_move_insn (reg, copy_rtx (stored_mode));
13373 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
13374 || optimize_function_for_size_p (cfun))
13378 case I387_CW_TRUNC:
13379 /* round toward zero (truncate) */
13380 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
13381 slot = SLOT_CW_TRUNC;
13384 case I387_CW_FLOOR:
13385 /* round down toward -oo */
13386 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13387 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
13388 slot = SLOT_CW_FLOOR;
13392 /* round up toward +oo */
13393 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
13394 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
13395 slot = SLOT_CW_CEIL;
13398 case I387_CW_MASK_PM:
13399 /* mask precision exception for nearbyint() */
13400 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13401 slot = SLOT_CW_MASK_PM;
13405 gcc_unreachable ();
13412 case I387_CW_TRUNC:
13413 /* round toward zero (truncate) */
13414 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
13415 slot = SLOT_CW_TRUNC;
13418 case I387_CW_FLOOR:
13419 /* round down toward -oo */
13420 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
13421 slot = SLOT_CW_FLOOR;
13425 /* round up toward +oo */
13426 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
13427 slot = SLOT_CW_CEIL;
13430 case I387_CW_MASK_PM:
13431 /* mask precision exception for nearbyint() */
13432 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
13433 slot = SLOT_CW_MASK_PM;
13437 gcc_unreachable ();
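  /* For reference, the x87 control word keeps the rounding control in
     bits 11:10 -- 00 round to nearest, 01 round down, 10 round up,
     11 truncate -- hence the 0x0400, 0x0800 and 0x0c00 masks above;
     bit 5 (0x0020) masks the precision exception, as needed for
     nearbyint().  */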
13441 gcc_assert (slot < MAX_386_STACK_LOCALS);
13443 new_mode = assign_386_stack_local (HImode, slot);
13444 emit_move_insn (new_mode, reg);
13447 /* Output code for INSN to convert a float to a signed int. OPERANDS
13448 are the insn operands. The output may be [HSD]Imode and the input
13449 operand may be [SDX]Fmode. */
13452 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
13454 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13455 int dimode_p = GET_MODE (operands[0]) == DImode;
13456 int round_mode = get_attr_i387_cw (insn);
13458 /* Jump through a hoop or two for DImode, since the hardware has no
13459 non-popping instruction. We used to do this a different way, but
13460 that was somewhat fragile and broke with post-reload splitters. */
13461 if ((dimode_p || fisttp) && !stack_top_dies)
13462 output_asm_insn ("fld\t%y1", operands);
13464 gcc_assert (STACK_TOP_P (operands[1]));
13465 gcc_assert (MEM_P (operands[0]));
13466 gcc_assert (GET_MODE (operands[1]) != TFmode);
13469 output_asm_insn ("fisttp%Z0\t%0", operands);
13472 if (round_mode != I387_CW_ANY)
13473 output_asm_insn ("fldcw\t%3", operands);
13474 if (stack_top_dies || dimode_p)
13475 output_asm_insn ("fistp%Z0\t%0", operands);
13477 output_asm_insn ("fist%Z0\t%0", operands);
13478 if (round_mode != I387_CW_ANY)
13479 output_asm_insn ("fldcw\t%2", operands);
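  /* A typical SImode truncation therefore assembles to

	fldcw	%3
	fistpl	%0
	fldcw	%2

     i.e. load the truncating control word, convert (popping the
     stack), then restore the original control word.  The popping form
     is forced for DImode, which has no non-popping fist.  */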
13485 /* Output code for x87 ffreep insn. The OPNO argument, which may only
13486 have the values zero or one, indicates the ffreep insn's operand
13487 from the OPERANDS array. */
13489 static const char *
13490 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
13492 if (TARGET_USE_FFREEP)
13493 #ifdef HAVE_AS_IX86_FFREEP
13494 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
13497 static char retval[32];
13498 int regno = REGNO (operands[opno]);
13500 gcc_assert (FP_REGNO_P (regno));
13502 regno -= FIRST_STACK_REG;
13504 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
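      /* The word emitted above is the raw encoding of ffreep %st(N),
	 the bytes 0xdf 0xc0+N stored little-endian; e.g. regno 2
	 yields 0xc2df, i.e. ffreep %st(2).  */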
13509 return opno ? "fstp\t%y1" : "fstp\t%y0";
13513 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
13514 should be used. UNORDERED_P is true when fucom should be used. */
13517 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
13519 int stack_top_dies;
13520 rtx cmp_op0, cmp_op1;
13521 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
13525 cmp_op0 = operands[0];
13526 cmp_op1 = operands[1];
13530 cmp_op0 = operands[1];
13531 cmp_op1 = operands[2];
13536 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
13537 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
13538 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
13539 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
13541 if (GET_MODE (operands[0]) == SFmode)
13543 return &ucomiss[TARGET_AVX ? 0 : 1];
13545 return &comiss[TARGET_AVX ? 0 : 1];
13548 return &ucomisd[TARGET_AVX ? 0 : 1];
13550 return &comisd[TARGET_AVX ? 0 : 1];
13553 gcc_assert (STACK_TOP_P (cmp_op0));
13555 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
13557 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
13559 if (stack_top_dies)
13561 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
13562 return output_387_ffreep (operands, 1);
13565 return "ftst\n\tfnstsw\t%0";
13568 if (STACK_REG_P (cmp_op1)
13570 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
13571 && REGNO (cmp_op1) != FIRST_STACK_REG)
      /* If both the top of the 387 stack and the other operand (also a
	 stack register) die, then this must be a `fcompp' float
	 compare.  */
13579 /* There is no double popping fcomi variant. Fortunately,
13580 eflags is immune from the fstp's cc clobbering. */
13582 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
13584 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
13585 return output_387_ffreep (operands, 0);
13590 return "fucompp\n\tfnstsw\t%0";
13592 return "fcompp\n\tfnstsw\t%0";
13597 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
13599 static const char * const alt[16] =
13601 "fcom%Z2\t%y2\n\tfnstsw\t%0",
13602 "fcomp%Z2\t%y2\n\tfnstsw\t%0",
13603 "fucom%Z2\t%y2\n\tfnstsw\t%0",
13604 "fucomp%Z2\t%y2\n\tfnstsw\t%0",
13606 "ficom%Z2\t%y2\n\tfnstsw\t%0",
13607 "ficomp%Z2\t%y2\n\tfnstsw\t%0",
13611 "fcomi\t{%y1, %0|%0, %y1}",
13612 "fcomip\t{%y1, %0|%0, %y1}",
13613 "fucomi\t{%y1, %0|%0, %y1}",
13614 "fucomip\t{%y1, %0|%0, %y1}",
13625 mask = eflags_p << 3;
13626 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
13627 mask |= unordered_p << 1;
13628 mask |= stack_top_dies;
13630 gcc_assert (mask < 16);
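    /* For instance, a popping unordered compare through the flags
       (eflags_p = 1, FP operand so intmode = 0, unordered_p = 1,
       stack_top_dies = 1) gives mask 0b1011 = 11, selecting "fucomip"
       above.  */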
13639 ix86_output_addr_vec_elt (FILE *file, int value)
13641 const char *directive = ASM_LONG;
13645 directive = ASM_QUAD;
13647 gcc_assert (!TARGET_64BIT);
13650 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
13654 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
13656 const char *directive = ASM_LONG;
13659 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
13660 directive = ASM_QUAD;
13662 gcc_assert (!TARGET_64BIT);
13664 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
13665 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
13666 fprintf (file, "%s%s%d-%s%d\n",
13667 directive, LPREFIX, value, LPREFIX, rel);
13668 else if (HAVE_AS_GOTOFF_IN_DATA)
13669 fprintf (file, ASM_LONG "%s%d@GOTOFF\n", LPREFIX, value);
13671 else if (TARGET_MACHO)
13673 fprintf (file, ASM_LONG "%s%d-", LPREFIX, value);
13674 machopic_output_function_base_name (file);
13679 asm_fprintf (file, ASM_LONG "%U%s+[.-%s%d]\n",
13680 GOT_SYMBOL_NAME, LPREFIX, value);
/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
   for the target.  */
13687 ix86_expand_clear (rtx dest)
13691 /* We play register width games, which are only valid after reload. */
13692 gcc_assert (reload_completed);
13694 /* Avoid HImode and its attendant prefix byte. */
13695 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
13696 dest = gen_rtx_REG (SImode, REGNO (dest));
13697 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
13699 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
13700 if (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())
13702 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13703 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
13709 /* X is an unchanging MEM. If it is a constant pool reference, return
13710 the constant pool rtx, else NULL. */
13713 maybe_get_pool_constant (rtx x)
13715 x = ix86_delegitimize_address (XEXP (x, 0));
13717 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
13718 return get_pool_constant (x);
13724 ix86_expand_move (enum machine_mode mode, rtx operands[])
13727 enum tls_model model;
13732 if (GET_CODE (op1) == SYMBOL_REF)
13734 model = SYMBOL_REF_TLS_MODEL (op1);
13737 op1 = legitimize_tls_address (op1, model, true);
13738 op1 = force_operand (op1, op0);
13742 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13743 && SYMBOL_REF_DLLIMPORT_P (op1))
13744 op1 = legitimize_dllimport_symbol (op1, false);
13746 else if (GET_CODE (op1) == CONST
13747 && GET_CODE (XEXP (op1, 0)) == PLUS
13748 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
13750 rtx addend = XEXP (XEXP (op1, 0), 1);
13751 rtx symbol = XEXP (XEXP (op1, 0), 0);
13754 model = SYMBOL_REF_TLS_MODEL (symbol);
13756 tmp = legitimize_tls_address (symbol, model, true);
13757 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
13758 && SYMBOL_REF_DLLIMPORT_P (symbol))
13759 tmp = legitimize_dllimport_symbol (symbol, true);
13763 tmp = force_operand (tmp, NULL);
13764 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
13765 op0, 1, OPTAB_DIRECT);
13771 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
13773 if (TARGET_MACHO && !TARGET_64BIT)
13778 rtx temp = ((reload_in_progress
13779 || ((op0 && REG_P (op0))
13781 ? op0 : gen_reg_rtx (Pmode));
13782 op1 = machopic_indirect_data_reference (op1, temp);
13783 op1 = machopic_legitimize_pic_address (op1, mode,
13784 temp == op1 ? 0 : temp);
13786 else if (MACHOPIC_INDIRECT)
13787 op1 = machopic_indirect_data_reference (op1, 0);
13795 op1 = force_reg (Pmode, op1);
13796 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
13798 rtx reg = can_create_pseudo_p () ? NULL_RTX : op0;
13799 op1 = legitimize_pic_address (op1, reg);
13808 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
13809 || !push_operand (op0, mode))
13811 op1 = force_reg (mode, op1);
13813 if (push_operand (op0, mode)
13814 && ! general_no_elim_operand (op1, mode))
13815 op1 = copy_to_mode_reg (mode, op1);
  /* Force large constants in 64-bit compilation into a register
     to get them CSEd.  */
13819 if (can_create_pseudo_p ()
13820 && (mode == DImode) && TARGET_64BIT
13821 && immediate_operand (op1, mode)
13822 && !x86_64_zext_immediate_operand (op1, VOIDmode)
13823 && !register_operand (op0, mode)
13825 op1 = copy_to_mode_reg (mode, op1);
13827 if (can_create_pseudo_p ()
13828 && FLOAT_MODE_P (mode)
13829 && GET_CODE (op1) == CONST_DOUBLE)
13831 /* If we are loading a floating point constant to a register,
13832 force the value to memory now, since we'll get better code
13833 out the back end. */
13835 op1 = validize_mem (force_const_mem (mode, op1));
13836 if (!register_operand (op0, mode))
13838 rtx temp = gen_reg_rtx (mode);
13839 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
13840 emit_move_insn (op0, temp);
13846 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13850 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
13852 rtx op0 = operands[0], op1 = operands[1];
13853 unsigned int align = GET_MODE_ALIGNMENT (mode);
  /* Force constants other than zero into memory.  We do not know how
     the instructions used to build constants modify the upper 64 bits
     of the register; once we have that information, we may be able
     to handle some of them more efficiently.  */
13859 if (can_create_pseudo_p ()
13860 && register_operand (op0, mode)
13861 && (CONSTANT_P (op1)
13862 || (GET_CODE (op1) == SUBREG
13863 && CONSTANT_P (SUBREG_REG (op1))))
13864 && !standard_sse_constant_p (op1))
13865 op1 = validize_mem (force_const_mem (mode, op1));
  /* We need to check memory alignment for SSE mode since attributes
     can make operands unaligned.  */
13869 if (can_create_pseudo_p ()
13870 && SSE_REG_MODE_P (mode)
13871 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
13872 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
13876 /* ix86_expand_vector_move_misalign() does not like constants ... */
13877 if (CONSTANT_P (op1)
13878 || (GET_CODE (op1) == SUBREG
13879 && CONSTANT_P (SUBREG_REG (op1))))
13880 op1 = validize_mem (force_const_mem (mode, op1));
13882 /* ... nor both arguments in memory. */
13883 if (!register_operand (op0, mode)
13884 && !register_operand (op1, mode))
13885 op1 = force_reg (mode, op1);
13887 tmp[0] = op0; tmp[1] = op1;
13888 ix86_expand_vector_move_misalign (mode, tmp);
13892 /* Make operand1 a register if it isn't already. */
13893 if (can_create_pseudo_p ()
13894 && !register_operand (op0, mode)
13895 && !register_operand (op1, mode))
13897 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
13901 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
13904 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
13905 straight to ix86_expand_vector_move. */
13906 /* Code generation for scalar reg-reg moves of single and double precision data:
13907 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
13911 if (x86_sse_partial_reg_dependency == true)
13916 Code generation for scalar loads of double precision data:
13917 if (x86_sse_split_regs == true)
13918 movlpd mem, reg (gas syntax)
13922 Code generation for unaligned packed loads of single precision data
13923 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
13924 if (x86_sse_unaligned_move_optimal)
13927 if (x86_sse_partial_reg_dependency == true)
13939 Code generation for unaligned packed loads of double precision data
13940 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
13941 if (x86_sse_unaligned_move_optimal)
13944 if (x86_sse_split_regs == true)
13957 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
13966 switch (GET_MODE_CLASS (mode))
13968 case MODE_VECTOR_INT:
13970 switch (GET_MODE_SIZE (mode))
13973 /* If we're optimizing for size, movups is the smallest. */
13974 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
13976 op0 = gen_lowpart (V4SFmode, op0);
13977 op1 = gen_lowpart (V4SFmode, op1);
13978 emit_insn (gen_avx_movups (op0, op1));
13981 op0 = gen_lowpart (V16QImode, op0);
13982 op1 = gen_lowpart (V16QImode, op1);
13983 emit_insn (gen_avx_movdqu (op0, op1));
13986 op0 = gen_lowpart (V32QImode, op0);
13987 op1 = gen_lowpart (V32QImode, op1);
13988 emit_insn (gen_avx_movdqu256 (op0, op1));
13991 gcc_unreachable ();
13994 case MODE_VECTOR_FLOAT:
13995 op0 = gen_lowpart (mode, op0);
13996 op1 = gen_lowpart (mode, op1);
14001 emit_insn (gen_avx_movups (op0, op1));
14004 emit_insn (gen_avx_movups256 (op0, op1));
14007 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14009 op0 = gen_lowpart (V4SFmode, op0);
14010 op1 = gen_lowpart (V4SFmode, op1);
14011 emit_insn (gen_avx_movups (op0, op1));
14014 emit_insn (gen_avx_movupd (op0, op1));
14017 emit_insn (gen_avx_movupd256 (op0, op1));
14020 gcc_unreachable ();
14025 gcc_unreachable ();
14033 /* If we're optimizing for size, movups is the smallest. */
14034 if (optimize_insn_for_size_p ()
14035 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14037 op0 = gen_lowpart (V4SFmode, op0);
14038 op1 = gen_lowpart (V4SFmode, op1);
14039 emit_insn (gen_sse_movups (op0, op1));
      /* ??? If we have typed data, then it would appear that using
	 movdqu is the only way to get unaligned data loaded with
	 integer registers.  */
14046 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14048 op0 = gen_lowpart (V16QImode, op0);
14049 op1 = gen_lowpart (V16QImode, op1);
14050 emit_insn (gen_sse2_movdqu (op0, op1));
14054 if (TARGET_SSE2 && mode == V2DFmode)
14058 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14060 op0 = gen_lowpart (V2DFmode, op0);
14061 op1 = gen_lowpart (V2DFmode, op1);
14062 emit_insn (gen_sse2_movupd (op0, op1));
14066 /* When SSE registers are split into halves, we can avoid
14067 writing to the top half twice. */
14068 if (TARGET_SSE_SPLIT_REGS)
14070 emit_clobber (op0);
14075 /* ??? Not sure about the best option for the Intel chips.
14076 The following would seem to satisfy; the register is
14077 entirely cleared, breaking the dependency chain. We
14078 then store to the upper half, with a dependency depth
14079 of one. A rumor has it that Intel recommends two movsd
14080 followed by an unpacklpd, but this is unconfirmed. And
14081 given that the dependency depth of the unpacklpd would
14082 still be one, I'm not sure why this would be better. */
14083 zero = CONST0_RTX (V2DFmode);
14086 m = adjust_address (op1, DFmode, 0);
14087 emit_insn (gen_sse2_loadlpd (op0, zero, m));
14088 m = adjust_address (op1, DFmode, 8);
14089 emit_insn (gen_sse2_loadhpd (op0, op0, m));
14093 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL)
14095 op0 = gen_lowpart (V4SFmode, op0);
14096 op1 = gen_lowpart (V4SFmode, op1);
14097 emit_insn (gen_sse_movups (op0, op1));
14101 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
14102 emit_move_insn (op0, CONST0_RTX (mode));
14104 emit_clobber (op0);
14106 if (mode != V4SFmode)
14107 op0 = gen_lowpart (V4SFmode, op0);
14108 m = adjust_address (op1, V2SFmode, 0);
14109 emit_insn (gen_sse_loadlps (op0, op0, m));
14110 m = adjust_address (op1, V2SFmode, 8);
14111 emit_insn (gen_sse_loadhps (op0, op0, m));
14114 else if (MEM_P (op0))
14116 /* If we're optimizing for size, movups is the smallest. */
14117 if (optimize_insn_for_size_p ()
14118 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL)
14120 op0 = gen_lowpart (V4SFmode, op0);
14121 op1 = gen_lowpart (V4SFmode, op1);
14122 emit_insn (gen_sse_movups (op0, op1));
      /* ??? Similar to above, only less clear because of
	 "typeless stores".  */
14128 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
14129 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
14131 op0 = gen_lowpart (V16QImode, op0);
14132 op1 = gen_lowpart (V16QImode, op1);
14133 emit_insn (gen_sse2_movdqu (op0, op1));
14137 if (TARGET_SSE2 && mode == V2DFmode)
14139 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14141 op0 = gen_lowpart (V2DFmode, op0);
14142 op1 = gen_lowpart (V2DFmode, op1);
14143 emit_insn (gen_sse2_movupd (op0, op1));
14147 m = adjust_address (op0, DFmode, 0);
14148 emit_insn (gen_sse2_storelpd (m, op1));
14149 m = adjust_address (op0, DFmode, 8);
14150 emit_insn (gen_sse2_storehpd (m, op1));
14155 if (mode != V4SFmode)
14156 op1 = gen_lowpart (V4SFmode, op1);
14158 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL)
14160 op0 = gen_lowpart (V4SFmode, op0);
14161 emit_insn (gen_sse_movups (op0, op1));
14165 m = adjust_address (op0, V2SFmode, 0);
14166 emit_insn (gen_sse_storelps (m, op1));
14167 m = adjust_address (op0, V2SFmode, 8);
14168 emit_insn (gen_sse_storehps (m, op1));
14173 gcc_unreachable ();
14176 /* Expand a push in MODE. This is some mode for which we do not support
14177 proper push instructions, at least from the registers that we expect
14178 the value to live in. */
14181 ix86_expand_push (enum machine_mode mode, rtx x)
14185 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
14186 GEN_INT (-GET_MODE_SIZE (mode)),
14187 stack_pointer_rtx, 1, OPTAB_DIRECT);
14188 if (tmp != stack_pointer_rtx)
14189 emit_move_insn (stack_pointer_rtx, tmp);
14191 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
  /* When we push an operand onto the stack, it has to be aligned at least
     at the function argument boundary.  However, since we don't have
     the argument type, we can't determine the actual argument
     boundary.  */
14197 emit_move_insn (tmp, x);
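  /* The net effect is an explicit stack-pointer adjustment followed by
     an ordinary store, roughly

	subl	$16, %esp
	movaps	%xmm0, (%esp)

     for a 16-byte SSE mode, rather than a real push instruction.  */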
14200 /* Helper function of ix86_fixup_binary_operands to canonicalize
14201 operand order. Returns true if the operands should be swapped. */
14204 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
14207 rtx dst = operands[0];
14208 rtx src1 = operands[1];
14209 rtx src2 = operands[2];
14211 /* If the operation is not commutative, we can't do anything. */
14212 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
14215 /* Highest priority is that src1 should match dst. */
14216 if (rtx_equal_p (dst, src1))
14218 if (rtx_equal_p (dst, src2))
14221 /* Next highest priority is that immediate constants come second. */
14222 if (immediate_operand (src2, mode))
14224 if (immediate_operand (src1, mode))
14227 /* Lowest priority is that memory references should come second. */
14237 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
14238 destination to use for the operation. If different from the true
14239 destination in operands[0], a copy operation will be required. */
14242 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
14245 rtx dst = operands[0];
14246 rtx src1 = operands[1];
14247 rtx src2 = operands[2];
14249 /* Canonicalize operand order. */
14250 if (ix86_swap_binary_operands_p (code, mode, operands))
14254 /* It is invalid to swap operands of different modes. */
14255 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
14262 /* Both source operands cannot be in memory. */
14263 if (MEM_P (src1) && MEM_P (src2))
14265 /* Optimization: Only read from memory once. */
14266 if (rtx_equal_p (src1, src2))
14268 src2 = force_reg (mode, src2);
14272 src2 = force_reg (mode, src2);
14275 /* If the destination is memory, and we do not have matching source
14276 operands, do things in registers. */
14277 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14278 dst = gen_reg_rtx (mode);
14280 /* Source 1 cannot be a constant. */
14281 if (CONSTANT_P (src1))
14282 src1 = force_reg (mode, src1);
14284 /* Source 1 cannot be a non-matching memory. */
14285 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
14286 src1 = force_reg (mode, src1);
14288 operands[1] = src1;
14289 operands[2] = src2;
14293 /* Similarly, but assume that the destination has already been
14294 set up properly. */
14297 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
14298 enum machine_mode mode, rtx operands[])
14300 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
14301 gcc_assert (dst == operands[0]);
/* Attempt to expand a binary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 3 separate
   memory references (one output, two input) in a single insn.  */
14309 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
14312 rtx src1, src2, dst, op, clob;
14314 dst = ix86_fixup_binary_operands (code, mode, operands);
14315 src1 = operands[1];
14316 src2 = operands[2];
14318 /* Emit the instruction. */
14320 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
14321 if (reload_in_progress)
14323 /* Reload doesn't know about the flags register, and doesn't know that
14324 it doesn't want to clobber it. We can only do this with PLUS. */
14325 gcc_assert (code == PLUS);
14330 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14331 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14334 /* Fix up the destination if needed. */
14335 if (dst != operands[0])
14336 emit_move_insn (operands[0], dst);
14339 /* Return TRUE or FALSE depending on whether the binary operator meets the
14340 appropriate constraints. */
14343 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
14346 rtx dst = operands[0];
14347 rtx src1 = operands[1];
14348 rtx src2 = operands[2];
14350 /* Both source operands cannot be in memory. */
14351 if (MEM_P (src1) && MEM_P (src2))
14354 /* Canonicalize operand order for commutative operators. */
14355 if (ix86_swap_binary_operands_p (code, mode, operands))
14362 /* If the destination is memory, we must have a matching source operand. */
14363 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
14366 /* Source 1 cannot be a constant. */
14367 if (CONSTANT_P (src1))
14370 /* Source 1 cannot be a non-matching memory. */
14371 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
/* Attempt to expand a unary operator.  Make the expansion closer to the
   actual machine than just general_operand, which would allow 2 separate
   memory references (one output, one input) in a single insn.  */
14382 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
14385 int matching_memory;
14386 rtx src, dst, op, clob;
14391 /* If the destination is memory, and we do not have matching source
14392 operands, do things in registers. */
14393 matching_memory = 0;
14396 if (rtx_equal_p (dst, src))
14397 matching_memory = 1;
14399 dst = gen_reg_rtx (mode);
14402 /* When source operand is memory, destination must match. */
14403 if (MEM_P (src) && !matching_memory)
14404 src = force_reg (mode, src);
14406 /* Emit the instruction. */
14408 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
14409 if (reload_in_progress || code == NOT)
14411 /* Reload doesn't know about the flags register, and doesn't know that
14412 it doesn't want to clobber it. */
14413 gcc_assert (code == NOT);
14418 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
14419 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
14422 /* Fix up the destination if needed. */
14423 if (dst != operands[0])
14424 emit_move_insn (operands[0], dst);
14427 #define LEA_SEARCH_THRESHOLD 12
14429 /* Search backward for non-agu definition of register number REGNO1
14430 or register number REGNO2 in INSN's basic block until
14431 1. Pass LEA_SEARCH_THRESHOLD instructions, or
14432 2. Reach BB boundary, or
14433 3. Reach agu definition.
14434 Returns the distance between the non-agu definition point and INSN.
14435 If no definition point, returns -1. */
14438 distance_non_agu_define (unsigned int regno1, unsigned int regno2,
14441 basic_block bb = BLOCK_FOR_INSN (insn);
14444 enum attr_type insn_type;
14446 if (insn != BB_HEAD (bb))
14448 rtx prev = PREV_INSN (insn);
14449 while (prev && distance < LEA_SEARCH_THRESHOLD)
14451 if (NONDEBUG_INSN_P (prev))
14454 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14455 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14456 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14457 && (regno1 == DF_REF_REGNO (*def_rec)
14458 || regno2 == DF_REF_REGNO (*def_rec)))
14460 insn_type = get_attr_type (prev);
14461 if (insn_type != TYPE_LEA)
14465 if (prev == BB_HEAD (bb))
14467 prev = PREV_INSN (prev);
14471 if (distance < LEA_SEARCH_THRESHOLD)
14475 bool simple_loop = false;
14477 FOR_EACH_EDGE (e, ei, bb->preds)
14480 simple_loop = true;
14486 rtx prev = BB_END (bb);
14489 && distance < LEA_SEARCH_THRESHOLD)
14491 if (NONDEBUG_INSN_P (prev))
14494 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++)
14495 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14496 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14497 && (regno1 == DF_REF_REGNO (*def_rec)
14498 || regno2 == DF_REF_REGNO (*def_rec)))
14500 insn_type = get_attr_type (prev);
14501 if (insn_type != TYPE_LEA)
14505 prev = PREV_INSN (prev);
14513 /* get_attr_type may modify recog data. We want to make sure
14514 that recog data is valid for instruction INSN, on which
14515 distance_non_agu_define is called. INSN is unchanged here. */
14516 extract_insn_cached (insn);
/* Return the distance between INSN and the next insn that uses
   register number REGNO0 in a memory address.  Return -1 if no such
   use is found within LEA_SEARCH_THRESHOLD or REGNO0 is set.  */
14525 distance_agu_use (unsigned int regno0, rtx insn)
14527 basic_block bb = BLOCK_FOR_INSN (insn);
14532 if (insn != BB_END (bb))
14534 rtx next = NEXT_INSN (insn);
14535 while (next && distance < LEA_SEARCH_THRESHOLD)
14537 if (NONDEBUG_INSN_P (next))
14541 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14542 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14543 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14544 && regno0 == DF_REF_REGNO (*use_rec))
14546 /* Return DISTANCE if OP0 is used in memory
14547 address in NEXT. */
14551 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14552 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14553 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14554 && regno0 == DF_REF_REGNO (*def_rec))
14556 /* Return -1 if OP0 is set in NEXT. */
14560 if (next == BB_END (bb))
14562 next = NEXT_INSN (next);
14566 if (distance < LEA_SEARCH_THRESHOLD)
14570 bool simple_loop = false;
14572 FOR_EACH_EDGE (e, ei, bb->succs)
14575 simple_loop = true;
14581 rtx next = BB_HEAD (bb);
14584 && distance < LEA_SEARCH_THRESHOLD)
14586 if (NONDEBUG_INSN_P (next))
14590 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++)
14591 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD
14592 || DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_STORE)
14593 && regno0 == DF_REF_REGNO (*use_rec))
14595 /* Return DISTANCE if OP0 is used in memory
14596 address in NEXT. */
14600 for (def_rec = DF_INSN_DEFS (next); *def_rec; def_rec++)
14601 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF
14602 && !DF_REF_IS_ARTIFICIAL (*def_rec)
14603 && regno0 == DF_REF_REGNO (*def_rec))
14605 /* Return -1 if OP0 is set in NEXT. */
14610 next = NEXT_INSN (next);
/* Define this macro to tune LEA priority vs ADD; it takes effect when
   there is a choice between LEA and ADD.
   Negative value: ADD is preferred over LEA.
   Positive value: LEA is preferred over ADD.  */
14623 #define IX86_LEA_PRIORITY 2
/* Return true if it is ok to optimize an ADD operation to an LEA
   operation to avoid flag register consumption.  For processors
   like ATOM, if the destination register of LEA holds an actual
   address which will be used soon, LEA is better; otherwise ADD
   is better.  */
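/* A sketch of the heuristic below: for a = b + c with a, b and c all
   distinct, the lea form is mandatory anyway.  Otherwise we weigh how
   far back the inputs were produced by non-AGU instructions
   (dist_define) against how soon the result feeds an address
   (dist_use); lea wins only when the address use is close enough,
   i.e. dist_use <= dist_define + IX86_LEA_PRIORITY.  */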
14632 ix86_lea_for_add_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14633 rtx insn, rtx operands[])
14635 unsigned int regno0 = true_regnum (operands[0]);
14636 unsigned int regno1 = true_regnum (operands[1]);
14637 unsigned int regno2;
14639 if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
14640 return regno0 != regno1;
14642 regno2 = true_regnum (operands[2]);
14644 /* If a = b + c, (a!=b && a!=c), must use lea form. */
14645 if (regno0 != regno1 && regno0 != regno2)
14649 int dist_define, dist_use;
14650 dist_define = distance_non_agu_define (regno1, regno2, insn);
14651 if (dist_define <= 0)
      /* If this insn has both a backward non-agu dependence and a forward
	 agu dependence, the one with the shorter distance takes effect.  */
14656 dist_use = distance_agu_use (regno0, insn);
14658 || (dist_define + IX86_LEA_PRIORITY) < dist_use)
/* Return true if the destination reg of SET_BODY is the shift count
   of USE_BODY.  */
14669 ix86_dep_by_shift_count_body (const_rtx set_body, const_rtx use_body)
14675 /* Retrieve destination of SET_BODY. */
14676 switch (GET_CODE (set_body))
14679 set_dest = SET_DEST (set_body);
14680 if (!set_dest || !REG_P (set_dest))
14684 for (i = XVECLEN (set_body, 0) - 1; i >= 0; i--)
14685 if (ix86_dep_by_shift_count_body (XVECEXP (set_body, 0, i),
14693 /* Retrieve shift count of USE_BODY. */
14694 switch (GET_CODE (use_body))
14697 shift_rtx = XEXP (use_body, 1);
14700 for (i = XVECLEN (use_body, 0) - 1; i >= 0; i--)
14701 if (ix86_dep_by_shift_count_body (set_body,
14702 XVECEXP (use_body, 0, i)))
14710 && (GET_CODE (shift_rtx) == ASHIFT
14711 || GET_CODE (shift_rtx) == LSHIFTRT
14712 || GET_CODE (shift_rtx) == ASHIFTRT
14713 || GET_CODE (shift_rtx) == ROTATE
14714 || GET_CODE (shift_rtx) == ROTATERT))
14716 rtx shift_count = XEXP (shift_rtx, 1);
14718 /* Return true if shift count is dest of SET_BODY. */
14719 if (REG_P (shift_count)
14720 && true_regnum (set_dest) == true_regnum (shift_count))
/* Return true if the destination reg of SET_INSN is the shift count
   of USE_INSN.  */
14731 ix86_dep_by_shift_count (const_rtx set_insn, const_rtx use_insn)
14733 return ix86_dep_by_shift_count_body (PATTERN (set_insn),
14734 PATTERN (use_insn));
14737 /* Return TRUE or FALSE depending on whether the unary operator meets the
14738 appropriate constraints. */
14741 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
14742 enum machine_mode mode ATTRIBUTE_UNUSED,
14743 rtx operands[2] ATTRIBUTE_UNUSED)
14745 /* If one of operands is memory, source and destination must match. */
14746 if ((MEM_P (operands[0])
14747 || MEM_P (operands[1]))
14748 && ! rtx_equal_p (operands[0], operands[1]))
14753 /* Return TRUE if the operands to a vec_interleave_{high,low}v2df
14754 are ok, keeping in mind the possible movddup alternative. */
14757 ix86_vec_interleave_v2df_operator_ok (rtx operands[3], bool high)
14759 if (MEM_P (operands[0]))
14760 return rtx_equal_p (operands[0], operands[1 + high]);
14761 if (MEM_P (operands[1]) && MEM_P (operands[2]))
14762 return TARGET_SSE3 && rtx_equal_p (operands[1], operands[2]);
14766 /* Post-reload splitter for converting an SF or DFmode value in an
14767 SSE register into an unsigned SImode. */
14770 ix86_split_convert_uns_si_sse (rtx operands[])
14772 enum machine_mode vecmode;
14773 rtx value, large, zero_or_two31, input, two31, x;
14775 large = operands[1];
14776 zero_or_two31 = operands[2];
14777 input = operands[3];
14778 two31 = operands[4];
14779 vecmode = GET_MODE (large);
14780 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
14782 /* Load up the value into the low element. We must ensure that the other
14783 elements are valid floats -- zero is the easiest such value. */
14786 if (vecmode == V4SFmode)
14787 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
14789 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
14793 input = gen_rtx_REG (vecmode, REGNO (input));
14794 emit_move_insn (value, CONST0_RTX (vecmode));
14795 if (vecmode == V4SFmode)
14796 emit_insn (gen_sse_movss (value, value, input));
14798 emit_insn (gen_sse2_movsd (value, value, input));
14801 emit_move_insn (large, two31);
14802 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
14804 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
14805 emit_insn (gen_rtx_SET (VOIDmode, large, x));
14807 x = gen_rtx_AND (vecmode, zero_or_two31, large);
14808 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
14810 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
14811 emit_insn (gen_rtx_SET (VOIDmode, value, x));
14813 large = gen_rtx_REG (V4SImode, REGNO (large));
14814 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
14816 x = gen_rtx_REG (V4SImode, REGNO (value));
14817 if (vecmode == V4SFmode)
14818 emit_insn (gen_sse2_cvttps2dq (x, value));
14820 emit_insn (gen_sse2_cvttpd2dq (x, value));
14823 emit_insn (gen_xorv4si3 (value, value, large));
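/* A worked example, converting 3000000000.0: the LE compare sets the
   "large" lane mask (2**31 <= value), so 2**31 is subtracted before
   the signed cvtt, which yields 852516352; the mask shifted left by
   31 becomes 0x80000000, and the final xor re-adds 2**31, giving the
   unsigned bit pattern for 3000000000.  */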
14826 /* Convert an unsigned DImode value into a DFmode, using only SSE.
14827 Expects the 64-bit DImode to be supplied in a pair of integral
14828 registers. Requires SSE2; will use SSE3 if available. For x86_32,
14829 -mfpmath=sse, !optimize_size only. */
14832 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
14834 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
14835 rtx int_xmm, fp_xmm;
14836 rtx biases, exponents;
14839 int_xmm = gen_reg_rtx (V4SImode);
14840 if (TARGET_INTER_UNIT_MOVES)
14841 emit_insn (gen_movdi_to_sse (int_xmm, input));
14842 else if (TARGET_SSE_SPLIT_REGS)
14844 emit_clobber (int_xmm);
14845 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
14849 x = gen_reg_rtx (V2DImode);
14850 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
14851 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
14854 x = gen_rtx_CONST_VECTOR (V4SImode,
14855 gen_rtvec (4, GEN_INT (0x43300000UL),
14856 GEN_INT (0x45300000UL),
14857 const0_rtx, const0_rtx));
14858 exponents = validize_mem (force_const_mem (V4SImode, x));
14860 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
14861 emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
14863 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
14864 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
14865 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
14866 (0x1.0p84 + double(fp_value_hi_xmm)).
14867 Note these exponents differ by 32. */
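  /* As a sanity check: with a low word of 1, the juxtaposed bits
     0x4330000000000001 are the double 2**52 + 1, so subtracting the
     0x1.0p52 bias below leaves exactly 1.0; the high half works the
     same way against the 0x1.0p84 bias, scaled by 2**32.  */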
14869 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
14871 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
14872 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
14873 real_ldexp (&bias_lo_rvt, &dconst1, 52);
14874 real_ldexp (&bias_hi_rvt, &dconst1, 84);
14875 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
14876 x = const_double_from_real_value (bias_hi_rvt, DFmode);
14877 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
14878 biases = validize_mem (force_const_mem (V2DFmode, biases));
14879 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
14881 /* Add the upper and lower DFmode values together. */
14883 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
14886 x = copy_to_mode_reg (V2DFmode, fp_xmm);
14887 emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
14888 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
14891 ix86_expand_vector_extract (false, target, fp_xmm, 0);
14894 /* Not used, but eases macroization of patterns. */
14896 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
14897 rtx input ATTRIBUTE_UNUSED)
14899 gcc_unreachable ();
14902 /* Convert an unsigned SImode value into a DFmode. Only currently used
14903 for SSE, but applicable anywhere. */
14906 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
14908 REAL_VALUE_TYPE TWO31r;
14911 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
14912 NULL, 1, OPTAB_DIRECT);
14914 fp = gen_reg_rtx (DFmode);
14915 emit_insn (gen_floatsidf2 (fp, x));
14917 real_ldexp (&TWO31r, &dconst1, 31);
14918 x = const_double_from_real_value (TWO31r, DFmode);
14920 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
14922 emit_move_insn (target, x);
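  /* E.g. for input 0xffffffff, the wrapping add yields the signed
     value 2147483647, floatsidf turns that into 2147483647.0, and
     adding 2**31 back produces 4294967295.0 exactly, since DFmode
     represents every 32-bit integer.  */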
14925 /* Convert a signed DImode value into a DFmode. Only used for SSE in
14926 32-bit mode; otherwise we have a direct convert instruction. */
14929 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
14931 REAL_VALUE_TYPE TWO32r;
14932 rtx fp_lo, fp_hi, x;
14934 fp_lo = gen_reg_rtx (DFmode);
14935 fp_hi = gen_reg_rtx (DFmode);
14937 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
14939 real_ldexp (&TWO32r, &dconst1, 32);
14940 x = const_double_from_real_value (TWO32r, DFmode);
14941 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
14943 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
14945 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
14948 emit_move_insn (target, x);
14951 /* Convert an unsigned SImode value into a SFmode, using only SSE.
14952 For x86_32, -mfpmath=sse, !optimize_size only. */
14954 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
14956 REAL_VALUE_TYPE ONE16r;
14957 rtx fp_hi, fp_lo, int_hi, int_lo, x;
14959 real_ldexp (&ONE16r, &dconst1, 16);
14960 x = const_double_from_real_value (ONE16r, SFmode);
14961 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
14962 NULL, 0, OPTAB_DIRECT);
14963 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
14964 NULL, 0, OPTAB_DIRECT);
14965 fp_hi = gen_reg_rtx (SFmode);
14966 fp_lo = gen_reg_rtx (SFmode);
14967 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
14968 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
14969 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
14971 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
14973 if (!rtx_equal_p (target, fp_hi))
14974 emit_move_insn (target, fp_hi);
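  /* The 16-bit split is safe: fp_hi = (input >> 16) * 2**16 is exact
     in SFmode (a 16-bit value scaled by a power of two), so the only
     rounding happens in the final addition, matching a directly
     rounded unsigned-to-float conversion.  */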
/* A subroutine of ix86_build_signbit_mask.  If VECT is true,
   then replicate the value for all elements of the vector
   register.  */
14982 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
14989 v = gen_rtvec (4, value, value, value, value);
14990 return gen_rtx_CONST_VECTOR (V4SImode, v);
14994 v = gen_rtvec (2, value, value);
14995 return gen_rtx_CONST_VECTOR (V2DImode, v);
14999 v = gen_rtvec (4, value, value, value, value);
15001 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
15002 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
15003 return gen_rtx_CONST_VECTOR (V4SFmode, v);
15007 v = gen_rtvec (2, value, value);
15009 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
15010 return gen_rtx_CONST_VECTOR (V2DFmode, v);
15013 gcc_unreachable ();
15017 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
15018 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
15019 for an SSE register. If VECT is true, then replicate the mask for
15020 all elements of the vector register. If INVERT is true, then create
15021 a mask excluding the sign bit. */
15024 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
15026 enum machine_mode vec_mode, imode;
15027 HOST_WIDE_INT hi, lo;
15032 /* Find the sign bit, sign extended to 2*HWI. */
15038 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
15039 lo = 0x80000000, hi = lo < 0;
15045 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
15046 if (HOST_BITS_PER_WIDE_INT >= 64)
15047 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
15049 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15054 vec_mode = VOIDmode;
15055 if (HOST_BITS_PER_WIDE_INT >= 64)
15058 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
15065 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
15069 lo = ~lo, hi = ~hi;
15075 mask = immed_double_const (lo, hi, imode);
15077 vec = gen_rtvec (2, v, mask);
15078 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
15079 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
15086 gcc_unreachable ();
15090 lo = ~lo, hi = ~hi;
15092 /* Force this value into the low part of a fp vector constant. */
15093 mask = immed_double_const (lo, hi, imode);
15094 mask = gen_lowpart (mode, mask);
15096 if (vec_mode == VOIDmode)
15097 return force_reg (mode, mask);
15099 v = ix86_build_const_vector (mode, vect, mask);
15100 return force_reg (vec_mode, v);
15103 /* Generate code for floating point ABS or NEG. */
15106 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
15109 rtx mask, set, use, clob, dst, src;
15110 bool use_sse = false;
15111 bool vector_mode = VECTOR_MODE_P (mode);
15112 enum machine_mode elt_mode = mode;
15116 elt_mode = GET_MODE_INNER (mode);
15119 else if (mode == TFmode)
15121 else if (TARGET_SSE_MATH)
15122 use_sse = SSE_FLOAT_MODE_P (mode);
15124 /* NEG and ABS performed with SSE use bitwise mask operations.
15125 Create the appropriate mask now. */
15127 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
15136 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
15137 set = gen_rtx_SET (VOIDmode, dst, set);
15142 set = gen_rtx_fmt_e (code, mode, src);
15143 set = gen_rtx_SET (VOIDmode, dst, set);
15146 use = gen_rtx_USE (VOIDmode, mask);
15147 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
15148 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15149 gen_rtvec (3, set, use, clob)));
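  /* Either way the eventual work is bitwise: NEG xors the value with
     the sign-bit mask, while ABS ands it with the mask's complement,
     which is why ix86_build_signbit_mask is called with invert set for
     ABS above.  */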
15156 /* Expand a copysign operation. Special case operand 0 being a constant. */
15159 ix86_expand_copysign (rtx operands[])
15161 enum machine_mode mode;
15162 rtx dest, op0, op1, mask, nmask;
15164 dest = operands[0];
15168 mode = GET_MODE (dest);
15170 if (GET_CODE (op0) == CONST_DOUBLE)
15172 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
15174 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
15175 op0 = simplify_unary_operation (ABS, mode, op0, mode);
15177 if (mode == SFmode || mode == DFmode)
15179 enum machine_mode vmode;
15181 vmode = mode == SFmode ? V4SFmode : V2DFmode;
15183 if (op0 == CONST0_RTX (mode))
15184 op0 = CONST0_RTX (vmode);
15187 rtx v = ix86_build_const_vector (mode, false, op0);
15189 op0 = force_reg (vmode, v);
15192 else if (op0 != CONST0_RTX (mode))
15193 op0 = force_reg (mode, op0);
15195 mask = ix86_build_signbit_mask (mode, 0, 0);
15197 if (mode == SFmode)
15198 copysign_insn = gen_copysignsf3_const;
15199 else if (mode == DFmode)
15200 copysign_insn = gen_copysigndf3_const;
15202 copysign_insn = gen_copysigntf3_const;
15204 emit_insn (copysign_insn (dest, op0, op1, mask));
15208 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
15210 nmask = ix86_build_signbit_mask (mode, 0, 1);
15211 mask = ix86_build_signbit_mask (mode, 0, 0);
15213 if (mode == SFmode)
15214 copysign_insn = gen_copysignsf3_var;
15215 else if (mode == DFmode)
15216 copysign_insn = gen_copysigndf3_var;
15218 copysign_insn = gen_copysigntf3_var;
15220 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
15224 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
15225 be a constant, and so has already been expanded into a vector constant. */
15228 ix86_split_copysign_const (rtx operands[])
15230 enum machine_mode mode, vmode;
15231 rtx dest, op0, mask, x;
15233 dest = operands[0];
15235 mask = operands[3];
15237 mode = GET_MODE (dest);
15238 vmode = GET_MODE (mask);
15240 dest = simplify_gen_subreg (vmode, dest, mode, 0);
15241 x = gen_rtx_AND (vmode, dest, mask);
15242 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15244 if (op0 != CONST0_RTX (vmode))
15246 x = gen_rtx_IOR (vmode, dest, op0);
15247 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15251 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
15252 so we have to do two masks. */
15255 ix86_split_copysign_var (rtx operands[])
15257 enum machine_mode mode, vmode;
15258 rtx dest, scratch, op0, op1, mask, nmask, x;
15260 dest = operands[0];
15261 scratch = operands[1];
15264 nmask = operands[4];
15265 mask = operands[5];
15267 mode = GET_MODE (dest);
15268 vmode = GET_MODE (mask);
15270 if (rtx_equal_p (op0, op1))
15272 /* Shouldn't happen often (it's useless, obviously), but when it does
15273 we'd generate incorrect code if we continue below. */
15274 emit_move_insn (dest, op0);
15278 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
15280 gcc_assert (REGNO (op1) == REGNO (scratch));
15282 x = gen_rtx_AND (vmode, scratch, mask);
15283 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15286 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15287 x = gen_rtx_NOT (vmode, dest);
15288 x = gen_rtx_AND (vmode, x, op0);
15289 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15293 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
15295 x = gen_rtx_AND (vmode, scratch, mask);
15297 else /* alternative 2,4 */
15299 gcc_assert (REGNO (mask) == REGNO (scratch));
15300 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
15301 x = gen_rtx_AND (vmode, scratch, op1);
15303 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
15305 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
15307 dest = simplify_gen_subreg (vmode, op0, mode, 0);
15308 x = gen_rtx_AND (vmode, dest, nmask);
15310 else /* alternative 3,4 */
15312 gcc_assert (REGNO (nmask) == REGNO (dest));
15314 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
15315 x = gen_rtx_AND (vmode, dest, op0);
15317 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15320 x = gen_rtx_IOR (vmode, dest, scratch);
15321 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
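/* In bit terms, the two split routines above compute, e.g. for SFmode:

     copysign (x, y) = (x & 0x7fffffff) | (y & 0x80000000)

   where MASK extracts the sign bit of Y and NMASK clears the sign bit
   of X; the register/alternative juggling merely decides which operand
   already sits in the destination register (informal summary).  */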
15324 /* Return TRUE or FALSE depending on whether the first SET in INSN
15325 has source and destination with matching CC modes, and whether the
15326 CC mode is at least as constrained as REQ_MODE. */
15329 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
15332 enum machine_mode set_mode;
15334 set = PATTERN (insn);
15335 if (GET_CODE (set) == PARALLEL)
15336 set = XVECEXP (set, 0, 0);
15337 gcc_assert (GET_CODE (set) == SET);
15338 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
15340 set_mode = GET_MODE (SET_DEST (set));
15344 if (req_mode != CCNOmode
15345 && (req_mode != CCmode
15346 || XEXP (SET_SRC (set), 1) != const0_rtx))
15350 if (req_mode == CCGCmode)
15354 if (req_mode == CCGOCmode || req_mode == CCNOmode)
15358 if (req_mode == CCZmode)
15369 gcc_unreachable ();
15372 return (GET_MODE (SET_SRC (set)) == set_mode);
15375 /* Generate insn patterns to do an integer compare of OPERANDS. */
15378 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
15380 enum machine_mode cmpmode;
15383 cmpmode = SELECT_CC_MODE (code, op0, op1);
15384 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
15386 /* This is very simple, but making the interface the same as in the
15387 FP case makes the rest of the code easier. */
15388 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
15389 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
15391 /* Return the test that should be put into the flags user, i.e.
15392 the bcc, scc, or cmov instruction. */
15393 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
15396 /* Figure out whether to use ordered or unordered fp comparisons.
15397 Return the appropriate mode to use. */
15400 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
15402 /* ??? In order to make all comparisons reversible, we do all comparisons
15403 non-trapping when compiling for IEEE. Once gcc is able to distinguish
15404 all forms of trapping and nontrapping comparisons, we can make inequality
15405 comparisons trapping again, since it results in better code when using
15406 FCOM based compares. */
15407 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
15411 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
15413 enum machine_mode mode = GET_MODE (op0);
15415 if (SCALAR_FLOAT_MODE_P (mode))
15417 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
15418 return ix86_fp_compare_mode (code);
15423 /* Only zero flag is needed. */
15424 case EQ: /* ZF=0 */
15425 case NE: /* ZF!=0 */
15427 /* Codes needing carry flag. */
15428 case GEU: /* CF=0 */
15429 case LTU: /* CF=1 */
15430 /* Detect overflow checks. They need just the carry flag. */
15431 if (GET_CODE (op0) == PLUS
15432 && rtx_equal_p (op1, XEXP (op0, 0)))
15436 case GTU: /* CF=0 & ZF=0 */
15437 case LEU: /* CF=1 | ZF=1 */
15438 /* Detect overflow checks. They need just the carry flag. */
15439 if (GET_CODE (op0) == MINUS
15440 && rtx_equal_p (op1, XEXP (op0, 0)))
15444 /* Codes possibly doable only with sign flag when
15445 comparing against zero. */
15446 case GE: /* SF=OF or SF=0 */
15447 case LT: /* SF<>OF or SF=1 */
15448 if (op1 == const0_rtx)
15451 /* For other cases Carry flag is not required. */
15453 /* Codes doable only with sign flag when comparing
15454 against zero, but we lack a jump instruction for it,
15455 so we need to use relational tests against overflow,
15456 which thus needs to be zero. */
15457 case GT: /* ZF=0 & SF=OF */
15458 case LE: /* ZF=1 | SF<>OF */
15459 if (op1 == const0_rtx)
15463 /* strcmp patterns do (use flags) and combine may ask us for a proper
15468 gcc_unreachable ();
15472 /* Return the fixed registers used for condition codes. */
15475 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
15482 /* If two condition code modes are compatible, return a condition code
15483 mode which is compatible with both. Otherwise, return
15486 static enum machine_mode
15487 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
15492 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
15495 if ((m1 == CCGCmode && m2 == CCGOCmode)
15496 || (m1 == CCGOCmode && m2 == CCGCmode))
15502 gcc_unreachable ();
15532 /* These are only compatible with themselves, which we already checked above. */
15539 /* Return a comparison we can do that is equivalent to
15540 swap_condition (code), apart possibly from orderedness.
15541 But, never change orderedness if TARGET_IEEE_FP, returning
15542 UNKNOWN in that case if necessary. */
15544 static enum rtx_code
15545 ix86_fp_swap_condition (enum rtx_code code)
15549 case GT: /* GTU - CF=0 & ZF=0 */
15550 return TARGET_IEEE_FP ? UNKNOWN : UNLT;
15551 case GE: /* GEU - CF=0 */
15552 return TARGET_IEEE_FP ? UNKNOWN : UNLE;
15553 case UNLT: /* LTU - CF=1 */
15554 return TARGET_IEEE_FP ? UNKNOWN : GT;
15555 case UNLE: /* LEU - CF=1 | ZF=1 */
15556 return TARGET_IEEE_FP ? UNKNOWN : GE;
15558 return swap_condition (code);
15562 /* Return cost of comparison CODE using the best strategy for performance.
15563 All following functions use the number of instructions as a cost metric.
15564 In the future this should be tweaked to compute bytes for optimize_size and
15565 to take into account the performance of various instructions on various CPUs. */
15568 ix86_fp_comparison_cost (enum rtx_code code)
15572 /* The cost of code using bit-twiddling on %ah. */
15589 arith_cost = TARGET_IEEE_FP ? 5 : 4;
15593 arith_cost = TARGET_IEEE_FP ? 6 : 4;
15596 gcc_unreachable ();
15599 switch (ix86_fp_comparison_strategy (code))
15601 case IX86_FPCMP_COMI:
15602 return arith_cost > 4 ? 3 : 2;
15603 case IX86_FPCMP_SAHF:
15604 return arith_cost > 4 ? 4 : 3;
15610 /* Return the strategy to use for floating-point comparisons. We assume that fcomi is always
15611 preferable where available, since that is also true when looking at size
15612 (2 bytes, vs. 3 for fnstsw+sahf and at least 5 for fnstsw+test). */
15614 enum ix86_fpcmp_strategy
15615 ix86_fp_comparison_strategy (enum rtx_code code ATTRIBUTE_UNUSED)
15617 /* Do fcomi/sahf based test when profitable. */
15620 return IX86_FPCMP_COMI;
15622 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_function_for_size_p (cfun)))
15623 return IX86_FPCMP_SAHF;
15625 return IX86_FPCMP_ARITH;
15628 /* Swap, force into registers, or otherwise massage the two operands
15629 to an fp comparison. The operands are updated in place; the new
15630 comparison code is returned. */
15632 static enum rtx_code
15633 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
15635 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
15636 rtx op0 = *pop0, op1 = *pop1;
15637 enum machine_mode op_mode = GET_MODE (op0);
15638 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
15640 /* All of the unordered compare instructions only work on registers.
15641 The same is true of the fcomi compare instructions. The XFmode
15642 compare instructions require registers except when comparing
15643 against zero or when converting operand 1 from fixed point to floating point. */
15647 && (fpcmp_mode == CCFPUmode
15648 || (op_mode == XFmode
15649 && ! (standard_80387_constant_p (op0) == 1
15650 || standard_80387_constant_p (op1) == 1)
15651 && GET_CODE (op1) != FLOAT)
15652 || ix86_fp_comparison_strategy (code) == IX86_FPCMP_COMI))
15654 op0 = force_reg (op_mode, op0);
15655 op1 = force_reg (op_mode, op1);
15659 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
15660 things around if they appear profitable, otherwise force op0
15661 into a register. */
15663 if (standard_80387_constant_p (op0) == 0
15665 && ! (standard_80387_constant_p (op1) == 0
15668 enum rtx_code new_code = ix86_fp_swap_condition (code);
15669 if (new_code != UNKNOWN)
15672 tmp = op0, op0 = op1, op1 = tmp;
15678 op0 = force_reg (op_mode, op0);
15680 if (CONSTANT_P (op1))
15682 int tmp = standard_80387_constant_p (op1);
15684 op1 = validize_mem (force_const_mem (op_mode, op1));
15688 op1 = force_reg (op_mode, op1);
15691 op1 = force_reg (op_mode, op1);
15695 /* Try to rearrange the comparison to make it cheaper. */
15696 if (ix86_fp_comparison_cost (code)
15697 > ix86_fp_comparison_cost (swap_condition (code))
15698 && (REG_P (op1) || can_create_pseudo_p ()))
15701 tmp = op0, op0 = op1, op1 = tmp;
15702 code = swap_condition (code);
15704 op0 = force_reg (op_mode, op0);
15712 /* Convert comparison codes we use to represent FP comparison to integer
15713 code that will result in proper branch. Return UNKNOWN if no such code
15717 ix86_fp_compare_code_to_integer (enum rtx_code code)
15746 /* Generate insn patterns to do a floating point compare of OPERANDS. */
15749 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch)
15751 enum machine_mode fpcmp_mode, intcmp_mode;
15754 fpcmp_mode = ix86_fp_compare_mode (code);
15755 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
15757 /* Do fcomi/sahf based test when profitable. */
15758 switch (ix86_fp_comparison_strategy (code))
15760 case IX86_FPCMP_COMI:
15761 intcmp_mode = fpcmp_mode;
15762 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15763 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15768 case IX86_FPCMP_SAHF:
15769 intcmp_mode = fpcmp_mode;
15770 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15771 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
15775 scratch = gen_reg_rtx (HImode);
15776 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
15777 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
15780 case IX86_FPCMP_ARITH:
15781 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
15782 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
15783 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
15785 scratch = gen_reg_rtx (HImode);
15786 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
15788 /* In the unordered case, we have to check C2 for NaN's, which
15789 doesn't happen to work out to anything nice combination-wise.
15790 So do some bit twiddling on the value we've got in AH to come
15791 up with an appropriate set of condition codes. */
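/* For reference (an informal note, not in the original): after fnstsw the
   relevant FPU status-word bits land in %ah as C0 = 0x01 (carry),
   C2 = 0x04 (unordered) and C3 = 0x40 (zero), so the 0x45/0x44/0x40/0x05/0x04
   constants below are just combinations of those three flags.  */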
15793 intcmp_mode = CCNOmode;
15798 if (code == GT || !TARGET_IEEE_FP)
15800 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15805 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15806 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15807 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
15808 intcmp_mode = CCmode;
15814 if (code == LT && TARGET_IEEE_FP)
15816 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15817 emit_insn (gen_cmpqi_ext_3 (scratch, const1_rtx));
15818 intcmp_mode = CCmode;
15823 emit_insn (gen_testqi_ext_ccno_0 (scratch, const1_rtx));
15829 if (code == GE || !TARGET_IEEE_FP)
15831 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
15836 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15837 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, const1_rtx));
15843 if (code == LE && TARGET_IEEE_FP)
15845 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15846 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
15847 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15848 intcmp_mode = CCmode;
15853 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
15859 if (code == EQ && TARGET_IEEE_FP)
15861 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15862 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
15863 intcmp_mode = CCmode;
15868 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15874 if (code == NE && TARGET_IEEE_FP)
15876 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
15877 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
15883 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
15889 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15893 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
15898 gcc_unreachable ();
15906 /* Return the test that should be put into the flags user, i.e.
15907 the bcc, scc, or cmov instruction. */
15908 return gen_rtx_fmt_ee (code, VOIDmode,
15909 gen_rtx_REG (intcmp_mode, FLAGS_REG),
15914 ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1)
15918 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
15919 ret = gen_rtx_fmt_ee (code, VOIDmode, op0, op1);
15921 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
15923 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
15924 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
15927 ret = ix86_expand_int_compare (code, op0, op1);
15933 ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
15937 switch (GET_MODE (op0))
15946 tmp = ix86_expand_compare (code, op0, op1);
15947 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15948 gen_rtx_LABEL_REF (VOIDmode, label),
15950 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15957 /* Expand DImode branch into multiple compare+branch. */
15959 rtx lo[2], hi[2], label2;
15960 enum rtx_code code1, code2, code3;
15961 enum machine_mode submode;
15963 if (CONSTANT_P (op0) && !CONSTANT_P (op1))
15965 tmp = op0, op0 = op1, op1 = tmp;
15966 code = swap_condition (code);
15968 if (GET_MODE (op0) == DImode)
15970 split_di (&op0, 1, lo+0, hi+0);
15971 split_di (&op1, 1, lo+1, hi+1);
15976 split_ti (&op0, 1, lo+0, hi+0);
15977 split_ti (&op1, 1, lo+1, hi+1);
15981 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
15982 avoid two branches. This costs one extra insn, so disable when
15983 optimizing for size. */
15985 if ((code == EQ || code == NE)
15986 && (!optimize_insn_for_size_p ()
15987 || hi[1] == const0_rtx || lo[1] == const0_rtx))
15992 if (hi[1] != const0_rtx)
15993 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
15994 NULL_RTX, 0, OPTAB_WIDEN);
15997 if (lo[1] != const0_rtx)
15998 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
15999 NULL_RTX, 0, OPTAB_WIDEN);
16001 tmp = expand_binop (submode, ior_optab, xor1, xor0,
16002 NULL_RTX, 0, OPTAB_WIDEN);
16004 ix86_expand_branch (code, tmp, const0_rtx, label);
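/* E.g. on a 32-bit target a 64-bit "a == b" becomes, roughly:

     xorl  hi(b), hi(a)
     xorl  lo(b), lo(a)
     orl   hi(a), lo(a)
     jz    label

   trading the second conditional branch for one xor/or (an informal
   sketch of the sequence generated above).  */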
16008 /* Otherwise, if we are doing less-than or greater-or-equal-than,
16009 op1 is a constant, and the low word is zero, then we can just
16010 examine the high word. Similarly for low word -1 and
16011 less-or-equal-than or greater-than. */
16013 if (CONST_INT_P (hi[1]))
16016 case LT: case LTU: case GE: case GEU:
16017 if (lo[1] == const0_rtx)
16019 ix86_expand_branch (code, hi[0], hi[1], label);
16023 case LE: case LEU: case GT: case GTU:
16024 if (lo[1] == constm1_rtx)
16026 ix86_expand_branch (code, hi[0], hi[1], label);
16034 /* Otherwise, we need two or three jumps. */
16036 label2 = gen_label_rtx ();
16039 code2 = swap_condition (code);
16040 code3 = unsigned_condition (code);
16044 case LT: case GT: case LTU: case GTU:
16047 case LE: code1 = LT; code2 = GT; break;
16048 case GE: code1 = GT; code2 = LT; break;
16049 case LEU: code1 = LTU; code2 = GTU; break;
16050 case GEU: code1 = GTU; code2 = LTU; break;
16052 case EQ: code1 = UNKNOWN; code2 = NE; break;
16053 case NE: code2 = UNKNOWN; break;
16056 gcc_unreachable ();
16061 * if (hi(a) < hi(b)) goto true;
16062 * if (hi(a) > hi(b)) goto false;
16063 * if (lo(a) < lo(b)) goto true;
16067 if (code1 != UNKNOWN)
16068 ix86_expand_branch (code1, hi[0], hi[1], label);
16069 if (code2 != UNKNOWN)
16070 ix86_expand_branch (code2, hi[0], hi[1], label2);
16072 ix86_expand_branch (code3, lo[0], lo[1], label);
16074 if (code2 != UNKNOWN)
16075 emit_label (label2);
16080 gcc_assert (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC);
16085 /* Split branch based on floating point condition. */
16087 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
16088 rtx target1, rtx target2, rtx tmp, rtx pushed)
16093 if (target2 != pc_rtx)
16096 code = reverse_condition_maybe_unordered (code);
16101 condition = ix86_expand_fp_compare (code, op1, op2,
16104 /* Remove pushed operand from stack. */
16106 ix86_free_from_memory (GET_MODE (pushed));
16108 i = emit_jump_insn (gen_rtx_SET
16110 gen_rtx_IF_THEN_ELSE (VOIDmode,
16111 condition, target1, target2)));
16112 if (split_branch_probability >= 0)
16113 add_reg_note (i, REG_BR_PROB, GEN_INT (split_branch_probability));
16117 ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16121 gcc_assert (GET_MODE (dest) == QImode);
16123 ret = ix86_expand_compare (code, op0, op1);
16124 PUT_MODE (ret, QImode);
16125 emit_insn (gen_rtx_SET (VOIDmode, dest, ret));
16128 /* Expand comparison setting or clearing carry flag. Return true when
16129 successful, and set *POP to the resulting comparison. */
16131 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
16133 enum machine_mode mode =
16134 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
16136 /* Do not handle DImode compares that go through a special path. */
16137 if (mode == (TARGET_64BIT ? TImode : DImode))
16140 if (SCALAR_FLOAT_MODE_P (mode))
16142 rtx compare_op, compare_seq;
16144 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
16146 /* Shortcut: the following common codes never translate
16147 into carry flag compares. */
16148 if (code == EQ || code == NE || code == UNEQ || code == LTGT
16149 || code == ORDERED || code == UNORDERED)
16152 /* These comparisons require zero flag; swap operands so they won't. */
16153 if ((code == GT || code == UNLE || code == LE || code == UNGT)
16154 && !TARGET_IEEE_FP)
16159 code = swap_condition (code);
16162 /* Try to expand the comparison and verify that we end up with
16163 carry flag based comparison. This fails to be true only when
16164 we decide to expand the comparison using arithmetic,
16165 which is not a common scenario. */
16167 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX);
16168 compare_seq = get_insns ();
16171 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16172 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16173 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
16175 code = GET_CODE (compare_op);
16177 if (code != LTU && code != GEU)
16180 emit_insn (compare_seq);
16185 if (!INTEGRAL_MODE_P (mode))
16194 /* Convert a==0 into (unsigned)a<1. */
16197 if (op1 != const0_rtx)
16200 code = (code == EQ ? LTU : GEU);
16203 /* Convert a>b into b<a or a>=b-1. */
16206 if (CONST_INT_P (op1))
16208 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
16209 /* Bail out on overflow. We could still swap the operands, but that
16210 would force loading of the constant into a register. */
16211 if (op1 == const0_rtx
16212 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
16214 code = (code == GTU ? GEU : LTU);
16221 code = (code == GTU ? LTU : GEU);
16225 /* Convert a>=0 into (unsigned)a<0x80000000. */
16228 if (mode == DImode || op1 != const0_rtx)
16230 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16231 code = (code == LT ? GEU : LTU);
16235 if (mode == DImode || op1 != constm1_rtx)
16237 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
16238 code = (code == LE ? GEU : LTU);
16244 /* Swapping operands may cause a constant to appear as the first operand. */
16245 if (!nonimmediate_operand (op0, VOIDmode))
16247 if (!can_create_pseudo_p ())
16249 op0 = force_reg (mode, op0);
16251 *pop = ix86_expand_compare (code, op0, op1);
16252 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
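/* A carry-flag comparison is what enables the branchless sbb idiom:
   after "cmpl op1, op0" the LTU result is exactly CF, so e.g.

     cmpl  op1, op0
     sbbl  %eax, %eax        ; %eax = (op0 < op1) ? -1 : 0

   yields a 0/-1 mask in one instruction (explanatory note, not part of
   the original source).  */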
16257 ix86_expand_int_movcc (rtx operands[])
16259 enum rtx_code code = GET_CODE (operands[1]), compare_code;
16260 rtx compare_seq, compare_op;
16261 enum machine_mode mode = GET_MODE (operands[0]);
16262 bool sign_bit_compare_p = false;
16263 rtx op0 = XEXP (operands[1], 0);
16264 rtx op1 = XEXP (operands[1], 1);
16267 compare_op = ix86_expand_compare (code, op0, op1);
16268 compare_seq = get_insns ();
16271 compare_code = GET_CODE (compare_op);
16273 if ((op1 == const0_rtx && (code == GE || code == LT))
16274 || (op1 == constm1_rtx && (code == GT || code == LE)))
16275 sign_bit_compare_p = true;
16277 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
16278 HImode insns, we'd be swallowed in word prefix ops. */
16280 if ((mode != HImode || TARGET_FAST_PREFIX)
16281 && (mode != (TARGET_64BIT ? TImode : DImode))
16282 && CONST_INT_P (operands[2])
16283 && CONST_INT_P (operands[3]))
16285 rtx out = operands[0];
16286 HOST_WIDE_INT ct = INTVAL (operands[2]);
16287 HOST_WIDE_INT cf = INTVAL (operands[3]);
16288 HOST_WIDE_INT diff;
16291 /* Sign bit compares are better done using shifts than we do by using sbb. */
16293 if (sign_bit_compare_p
16294 || ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
16296 /* Detect overlap between destination and compare sources. */
16299 if (!sign_bit_compare_p)
16302 bool fpcmp = false;
16304 compare_code = GET_CODE (compare_op);
16306 flags = XEXP (compare_op, 0);
16308 if (GET_MODE (flags) == CCFPmode
16309 || GET_MODE (flags) == CCFPUmode)
16313 = ix86_fp_compare_code_to_integer (compare_code);
16316 /* To simplify the rest of the code, restrict to the GEU case. */
16317 if (compare_code == LTU)
16319 HOST_WIDE_INT tmp = ct;
16322 compare_code = reverse_condition (compare_code);
16323 code = reverse_condition (code);
16328 PUT_CODE (compare_op,
16329 reverse_condition_maybe_unordered
16330 (GET_CODE (compare_op)));
16332 PUT_CODE (compare_op,
16333 reverse_condition (GET_CODE (compare_op)));
16337 if (reg_overlap_mentioned_p (out, op0)
16338 || reg_overlap_mentioned_p (out, op1))
16339 tmp = gen_reg_rtx (mode);
16341 if (mode == DImode)
16342 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op));
16344 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp),
16345 flags, compare_op));
16349 if (code == GT || code == GE)
16350 code = reverse_condition (code);
16353 HOST_WIDE_INT tmp = ct;
16358 tmp = emit_store_flag (tmp, code, op0, op1, VOIDmode, 0, -1);
16371 tmp = expand_simple_binop (mode, PLUS,
16373 copy_rtx (tmp), 1, OPTAB_DIRECT);
16384 tmp = expand_simple_binop (mode, IOR,
16386 copy_rtx (tmp), 1, OPTAB_DIRECT);
16388 else if (diff == -1 && ct)
16398 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16400 tmp = expand_simple_binop (mode, PLUS,
16401 copy_rtx (tmp), GEN_INT (cf),
16402 copy_rtx (tmp), 1, OPTAB_DIRECT);
16410 * andl cf - ct, dest
16420 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
16423 tmp = expand_simple_binop (mode, AND,
16425 gen_int_mode (cf - ct, mode),
16426 copy_rtx (tmp), 1, OPTAB_DIRECT);
16428 tmp = expand_simple_binop (mode, PLUS,
16429 copy_rtx (tmp), GEN_INT (ct),
16430 copy_rtx (tmp), 1, OPTAB_DIRECT);
16433 if (!rtx_equal_p (tmp, out))
16434 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
16436 return 1; /* DONE */
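/* Summary note (not original): with mask = (condition ? -1 : 0) from the
   sbb idiom, the constant/constant path above ultimately computes

     dest = (mask & (cf - ct)) + ct

   with the +-1/NOT/OR shortcuts covering the special differences
   handled first.  */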
16441 enum machine_mode cmp_mode = GET_MODE (op0);
16444 tmp = ct, ct = cf, cf = tmp;
16447 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16449 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16451 /* We may be reversing an unordered compare to a normal compare, which
16452 is not valid in general (we may convert a non-trapping condition
16453 to a trapping one); however, on i386 we currently emit all
16454 comparisons unordered. */
16455 compare_code = reverse_condition_maybe_unordered (compare_code);
16456 code = reverse_condition_maybe_unordered (code);
16460 compare_code = reverse_condition (compare_code);
16461 code = reverse_condition (code);
16465 compare_code = UNKNOWN;
16466 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT
16467 && CONST_INT_P (op1))
16469 if (op1 == const0_rtx
16470 && (code == LT || code == GE))
16471 compare_code = code;
16472 else if (op1 == constm1_rtx)
16476 else if (code == GT)
16481 /* Optimize dest = (op0 < 0) ? -1 : cf. */
16482 if (compare_code != UNKNOWN
16483 && GET_MODE (op0) == GET_MODE (out)
16484 && (cf == -1 || ct == -1))
16486 /* If lea code below could be used, only optimize
16487 if it results in a 2 insn sequence. */
16489 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
16490 || diff == 3 || diff == 5 || diff == 9)
16491 || (compare_code == LT && ct == -1)
16492 || (compare_code == GE && cf == -1))
16495 * notl op1 (if necessary)
16503 code = reverse_condition (code);
16506 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16508 out = expand_simple_binop (mode, IOR,
16510 out, 1, OPTAB_DIRECT);
16511 if (out != operands[0])
16512 emit_move_insn (operands[0], out);
16514 return 1; /* DONE */
16519 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
16520 || diff == 3 || diff == 5 || diff == 9)
16521 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
16523 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
16529 * lea cf(dest*(ct-cf)),dest
16533 * This also catches the degenerate setcc-only case.
16539 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16542 /* On x86_64 the lea instruction operates on Pmode, so we need
16543 to do the arithmetic in the proper mode to match. */
16545 tmp = copy_rtx (out);
16549 out1 = copy_rtx (out);
16550 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
16554 tmp = gen_rtx_PLUS (mode, tmp, out1);
16560 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
16563 if (!rtx_equal_p (tmp, out))
16566 out = force_operand (tmp, copy_rtx (out));
16568 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
16570 if (!rtx_equal_p (out, operands[0]))
16571 emit_move_insn (operands[0], copy_rtx (out));
16573 return 1; /* DONE */
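/* Illustrative example of the lea path: for dest = (cond) ? 7 : 3 we
   have diff == 4, so after setcc materializes 0/1 in dest:

     leal  3(,%eax,4), %eax       ; dest = dest * 4 + 3

   which covers every diff expressible with x86 scale/index/base
   arithmetic (1, 2, 3, 4, 5, 8, 9).  */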
16577 * General case: Jumpful:
16578 * xorl dest,dest cmpl op1, op2
16579 * cmpl op1, op2 movl ct, dest
16580 * setcc dest jcc 1f
16581 * decl dest movl cf, dest
16582 * andl (cf-ct),dest 1:
16585 * Size 20. Size 14.
16587 * This is reasonably steep, but branch mispredict costs are
16588 * high on modern cpus, so consider failing only if optimizing for size. */
16592 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16593 && BRANCH_COST (optimize_insn_for_speed_p (),
16598 enum machine_mode cmp_mode = GET_MODE (op0);
16603 if (SCALAR_FLOAT_MODE_P (cmp_mode))
16605 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
16607 /* We may be reversing an unordered compare to a normal compare,
16608 which is not valid in general (we may convert a non-trapping
16609 condition to a trapping one); however, on i386 we currently
16610 emit all comparisons unordered. */
16611 code = reverse_condition_maybe_unordered (code);
16615 code = reverse_condition (code);
16616 if (compare_code != UNKNOWN)
16617 compare_code = reverse_condition (compare_code);
16621 if (compare_code != UNKNOWN)
16623 /* notl op1 (if needed)
16628 For x < 0 (resp. x <= -1) there will be no notl,
16629 so if possible swap the constants to get rid of the complement.
16631 True/false will be -1/0 while code below (store flag
16632 followed by decrement) is 0/-1, so the constants need
16633 to be exchanged once more. */
16635 if (compare_code == GE || !cf)
16637 code = reverse_condition (code);
16642 HOST_WIDE_INT tmp = cf;
16647 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, -1);
16651 out = emit_store_flag (out, code, op0, op1, VOIDmode, 0, 1);
16653 out = expand_simple_binop (mode, PLUS, copy_rtx (out),
16655 copy_rtx (out), 1, OPTAB_DIRECT);
16658 out = expand_simple_binop (mode, AND, copy_rtx (out),
16659 gen_int_mode (cf - ct, mode),
16660 copy_rtx (out), 1, OPTAB_DIRECT);
16662 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
16663 copy_rtx (out), 1, OPTAB_DIRECT);
16664 if (!rtx_equal_p (out, operands[0]))
16665 emit_move_insn (operands[0], copy_rtx (out));
16667 return 1; /* DONE */
16671 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
16673 /* Try a few things more with specific constants and a variable. */
16676 rtx var, orig_out, out, tmp;
16678 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
16679 return 0; /* FAIL */
16681 /* If one of the two operands is an interesting constant, load a
16682 constant with the above and mask it in with a logical operation. */
16684 if (CONST_INT_P (operands[2]))
16687 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
16688 operands[3] = constm1_rtx, op = and_optab;
16689 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
16690 operands[3] = const0_rtx, op = ior_optab;
16692 return 0; /* FAIL */
16694 else if (CONST_INT_P (operands[3]))
16697 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
16698 operands[2] = constm1_rtx, op = and_optab;
16699 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
16700 operands[2] = const0_rtx, op = ior_optab;
16702 return 0; /* FAIL */
16705 return 0; /* FAIL */
16707 orig_out = operands[0];
16708 tmp = gen_reg_rtx (mode);
16711 /* Recurse to get the constant loaded. */
16712 if (ix86_expand_int_movcc (operands) == 0)
16713 return 0; /* FAIL */
16715 /* Mask in the interesting variable. */
16716 out = expand_binop (mode, op, var, tmp, orig_out, 0,
16718 if (!rtx_equal_p (out, orig_out))
16719 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
16721 return 1; /* DONE */
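/* E.g. "dest = cond ? 0 : var" recurses to compute the all-bits mask
   "cond ? 0 : -1" and then ANDs it with var; the "cond ? -1 : var"
   shape uses IOR instead (explanatory note).  */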
16725 * For comparison with above,
16735 if (! nonimmediate_operand (operands[2], mode))
16736 operands[2] = force_reg (mode, operands[2]);
16737 if (! nonimmediate_operand (operands[3], mode))
16738 operands[3] = force_reg (mode, operands[3]);
16740 if (! register_operand (operands[2], VOIDmode)
16742 || ! register_operand (operands[3], VOIDmode)))
16743 operands[2] = force_reg (mode, operands[2]);
16746 && ! register_operand (operands[3], VOIDmode))
16747 operands[3] = force_reg (mode, operands[3]);
16749 emit_insn (compare_seq);
16750 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
16751 gen_rtx_IF_THEN_ELSE (mode,
16752 compare_op, operands[2],
16755 return 1; /* DONE */
16758 /* Swap, force into registers, or otherwise massage the two operands
16759 to an sse comparison with a mask result. Thus we differ a bit from
16760 ix86_prepare_fp_compare_args which expects to produce a flags result.
16762 The DEST operand exists to help determine whether to commute commutative
16763 operators. The POP0/POP1 operands are updated in place. The new
16764 comparison code is returned, or UNKNOWN if not implementable. */
16766 static enum rtx_code
16767 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
16768 rtx *pop0, rtx *pop1)
16776 /* We have no LTGT as an operator. We could implement it with
16777 NE & ORDERED, but this requires an extra temporary. It's
16778 not clear that it's worth it. */
16785 /* These are supported directly. */
16792 /* For commutative operators, try to canonicalize the destination
16793 operand to be first in the comparison - this helps reload to
16794 avoid extra moves. */
16795 if (!dest || !rtx_equal_p (dest, *pop1))
16803 /* These are not supported directly. Swap the comparison operands
16804 to transform into something that is supported. */
16808 code = swap_condition (code);
16812 gcc_unreachable ();
16818 /* Detect conditional moves that exactly match min/max operational
16819 semantics. Note that this is IEEE safe, as long as we don't
16820 interchange the operands.
16822 Returns FALSE if this conditional move doesn't match a MIN/MAX,
16823 and TRUE if the operation is successful and instructions are emitted. */
16826 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
16827 rtx cmp_op1, rtx if_true, rtx if_false)
16829 enum machine_mode mode;
16835 else if (code == UNGE)
16838 if_true = if_false;
16844 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
16846 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
16851 mode = GET_MODE (dest);
16853 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
16854 but MODE may be a vector mode and thus not appropriate. */
16855 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
16857 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
16860 if_true = force_reg (mode, if_true);
16861 v = gen_rtvec (2, if_true, if_false);
16862 tmp = gen_rtx_UNSPEC (mode, v, u);
16866 code = is_min ? SMIN : SMAX;
16867 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
16870 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
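/* Informal note: SSE minss/maxss are not symmetric -- with a NaN or
   with -0.0/+0.0 the result depends on the operand order -- which is
   why the code above never interchanges the operands and falls back to
   the UNSPEC_IEEE_MIN/UNSPEC_IEEE_MAX patterns unless finite/unsafe
   math flags are in effect.  */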
16874 /* Expand an sse vector comparison. Return the register with the result. */
16877 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
16878 rtx op_true, rtx op_false)
16880 enum machine_mode mode = GET_MODE (dest);
16883 cmp_op0 = force_reg (mode, cmp_op0);
16884 if (!nonimmediate_operand (cmp_op1, mode))
16885 cmp_op1 = force_reg (mode, cmp_op1);
16888 || reg_overlap_mentioned_p (dest, op_true)
16889 || reg_overlap_mentioned_p (dest, op_false))
16890 dest = gen_reg_rtx (mode);
16892 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
16893 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16898 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
16899 operations. This is used for both scalar and vector conditional moves. */
16902 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
16904 enum machine_mode mode = GET_MODE (dest);
16907 if (op_false == CONST0_RTX (mode))
16909 op_true = force_reg (mode, op_true);
16910 x = gen_rtx_AND (mode, cmp, op_true);
16911 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16913 else if (op_true == CONST0_RTX (mode))
16915 op_false = force_reg (mode, op_false);
16916 x = gen_rtx_NOT (mode, cmp);
16917 x = gen_rtx_AND (mode, x, op_false);
16918 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
16920 else if (TARGET_XOP)
16922 rtx pcmov = gen_rtx_SET (mode, dest,
16923 gen_rtx_IF_THEN_ELSE (mode, cmp,
16930 op_true = force_reg (mode, op_true);
16931 op_false = force_reg (mode, op_false);
16933 t2 = gen_reg_rtx (mode);
16935 t3 = gen_reg_rtx (mode);
16939 x = gen_rtx_AND (mode, op_true, cmp);
16940 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
16942 x = gen_rtx_NOT (mode, cmp);
16943 x = gen_rtx_AND (mode, x, op_false);
16944 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
16946 x = gen_rtx_IOR (mode, t3, t2);
16947 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
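/* The general case above is the classic branchless select on a
   full-width comparison mask:

     dest = (cmp & op_true) | (~cmp & op_false)

   which TARGET_XOP collapses into a single vpcmov instruction.  */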
16951 /* Expand a floating-point conditional move. Return true if successful. */
16954 ix86_expand_fp_movcc (rtx operands[])
16956 enum machine_mode mode = GET_MODE (operands[0]);
16957 enum rtx_code code = GET_CODE (operands[1]);
16958 rtx tmp, compare_op;
16959 rtx op0 = XEXP (operands[1], 0);
16960 rtx op1 = XEXP (operands[1], 1);
16962 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
16964 enum machine_mode cmode;
16966 /* Since we've no cmove for sse registers, don't force bad register
16967 allocation just to gain access to it. Deny movcc when the
16968 comparison mode doesn't match the move mode. */
16969 cmode = GET_MODE (op0);
16970 if (cmode == VOIDmode)
16971 cmode = GET_MODE (op1);
16975 code = ix86_prepare_sse_fp_compare_args (operands[0], code, &op0, &op1);
16976 if (code == UNKNOWN)
16979 if (ix86_expand_sse_fp_minmax (operands[0], code, op0, op1,
16980 operands[2], operands[3]))
16983 tmp = ix86_expand_sse_cmp (operands[0], code, op0, op1,
16984 operands[2], operands[3]);
16985 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
16989 /* The floating point conditional move instructions don't directly
16990 support conditions resulting from a signed integer comparison. */
16992 compare_op = ix86_expand_compare (code, op0, op1);
16993 if (!fcmov_comparison_operator (compare_op, VOIDmode))
16995 tmp = gen_reg_rtx (QImode);
16996 ix86_expand_setcc (tmp, code, op0, op1);
16998 compare_op = ix86_expand_compare (NE, tmp, const0_rtx);
17001 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
17002 gen_rtx_IF_THEN_ELSE (mode, compare_op,
17003 operands[2], operands[3])));
17008 /* Expand a floating-point vector conditional move; a vcond operation
17009 rather than a movcc operation. */
17012 ix86_expand_fp_vcond (rtx operands[])
17014 enum rtx_code code = GET_CODE (operands[3]);
17017 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
17018 &operands[4], &operands[5]);
17019 if (code == UNKNOWN)
17022 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
17023 operands[5], operands[1], operands[2]))
17026 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
17027 operands[1], operands[2]);
17028 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
17032 /* Expand a signed/unsigned integral vector conditional move. */
17035 ix86_expand_int_vcond (rtx operands[])
17037 enum machine_mode mode = GET_MODE (operands[0]);
17038 enum rtx_code code = GET_CODE (operands[3]);
17039 bool negate = false;
17042 cop0 = operands[4];
17043 cop1 = operands[5];
17045 /* XOP supports all of the comparisons on all vector int types. */
17048 /* Canonicalize the comparison to EQ, GT, GTU. */
17059 code = reverse_condition (code);
17065 code = reverse_condition (code);
17071 code = swap_condition (code);
17072 x = cop0, cop0 = cop1, cop1 = x;
17076 gcc_unreachable ();
17079 /* Only SSE4.1/SSE4.2 supports V2DImode. */
17080 if (mode == V2DImode)
17085 /* SSE4.1 supports EQ. */
17086 if (!TARGET_SSE4_1)
17092 /* SSE4.2 supports GT/GTU. */
17093 if (!TARGET_SSE4_2)
17098 gcc_unreachable ();
17102 /* Unsigned parallel compare is not supported by the hardware.
17103 Play some tricks to turn this into a signed comparison
17107 cop0 = force_reg (mode, cop0);
17115 rtx (*gen_sub3) (rtx, rtx, rtx);
17117 /* Subtract (-(INT MAX) - 1) from both operands to make them signed. */
17119 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
17121 gen_sub3 = (mode == V4SImode
17122 ? gen_subv4si3 : gen_subv2di3);
17123 t1 = gen_reg_rtx (mode);
17124 emit_insn (gen_sub3 (t1, cop0, mask));
17126 t2 = gen_reg_rtx (mode);
17127 emit_insn (gen_sub3 (t2, cop1, mask));
17137 /* Perform a parallel unsigned saturating subtraction. */
17138 x = gen_reg_rtx (mode);
17139 emit_insn (gen_rtx_SET (VOIDmode, x,
17140 gen_rtx_US_MINUS (mode, cop0, cop1)));
17143 cop1 = CONST0_RTX (mode);
17149 gcc_unreachable ();
17154 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
17155 operands[1+negate], operands[2-negate]);
17157 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
17158 operands[2-negate]);
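/* The sign-bit trick used above, informally: with bias = the
   per-element sign-bit constant (e.g. 0x80000000 for V4SI),

     (x <u y)  ==  ((x - bias) <s (y - bias))

   so the unsigned compare maps onto the signed pcmpgt the hardware
   provides; the saturating-subtract path instead relies on
   "x >u y  ==  (x -us y) != 0".  */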
17162 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
17163 true if we should do zero extension, else sign extension. HIGH_P is
17164 true if we want the N/2 high elements, else the low elements. */
17167 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17169 enum machine_mode imode = GET_MODE (operands[1]);
17170 rtx (*unpack)(rtx, rtx, rtx);
17177 unpack = gen_vec_interleave_highv16qi;
17179 unpack = gen_vec_interleave_lowv16qi;
17183 unpack = gen_vec_interleave_highv8hi;
17185 unpack = gen_vec_interleave_lowv8hi;
17189 unpack = gen_vec_interleave_highv4si;
17191 unpack = gen_vec_interleave_lowv4si;
17194 gcc_unreachable ();
17197 dest = gen_lowpart (imode, operands[0]);
17200 se = force_reg (imode, CONST0_RTX (imode));
17202 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
17203 operands[1], pc_rtx, pc_rtx);
17205 emit_insn (unpack (dest, operands[1], se));
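/* For the signed case the SE vector is built as (0 > src) per element,
   i.e. all-ones in every lane whose source value is negative, so
   interleaving SRC with SE produces the sign-extended wider elements
   (informal note).  */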
17208 /* This function performs the same task as ix86_expand_sse_unpack,
17209 but with SSE4.1 instructions. */
17212 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
17214 enum machine_mode imode = GET_MODE (operands[1]);
17215 rtx (*unpack)(rtx, rtx);
17222 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
17224 unpack = gen_sse4_1_extendv8qiv8hi2;
17228 unpack = gen_sse4_1_zero_extendv4hiv4si2;
17230 unpack = gen_sse4_1_extendv4hiv4si2;
17234 unpack = gen_sse4_1_zero_extendv2siv2di2;
17236 unpack = gen_sse4_1_extendv2siv2di2;
17239 gcc_unreachable ();
17242 dest = operands[0];
17245 /* Shift higher 8 bytes to lower 8 bytes. */
17246 src = gen_reg_rtx (imode);
17247 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, src),
17248 gen_lowpart (V1TImode, operands[1]),
17254 emit_insn (unpack (dest, src));
17257 /* Expand conditional increment or decrement using adc/sbb instructions.
17258 The default case using setcc followed by the conditional move can be
17259 done by generic code. */
17261 ix86_expand_int_addcc (rtx operands[])
17263 enum rtx_code code = GET_CODE (operands[1]);
17265 rtx (*insn)(rtx, rtx, rtx, rtx, rtx);
17267 rtx val = const0_rtx;
17268 bool fpcmp = false;
17269 enum machine_mode mode;
17270 rtx op0 = XEXP (operands[1], 0);
17271 rtx op1 = XEXP (operands[1], 1);
17273 if (operands[3] != const1_rtx
17274 && operands[3] != constm1_rtx)
17276 if (!ix86_expand_carry_flag_compare (code, op0, op1, &compare_op))
17278 code = GET_CODE (compare_op);
17280 flags = XEXP (compare_op, 0);
17282 if (GET_MODE (flags) == CCFPmode
17283 || GET_MODE (flags) == CCFPUmode)
17286 code = ix86_fp_compare_code_to_integer (code);
17293 PUT_CODE (compare_op,
17294 reverse_condition_maybe_unordered
17295 (GET_CODE (compare_op)));
17297 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
17300 mode = GET_MODE (operands[0]);
17302 /* Construct either adc or sbb insn. */
17303 if ((code == LTU) == (operands[3] == constm1_rtx))
17308 insn = gen_subqi3_carry;
17311 insn = gen_subhi3_carry;
17314 insn = gen_subsi3_carry;
17317 insn = gen_subdi3_carry;
17320 gcc_unreachable ();
17328 insn = gen_addqi3_carry;
17331 insn = gen_addhi3_carry;
17334 insn = gen_addsi3_carry;
17337 insn = gen_adddi3_carry;
17340 gcc_unreachable ();
17343 emit_insn (insn (operands[0], operands[2], val, flags, compare_op));
17345 return 1; /* DONE */
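/* E.g. an unsigned "if (a < b) x++;" becomes, roughly:

     cmpl  b, a
     adcl  $0, x          ; x += CF

   and the mirrored sbb form implements conditional decrement
   (illustrative note, not original text).  */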
17349 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
17350 works for floating point parameters and non-offsettable memories.
17351 For pushes, it returns just stack offsets; the values will be saved
17352 in the right order. Maximally four parts are generated. */
17355 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
17360 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
17362 size = (GET_MODE_SIZE (mode) + 4) / 8;
17364 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
17365 gcc_assert (size >= 2 && size <= 4);
17367 /* Optimize constant pool references to immediates. This is used by fp
17368 moves, which force all constants to memory to allow combining. */
17369 if (MEM_P (operand) && MEM_READONLY_P (operand))
17371 rtx tmp = maybe_get_pool_constant (operand);
17376 if (MEM_P (operand) && !offsettable_memref_p (operand))
17378 /* The only non-offsettable memories we handle are pushes. */
17379 int ok = push_operand (operand, VOIDmode);
17383 operand = copy_rtx (operand);
17384 PUT_MODE (operand, Pmode);
17385 parts[0] = parts[1] = parts[2] = parts[3] = operand;
17389 if (GET_CODE (operand) == CONST_VECTOR)
17391 enum machine_mode imode = int_mode_for_mode (mode);
17392 /* Caution: if we looked through a constant pool memory above,
17393 the operand may actually have a different mode now. That's
17394 ok, since we want to pun this all the way back to an integer. */
17395 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
17396 gcc_assert (operand != NULL);
17402 if (mode == DImode)
17403 split_di (&operand, 1, &parts[0], &parts[1]);
17408 if (REG_P (operand))
17410 gcc_assert (reload_completed);
17411 for (i = 0; i < size; i++)
17412 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
17414 else if (offsettable_memref_p (operand))
17416 operand = adjust_address (operand, SImode, 0);
17417 parts[0] = operand;
17418 for (i = 1; i < size; i++)
17419 parts[i] = adjust_address (operand, SImode, 4 * i);
17421 else if (GET_CODE (operand) == CONST_DOUBLE)
17426 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17430 real_to_target (l, &r, mode);
17431 parts[3] = gen_int_mode (l[3], SImode);
17432 parts[2] = gen_int_mode (l[2], SImode);
17435 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
17436 parts[2] = gen_int_mode (l[2], SImode);
17439 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
17442 gcc_unreachable ();
17444 parts[1] = gen_int_mode (l[1], SImode);
17445 parts[0] = gen_int_mode (l[0], SImode);
17448 gcc_unreachable ();
17453 if (mode == TImode)
17454 split_ti (&operand, 1, &parts[0], &parts[1]);
17455 if (mode == XFmode || mode == TFmode)
17457 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
17458 if (REG_P (operand))
17460 gcc_assert (reload_completed);
17461 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
17462 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
17464 else if (offsettable_memref_p (operand))
17466 operand = adjust_address (operand, DImode, 0);
17467 parts[0] = operand;
17468 parts[1] = adjust_address (operand, upper_mode, 8);
17470 else if (GET_CODE (operand) == CONST_DOUBLE)
17475 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
17476 real_to_target (l, &r, mode);
17478 /* Do not use shift by 32 to avoid warning on 32bit systems. */
17479 if (HOST_BITS_PER_WIDE_INT >= 64)
17482 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
17483 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
17486 parts[0] = immed_double_const (l[0], l[1], DImode);
17488 if (upper_mode == SImode)
17489 parts[1] = gen_int_mode (l[2], SImode);
17490 else if (HOST_BITS_PER_WIDE_INT >= 64)
17493 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
17494 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
17497 parts[1] = immed_double_const (l[2], l[3], DImode);
17500 gcc_unreachable ();
17507 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
17508 Return false when normal moves are needed; true when all required
17509 insns have been emitted. Operands 2-5 receive the destination parts
17510 in the correct order; operands 6-9 receive the source parts. */
17513 ix86_split_long_move (rtx operands[])
17518 int collisions = 0;
17519 enum machine_mode mode = GET_MODE (operands[0]);
17520 bool collisionparts[4];
17522 /* The DFmode expanders may ask us to move a double.
17523 For a 64-bit target this is a single move. By hiding that fact
17524 here we simplify the i386.md splitters. */
17525 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
17527 /* Optimize constant pool references to immediates. This is used by
17528 fp moves, which force all constants to memory to allow combining. */
17530 if (MEM_P (operands[1])
17531 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
17532 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
17533 operands[1] = get_pool_constant (XEXP (operands[1], 0));
17534 if (push_operand (operands[0], VOIDmode))
17536 operands[0] = copy_rtx (operands[0]);
17537 PUT_MODE (operands[0], Pmode);
17540 operands[0] = gen_lowpart (DImode, operands[0]);
17541 operands[1] = gen_lowpart (DImode, operands[1]);
17542 emit_move_insn (operands[0], operands[1]);
17546 /* The only non-offsettable memory we handle is a push. */
17547 if (push_operand (operands[0], VOIDmode))
17550 gcc_assert (!MEM_P (operands[0])
17551 || offsettable_memref_p (operands[0]));
17553 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
17554 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
17556 /* When emitting a push, take care of source operands on the stack. */
17557 if (push && MEM_P (operands[1])
17558 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
17560 rtx src_base = XEXP (part[1][nparts - 1], 0);
17562 /* Compensate for the stack decrement by 4. */
17563 if (!TARGET_64BIT && nparts == 3
17564 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
17565 src_base = plus_constant (src_base, 4);
17567 /* src_base refers to the stack pointer and is
17568 automatically decreased by emitted push. */
17569 for (i = 0; i < nparts; i++)
17570 part[1][i] = change_address (part[1][i],
17571 GET_MODE (part[1][i]), src_base);
17574 /* We need to do the copy in the right order in case an address register
17575 of the source overlaps the destination. */
17576 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
17580 for (i = 0; i < nparts; i++)
17583 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
17584 if (collisionparts[i])
17588 /* Collision in the middle part can be handled by reordering. */
17589 if (collisions == 1 && nparts == 3 && collisionparts [1])
17591 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17592 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17594 else if (collisions == 1
17596 && (collisionparts [1] || collisionparts [2]))
17598 if (collisionparts [1])
17600 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
17601 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
17605 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
17606 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
17610 /* If there are more collisions, we can't handle it by reordering.
17611 Do an lea to the last part and use only one colliding move. */
17612 else if (collisions > 1)
17618 base = part[0][nparts - 1];
17620 /* Handle the case when the last part isn't valid for lea.
17621 Happens in 64-bit mode storing the 12-byte XFmode. */
17622 if (GET_MODE (base) != Pmode)
17623 base = gen_rtx_REG (Pmode, REGNO (base));
17625 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
17626 part[1][0] = replace_equiv_address (part[1][0], base);
17627 for (i = 1; i < nparts; i++)
17629 tmp = plus_constant (base, UNITS_PER_WORD * i);
17630 part[1][i] = replace_equiv_address (part[1][i], tmp);
17641 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
17642 emit_insn (gen_addsi3 (stack_pointer_rtx,
17643 stack_pointer_rtx, GEN_INT (-4)));
17644 emit_move_insn (part[0][2], part[1][2]);
17646 else if (nparts == 4)
17648 emit_move_insn (part[0][3], part[1][3]);
17649 emit_move_insn (part[0][2], part[1][2]);
17654 /* In 64-bit mode we don't have a 32-bit push available. In case this is
17655 a register, it is OK - we will just use the larger counterpart. We also
17656 retype memory - this comes from an attempt to avoid the REX prefix on
17657 moving the second half of a TFmode value. */
17658 if (GET_MODE (part[1][1]) == SImode)
17660 switch (GET_CODE (part[1][1]))
17663 part[1][1] = adjust_address (part[1][1], DImode, 0);
17667 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
17671 gcc_unreachable ();
17674 if (GET_MODE (part[1][0]) == SImode)
17675 part[1][0] = part[1][1];
17678 emit_move_insn (part[0][1], part[1][1]);
17679 emit_move_insn (part[0][0], part[1][0]);
17683 /* Choose the correct order so as not to overwrite the source before it is copied. */
17684 if ((REG_P (part[0][0])
17685 && REG_P (part[1][1])
17686 && (REGNO (part[0][0]) == REGNO (part[1][1])
17688 && REGNO (part[0][0]) == REGNO (part[1][2]))
17690 && REGNO (part[0][0]) == REGNO (part[1][3]))))
17692 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
17694 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
17696 operands[2 + i] = part[0][j];
17697 operands[6 + i] = part[1][j];
17702 for (i = 0; i < nparts; i++)
17704 operands[2 + i] = part[0][i];
17705 operands[6 + i] = part[1][i];
17709 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
17710 if (optimize_insn_for_size_p ())
17712 for (j = 0; j < nparts - 1; j++)
17713 if (CONST_INT_P (operands[6 + j])
17714 && operands[6 + j] != const0_rtx
17715 && REG_P (operands[2 + j]))
17716 for (i = j; i < nparts - 1; i++)
17717 if (CONST_INT_P (operands[7 + i])
17718 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
17719 operands[7 + i] = operands[2 + j];
17722 for (i = 0; i < nparts; i++)
17723 emit_move_insn (operands[2 + i], operands[6 + i]);
17728 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
17729 left shift by a constant, either using a single shift or
17730 a sequence of add instructions. */
17733 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
17737 emit_insn ((mode == DImode
17739 : gen_adddi3) (operand, operand, operand));
17741 else if (!optimize_insn_for_size_p ()
17742 && count * ix86_cost->add <= ix86_cost->shift_const)
17745 for (i=0; i<count; i++)
17747 emit_insn ((mode == DImode
17749 : gen_adddi3) (operand, operand, operand));
17753 emit_insn ((mode == DImode
17755 : gen_ashldi3) (operand, operand, GEN_INT (count)));
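/* E.g. a left shift by 2 can be emitted as two "addl %eax, %eax"
   instructions when two adds are no more costly than one
   shift-by-constant, since each add doubles the operand
   (explanatory note).  */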
17759 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
17761 rtx low[2], high[2];
17763 const int single_width = mode == DImode ? 32 : 64;
17765 if (CONST_INT_P (operands[2]))
17767 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17768 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17770 if (count >= single_width)
17772 emit_move_insn (high[0], low[1]);
17773 emit_move_insn (low[0], const0_rtx);
17775 if (count > single_width)
17776 ix86_expand_ashl_const (high[0], count - single_width, mode);
17780 if (!rtx_equal_p (operands[0], operands[1]))
17781 emit_move_insn (operands[0], operands[1]);
17782 emit_insn ((mode == DImode
17784 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
17785 ix86_expand_ashl_const (low[0], count, mode);
17790 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17792 if (operands[1] == const1_rtx)
17794 /* Assuming we've chosen QImode-capable registers, 1 << N
17795 can be done with two 32/64-bit shifts, no branches, no cmoves. */
17796 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
17798 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
17800 ix86_expand_clear (low[0]);
17801 ix86_expand_clear (high[0]);
17802 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
17804 d = gen_lowpart (QImode, low[0]);
17805 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17806 s = gen_rtx_EQ (QImode, flags, const0_rtx);
17807 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17809 d = gen_lowpart (QImode, high[0]);
17810 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
17811 s = gen_rtx_NE (QImode, flags, const0_rtx);
17812 emit_insn (gen_rtx_SET (VOIDmode, d, s));
17815 /* Otherwise, we can get the same results by manually performing
17816 a bit extract operation on bit 5/6, and then performing the two
17817 shifts. The two methods of getting 0/1 into low/high are exactly
17818 the same size. Avoiding the shift in the bit extract case helps
17819 pentium4 a bit; no one else seems to care much either way. */
17824 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
17825 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
17827 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
17828 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
17830 emit_insn ((mode == DImode
17832 : gen_lshrdi3) (high[0], high[0],
17833 GEN_INT (mode == DImode ? 5 : 6)));
17834 emit_insn ((mode == DImode
17836 : gen_anddi3) (high[0], high[0], const1_rtx));
17837 emit_move_insn (low[0], high[0]);
17838 emit_insn ((mode == DImode
17840 : gen_xordi3) (low[0], low[0], const1_rtx));
17843 emit_insn ((mode == DImode
17845 : gen_ashldi3) (low[0], low[0], operands[2]));
17846 emit_insn ((mode == DImode
17848 : gen_ashldi3) (high[0], high[0], operands[2]));
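/* I.e. for the DImode case the two halves are formed branchlessly as

     high = (count >> 5) & 1;     (1 exactly when count is 32..63)
     low  = high ^ 1;

   after which both halves are shifted left by the count, which the
   hardware masks modulo the word size (informal restatement of the
   sequence above).  */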
17852 if (operands[1] == constm1_rtx)
17854 /* For -1 << N, we can avoid the shld instruction, because we
17855 know that we're shifting 0...31/63 ones into a -1. */
17856 emit_move_insn (low[0], constm1_rtx);
17857 if (optimize_insn_for_size_p ())
17858 emit_move_insn (high[0], low[0]);
17860 emit_move_insn (high[0], constm1_rtx);
17864 if (!rtx_equal_p (operands[0], operands[1]))
17865 emit_move_insn (operands[0], operands[1]);
17867 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17868 emit_insn ((mode == DImode
17870 : gen_x86_64_shld) (high[0], low[0], operands[2]));
17873 emit_insn ((mode == DImode
17875 : gen_ashldi3) (low[0], low[0], operands[2]));
17877 if (TARGET_CMOVE && scratch)
17879 ix86_expand_clear (scratch);
17880 emit_insn ((mode == DImode
17881 ? gen_x86_shiftsi_adj_1
17882 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2],
17886 emit_insn ((mode == DImode
17887 ? gen_x86_shiftsi_adj_2
17888 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2]));
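/* Variable double-word shift-left, in outline (informal):

     shld  %cl, low, high     ; high = (high << cl) | (low >> (32-cl))
     sall  %cl, low
     ;; fixup when cl >= 32:  high = low, low = 0

   x86_shift*_adj_1 performs the fixup with cmov when available;
   x86_shift*_adj_2 uses a short branch instead.  */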
17892 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
17894 rtx low[2], high[2];
17896 const int single_width = mode == DImode ? 32 : 64;
17898 if (CONST_INT_P (operands[2]))
17900 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17901 count = INTVAL (operands[2]) & (single_width * 2 - 1);
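/* A shift by the full double-word width minus 1 leaves only the sign:
   broadcast the sign bit of the source high word into both halves.  */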
17903 if (count == single_width * 2 - 1)
17905 emit_move_insn (high[0], high[1]);
17906 emit_insn ((mode == DImode
17908 : gen_ashrdi3) (high[0], high[0],
17909 GEN_INT (single_width - 1)));
17910 emit_move_insn (low[0], high[0]);
17913 else if (count >= single_width)
17915 emit_move_insn (low[0], high[1]);
17916 emit_move_insn (high[0], low[0]);
17917 emit_insn ((mode == DImode
17919 : gen_ashrdi3) (high[0], high[0],
17920 GEN_INT (single_width - 1)));
17921 if (count > single_width)
17922 emit_insn ((mode == DImode
17924 : gen_ashrdi3) (low[0], low[0],
17925 GEN_INT (count - single_width)));
17929 if (!rtx_equal_p (operands[0], operands[1]))
17930 emit_move_insn (operands[0], operands[1]);
17931 emit_insn ((mode == DImode
17933 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
17934 emit_insn ((mode == DImode
17936 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
17941 if (!rtx_equal_p (operands[0], operands[1]))
17942 emit_move_insn (operands[0], operands[1]);
17944 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
17946 emit_insn ((mode == DImode
17948 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
17949 emit_insn ((mode == DImode
17951 : gen_ashrdi3) (high[0], high[0], operands[2]));
17953 if (TARGET_CMOVE && scratch)
17955 emit_move_insn (scratch, high[0]);
17956 emit_insn ((mode == DImode
17958 : gen_ashrdi3) (scratch, scratch,
17959 GEN_INT (single_width - 1)));
17960 emit_insn ((mode == DImode
17961 ? gen_x86_shiftsi_adj_1
17962 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
17966 emit_insn ((mode == DImode
17967 ? gen_x86_shiftsi_adj_3
17968 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2]));
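/* Likewise for the arithmetic right shift, as a plain-C sketch
   (illustration only, same caveat about the n == 0 corner):

     low = (low >> (n & 31)) | (high << (32 - (n & 31)));     (SHRD)
     high = (int) high >> (n & 31);                           (SAR)
     if (n & 32)
       low = high, high = (int) high >> 31;                   (adjust)
*/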
17973 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
17975 rtx low[2], high[2];
17977 const int single_width = mode == DImode ? 32 : 64;
17979 if (CONST_INT_P (operands[2]))
17981 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
17982 count = INTVAL (operands[2]) & (single_width * 2 - 1);
17984 if (count >= single_width)
17986 emit_move_insn (low[0], high[1]);
17987 ix86_expand_clear (high[0]);
17989 if (count > single_width)
17990 emit_insn ((mode == DImode
17992 : gen_lshrdi3) (low[0], low[0],
17993 GEN_INT (count - single_width)));
17997 if (!rtx_equal_p (operands[0], operands[1]))
17998 emit_move_insn (operands[0], operands[1]);
17999 emit_insn ((mode == DImode
18001 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
18002 emit_insn ((mode == DImode
18004 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
18009 if (!rtx_equal_p (operands[0], operands[1]))
18010 emit_move_insn (operands[0], operands[1]);
18012 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
18014 emit_insn ((mode == DImode
18016 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
18017 emit_insn ((mode == DImode
18019 : gen_lshrdi3) (high[0], high[0], operands[2]));
18021 /* Heh. By reversing the arguments, we can reuse this pattern. */
18022 if (TARGET_CMOVE && scratch)
18024 ix86_expand_clear (scratch);
18025 emit_insn ((mode == DImode
18026 ? gen_x86_shiftsi_adj_1
18027 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2],
18031 emit_insn ((mode == DImode
18032 ? gen_x86_shiftsi_adj_2
18033 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2]));
18037 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
18039 predict_jump (int prob)
18041 rtx insn = get_last_insn ();
18042 gcc_assert (JUMP_P (insn));
18043 add_reg_note (insn, REG_BR_PROB, GEN_INT (prob));
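/* Example: predict_jump (REG_BR_PROB_BASE * 90 / 100) annotates the
   jump just emitted as taken with 90% probability.  */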
18046 /* Helper function for the string operations below. Test whether VARIABLE
18047 is aligned to VALUE bytes. If true, jump to the label. */
18049 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
18051 rtx label = gen_label_rtx ();
18052 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
18053 if (GET_MODE (variable) == DImode)
18054 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
18056 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
18057 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
18060 predict_jump (REG_BR_PROB_BASE * 50 / 100);
18062 predict_jump (REG_BR_PROB_BASE * 90 / 100);
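/* Conceptually, the emitted test is (plain-C sketch, illustration only):

     if ((variable & value) == 0)
       goto label;

   e.g. for VALUE == 4 the jump is taken when bit 2 of VARIABLE is clear.  */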
18066 /* Decrease COUNTREG by VALUE. */
18068 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
18070 if (GET_MODE (countreg) == DImode)
18071 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
18073 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
18076 /* Zero-extend the possibly-SImode EXP into a Pmode register. */
18078 ix86_zero_extend_to_Pmode (rtx exp)
18081 if (GET_MODE (exp) == VOIDmode)
18082 return force_reg (Pmode, exp);
18083 if (GET_MODE (exp) == Pmode)
18084 return copy_to_mode_reg (Pmode, exp);
18085 r = gen_reg_rtx (Pmode);
18086 emit_insn (gen_zero_extendsidi2 (r, exp));
18090 /* Divide COUNTREG by SCALE. */
18092 scale_counter (rtx countreg, int scale)
18098 if (CONST_INT_P (countreg))
18099 return GEN_INT (INTVAL (countreg) / scale);
18100 gcc_assert (REG_P (countreg));
18102 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
18103 GEN_INT (exact_log2 (scale)),
18104 NULL, 1, OPTAB_DIRECT);
18108 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
18109 DImode for constant loop counts. */
18111 static enum machine_mode
18112 counter_mode (rtx count_exp)
18114 if (GET_MODE (count_exp) != VOIDmode)
18115 return GET_MODE (count_exp);
18116 if (!CONST_INT_P (count_exp))
18118 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
18123 /* When SRCPTR is non-NULL, output a simple loop that moves memory
18124 from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
18125 the overall size is COUNT, specified in bytes. When SRCPTR is NULL,
18126 output the equivalent loop that sets memory to VALUE (supposed to be in MODE).
18128 The size is rounded down to a whole number of chunks moved at once.
18129 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
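/* Shape of the emitted code, as a plain-C sketch (illustration only,
   with PIECE standing for GET_MODE_SIZE (MODE) * UNROLL):

     size = count & ~(PIECE - 1);
     if (size == 0)
       goto out;                  (test omitted when PIECE == 1)
     iter = 0;
     do
       {
         ... UNROLL moves, or stores of VALUE, of one MODE chunk each,
             addressed as destptr + iter (and srcptr + iter) ...
         iter += PIECE;
       }
     while (iter < size);
   out:
     destptr += iter;             (srcptr likewise, when copying)
*/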
18133 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
18134 rtx destptr, rtx srcptr, rtx value,
18135 rtx count, enum machine_mode mode, int unroll,
18138 rtx out_label, top_label, iter, tmp;
18139 enum machine_mode iter_mode = counter_mode (count);
18140 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
18141 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
18147 top_label = gen_label_rtx ();
18148 out_label = gen_label_rtx ();
18149 iter = gen_reg_rtx (iter_mode);
18151 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
18152 NULL, 1, OPTAB_DIRECT);
18153 /* Those two should combine. */
18154 if (piece_size == const1_rtx)
18156 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
18158 predict_jump (REG_BR_PROB_BASE * 10 / 100);
18160 emit_move_insn (iter, const0_rtx);
18162 emit_label (top_label);
18164 tmp = convert_modes (Pmode, iter_mode, iter, true);
18165 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
18166 destmem = change_address (destmem, mode, x_addr);
18170 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
18171 srcmem = change_address (srcmem, mode, y_addr);
18173 /* When unrolling for chips that reorder memory reads and writes,
18174 we can save registers by using a single temporary.
18175 Also, using 4 temporaries is overkill in 32-bit mode. */
18176 if (!TARGET_64BIT && 0)
18178 for (i = 0; i < unroll; i++)
18183 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18185 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18187 emit_move_insn (destmem, srcmem);
18193 gcc_assert (unroll <= 4);
18194 for (i = 0; i < unroll; i++)
18196 tmpreg[i] = gen_reg_rtx (mode);
18200 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
18202 emit_move_insn (tmpreg[i], srcmem);
18204 for (i = 0; i < unroll; i++)
18209 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18211 emit_move_insn (destmem, tmpreg[i]);
18216 for (i = 0; i < unroll; i++)
18220 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
18221 emit_move_insn (destmem, value);
18224 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
18225 true, OPTAB_LIB_WIDEN);
18227 emit_move_insn (iter, tmp);
18229 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
18231 if (expected_size != -1)
18233 expected_size /= GET_MODE_SIZE (mode) * unroll;
18234 if (expected_size == 0)
18236 else if (expected_size > REG_BR_PROB_BASE)
18237 predict_jump (REG_BR_PROB_BASE - 1);
18239 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
18242 predict_jump (REG_BR_PROB_BASE * 80 / 100);
18243 iter = ix86_zero_extend_to_Pmode (iter);
18244 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
18245 true, OPTAB_LIB_WIDEN);
18246 if (tmp != destptr)
18247 emit_move_insn (destptr, tmp);
18250 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
18251 true, OPTAB_LIB_WIDEN);
18253 emit_move_insn (srcptr, tmp);
18255 emit_label (out_label);
18258 /* Output a "rep; mov" instruction.
18259 Arguments have the same meaning as in the previous function. */
18261 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
18262 rtx destptr, rtx srcptr,
18264 enum machine_mode mode)
18270 /* If the size is known, it is shorter to use rep movs. */
18271 if (mode == QImode && CONST_INT_P (count)
18272 && !(INTVAL (count) & 3))
18275 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18276 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18277 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
18278 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
18279 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18280 if (mode != QImode)
18282 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18283 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18284 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18285 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
18286 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18287 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
18291 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18292 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
18294 if (CONST_INT_P (count))
18296 count = GEN_INT (INTVAL (count)
18297 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18298 destmem = shallow_copy_rtx (destmem);
18299 srcmem = shallow_copy_rtx (srcmem);
18300 set_mem_size (destmem, count);
18301 set_mem_size (srcmem, count);
18305 if (MEM_SIZE (destmem))
18306 set_mem_size (destmem, NULL_RTX);
18307 if (MEM_SIZE (srcmem))
18308 set_mem_size (srcmem, NULL_RTX);
18310 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
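/* The rep_mov insn emitted above behaves like x86 "rep movsb/movsl/movsq":
   with COUNTREG already scaled to the element count, roughly
   (plain-C sketch, illustration only)

     while (countreg--)
       *destptr++ = *srcptr++;    (one MODE-sized element at a time)

   DESTEXP and SRCEXP describe the final pointer values, i.e.
   ptr + (countreg << log2 (GET_MODE_SIZE (mode))), for the RTL pattern.  */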
18314 /* Output a "rep; stos" instruction.
18315 Arguments have the same meaning as in the previous function. */
18317 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
18318 rtx count, enum machine_mode mode,
18324 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
18325 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
18326 value = force_reg (mode, gen_lowpart (mode, value));
18327 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
18328 if (mode != QImode)
18330 destexp = gen_rtx_ASHIFT (Pmode, countreg,
18331 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
18332 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
18335 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
18336 if (orig_value == const0_rtx && CONST_INT_P (count))
18338 count = GEN_INT (INTVAL (count)
18339 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
18340 destmem = shallow_copy_rtx (destmem);
18341 set_mem_size (destmem, count);
18343 else if (MEM_SIZE (destmem))
18344 set_mem_size (destmem, NULL_RTX);
18345 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
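/* Likewise, the rep_stos insn behaves like x86 "rep stos": roughly
   while (countreg--) *destptr++ = value; with DESTEXP describing the
   final destination pointer for the RTL pattern (sketch only).  */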
18349 emit_strmov (rtx destmem, rtx srcmem,
18350 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
18352 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
18353 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
18354 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18357 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
18359 expand_movmem_epilogue (rtx destmem, rtx srcmem,
18360 rtx destptr, rtx srcptr, rtx count, int max_size)
18363 if (CONST_INT_P (count))
18365 HOST_WIDE_INT countval = INTVAL (count);
18368 if ((countval & 0x10) && max_size > 16)
18372 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18373 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
18376 gcc_unreachable ();
18379 if ((countval & 0x08) && max_size > 8)
18382 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
18385 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18386 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
18390 if ((countval & 0x04) && max_size > 4)
18392 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
18395 if ((countval & 0x02) && max_size > 2)
18397 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
18400 if ((countval & 0x01) && max_size > 1)
18402 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
18409 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
18410 count, 1, OPTAB_DIRECT);
18411 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
18412 count, QImode, 1, 4);
18416 /* When there are stringops, we can cheaply increase dest and src pointers.
18417 Otherwise we save code size by maintaining an offset (zero is readily
18418 available from the preceding rep operation) and using x86 addressing modes. */
18420 if (TARGET_SINGLE_STRINGOP)
18424 rtx label = ix86_expand_aligntest (count, 4, true);
18425 src = change_address (srcmem, SImode, srcptr);
18426 dest = change_address (destmem, SImode, destptr);
18427 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18428 emit_label (label);
18429 LABEL_NUSES (label) = 1;
18433 rtx label = ix86_expand_aligntest (count, 2, true);
18434 src = change_address (srcmem, HImode, srcptr);
18435 dest = change_address (destmem, HImode, destptr);
18436 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18437 emit_label (label);
18438 LABEL_NUSES (label) = 1;
18442 rtx label = ix86_expand_aligntest (count, 1, true);
18443 src = change_address (srcmem, QImode, srcptr);
18444 dest = change_address (destmem, QImode, destptr);
18445 emit_insn (gen_strmov (destptr, dest, srcptr, src));
18446 emit_label (label);
18447 LABEL_NUSES (label) = 1;
18452 rtx offset = force_reg (Pmode, const0_rtx);
18457 rtx label = ix86_expand_aligntest (count, 4, true);
18458 src = change_address (srcmem, SImode, srcptr);
18459 dest = change_address (destmem, SImode, destptr);
18460 emit_move_insn (dest, src);
18461 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
18462 true, OPTAB_LIB_WIDEN);
18464 emit_move_insn (offset, tmp);
18465 emit_label (label);
18466 LABEL_NUSES (label) = 1;
18470 rtx label = ix86_expand_aligntest (count, 2, true);
18471 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18472 src = change_address (srcmem, HImode, tmp);
18473 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18474 dest = change_address (destmem, HImode, tmp);
18475 emit_move_insn (dest, src);
18476 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
18477 true, OPTAB_LIB_WIDEN);
18479 emit_move_insn (offset, tmp);
18480 emit_label (label);
18481 LABEL_NUSES (label) = 1;
18485 rtx label = ix86_expand_aligntest (count, 1, true);
18486 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
18487 src = change_address (srcmem, QImode, tmp);
18488 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
18489 dest = change_address (destmem, QImode, tmp);
18490 emit_move_insn (dest, src);
18491 emit_label (label);
18492 LABEL_NUSES (label) = 1;
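/* Both variants above implement the tail handling one would write in C
   as (sketch for max_size == 8, illustration only):

     if (count & 4)
       ... copy 4 bytes ...
     if (count & 2)
       ... copy 2 bytes ...
     if (count & 1)
       ... copy 1 byte ...

   with the tests folded at compile time for a constant COUNT and
   emitted as compare-and-jump sequences otherwise.  */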
18497 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18499 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
18500 rtx count, int max_size)
18503 expand_simple_binop (counter_mode (count), AND, count,
18504 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
18505 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
18506 gen_lowpart (QImode, value), count, QImode,
18510 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
18512 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
18516 if (CONST_INT_P (count))
18518 HOST_WIDE_INT countval = INTVAL (count);
18521 if ((countval & 0x10) && max_size > 16)
18525 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18526 emit_insn (gen_strset (destptr, dest, value));
18527 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
18528 emit_insn (gen_strset (destptr, dest, value));
18531 gcc_unreachable ();
18534 if ((countval & 0x08) && max_size > 8)
18538 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
18539 emit_insn (gen_strset (destptr, dest, value));
18543 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18544 emit_insn (gen_strset (destptr, dest, value));
18545 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
18546 emit_insn (gen_strset (destptr, dest, value));
18550 if ((countval & 0x04) && max_size > 4)
18552 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
18553 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18556 if ((countval & 0x02) && max_size > 2)
18558 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
18559 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18562 if ((countval & 0x01) && max_size > 1)
18564 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
18565 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18572 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
18577 rtx label = ix86_expand_aligntest (count, 16, true);
18580 dest = change_address (destmem, DImode, destptr);
18581 emit_insn (gen_strset (destptr, dest, value));
18582 emit_insn (gen_strset (destptr, dest, value));
18586 dest = change_address (destmem, SImode, destptr);
18587 emit_insn (gen_strset (destptr, dest, value));
18588 emit_insn (gen_strset (destptr, dest, value));
18589 emit_insn (gen_strset (destptr, dest, value));
18590 emit_insn (gen_strset (destptr, dest, value));
18592 emit_label (label);
18593 LABEL_NUSES (label) = 1;
18597 rtx label = ix86_expand_aligntest (count, 8, true);
18600 dest = change_address (destmem, DImode, destptr);
18601 emit_insn (gen_strset (destptr, dest, value));
18605 dest = change_address (destmem, SImode, destptr);
18606 emit_insn (gen_strset (destptr, dest, value));
18607 emit_insn (gen_strset (destptr, dest, value));
18609 emit_label (label);
18610 LABEL_NUSES (label) = 1;
18614 rtx label = ix86_expand_aligntest (count, 4, true);
18615 dest = change_address (destmem, SImode, destptr);
18616 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
18617 emit_label (label);
18618 LABEL_NUSES (label) = 1;
18622 rtx label = ix86_expand_aligntest (count, 2, true);
18623 dest = change_address (destmem, HImode, destptr);
18624 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
18625 emit_label (label);
18626 LABEL_NUSES (label) = 1;
18630 rtx label = ix86_expand_aligntest (count, 1, true);
18631 dest = change_address (destmem, QImode, destptr);
18632 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
18633 emit_label (label);
18634 LABEL_NUSES (label) = 1;
18638 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
18639 to ALIGN, up to DESIRED_ALIGNMENT. */
18641 expand_movmem_prologue (rtx destmem, rtx srcmem,
18642 rtx destptr, rtx srcptr, rtx count,
18643 int align, int desired_alignment)
18645 if (align <= 1 && desired_alignment > 1)
18647 rtx label = ix86_expand_aligntest (destptr, 1, false);
18648 srcmem = change_address (srcmem, QImode, srcptr);
18649 destmem = change_address (destmem, QImode, destptr);
18650 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18651 ix86_adjust_counter (count, 1);
18652 emit_label (label);
18653 LABEL_NUSES (label) = 1;
18655 if (align <= 2 && desired_alignment > 2)
18657 rtx label = ix86_expand_aligntest (destptr, 2, false);
18658 srcmem = change_address (srcmem, HImode, srcptr);
18659 destmem = change_address (destmem, HImode, destptr);
18660 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18661 ix86_adjust_counter (count, 2);
18662 emit_label (label);
18663 LABEL_NUSES (label) = 1;
18665 if (align <= 4 && desired_alignment > 4)
18667 rtx label = ix86_expand_aligntest (destptr, 4, false);
18668 srcmem = change_address (srcmem, SImode, srcptr);
18669 destmem = change_address (destmem, SImode, destptr);
18670 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
18671 ix86_adjust_counter (count, 4);
18672 emit_label (label);
18673 LABEL_NUSES (label) = 1;
18675 gcc_assert (desired_alignment <= 8);
18678 /* Copy enough bytes from SRC to DST to align DST to DESIRED_ALIGN.
18679 ALIGN_BYTES is how many bytes need to be copied. */
18681 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
18682 int desired_align, int align_bytes)
18685 rtx src_size, dst_size;
18687 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
18688 if (src_align_bytes >= 0)
18689 src_align_bytes = desired_align - src_align_bytes;
18690 src_size = MEM_SIZE (src);
18691 dst_size = MEM_SIZE (dst);
18692 if (align_bytes & 1)
18694 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18695 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
18697 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18699 if (align_bytes & 2)
18701 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18702 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
18703 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18704 set_mem_align (dst, 2 * BITS_PER_UNIT);
18705 if (src_align_bytes >= 0
18706 && (src_align_bytes & 1) == (align_bytes & 1)
18707 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
18708 set_mem_align (src, 2 * BITS_PER_UNIT);
18710 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18712 if (align_bytes & 4)
18714 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18715 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
18716 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18717 set_mem_align (dst, 4 * BITS_PER_UNIT);
18718 if (src_align_bytes >= 0)
18720 unsigned int src_align = 0;
18721 if ((src_align_bytes & 3) == (align_bytes & 3))
18723 else if ((src_align_bytes & 1) == (align_bytes & 1))
18725 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18726 set_mem_align (src, src_align * BITS_PER_UNIT);
18729 emit_insn (gen_strmov (destreg, dst, srcreg, src));
18731 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18732 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
18733 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18734 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18735 if (src_align_bytes >= 0)
18737 unsigned int src_align = 0;
18738 if ((src_align_bytes & 7) == (align_bytes & 7))
18740 else if ((src_align_bytes & 3) == (align_bytes & 3))
18742 else if ((src_align_bytes & 1) == (align_bytes & 1))
18744 if (src_align > (unsigned int) desired_align)
18745 src_align = desired_align;
18746 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
18747 set_mem_align (src, src_align * BITS_PER_UNIT);
18750 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18752 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
18757 /* Store enough bytes at DEST to align DEST, known to be aligned
18758 to ALIGN, up to DESIRED_ALIGNMENT. */
18760 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
18761 int align, int desired_alignment)
18763 if (align <= 1 && desired_alignment > 1)
18765 rtx label = ix86_expand_aligntest (destptr, 1, false);
18766 destmem = change_address (destmem, QImode, destptr);
18767 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
18768 ix86_adjust_counter (count, 1);
18769 emit_label (label);
18770 LABEL_NUSES (label) = 1;
18772 if (align <= 2 && desired_alignment > 2)
18774 rtx label = ix86_expand_aligntest (destptr, 2, false);
18775 destmem = change_address (destmem, HImode, destptr);
18776 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
18777 ix86_adjust_counter (count, 2);
18778 emit_label (label);
18779 LABEL_NUSES (label) = 1;
18781 if (align <= 4 && desired_alignment > 4)
18783 rtx label = ix86_expand_aligntest (destptr, 4, false);
18784 destmem = change_address (destmem, SImode, destptr);
18785 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
18786 ix86_adjust_counter (count, 4);
18787 emit_label (label);
18788 LABEL_NUSES (label) = 1;
18790 gcc_assert (desired_alignment <= 8);
18793 /* Store enough bytes at DST to align DST, known to be aligned to ALIGN, up to
18794 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
18796 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
18797 int desired_align, int align_bytes)
18800 rtx dst_size = MEM_SIZE (dst);
18801 if (align_bytes & 1)
18803 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
18805 emit_insn (gen_strset (destreg, dst,
18806 gen_lowpart (QImode, value)));
18808 if (align_bytes & 2)
18810 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
18811 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
18812 set_mem_align (dst, 2 * BITS_PER_UNIT);
18814 emit_insn (gen_strset (destreg, dst,
18815 gen_lowpart (HImode, value)));
18817 if (align_bytes & 4)
18819 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
18820 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
18821 set_mem_align (dst, 4 * BITS_PER_UNIT);
18823 emit_insn (gen_strset (destreg, dst,
18824 gen_lowpart (SImode, value)));
18826 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
18827 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
18828 set_mem_align (dst, desired_align * BITS_PER_UNIT);
18830 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
18834 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
18835 static enum stringop_alg
18836 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
18837 int *dynamic_check)
18839 const struct stringop_algs * algs;
18840 bool optimize_for_speed;
18841 /* Algorithms using the rep prefix want at least edi and ecx;
18842 additionally, memset wants eax and memcpy wants esi. Don't
18843 consider such algorithms if the user has appropriated those
18844 registers for their own purposes. */
18845 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
18847 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
18849 #define ALG_USABLE_P(alg) (rep_prefix_usable \
18850 || (alg != rep_prefix_1_byte \
18851 && alg != rep_prefix_4_byte \
18852 && alg != rep_prefix_8_byte))
18853 const struct processor_costs *cost;
18855 /* Even if the string operation call is cold, we still might spend a lot
18856 of time processing large blocks. */
18857 if (optimize_function_for_size_p (cfun)
18858 || (optimize_insn_for_size_p ()
18859 && expected_size != -1 && expected_size < 256))
18860 optimize_for_speed = false;
18862 optimize_for_speed = true;
18864 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
18866 *dynamic_check = -1;
18868 algs = &cost->memset[TARGET_64BIT != 0];
18870 algs = &cost->memcpy[TARGET_64BIT != 0];
18871 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
18872 return stringop_alg;
18873 /* rep; movq or rep; movl is the smallest variant. */
18874 else if (!optimize_for_speed)
18876 if (!count || (count & 3))
18877 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
18879 return rep_prefix_usable ? rep_prefix_4_byte : loop;
18881 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
18883 else if (expected_size != -1 && expected_size < 4)
18884 return loop_1_byte;
18885 else if (expected_size != -1)
18888 enum stringop_alg alg = libcall;
18889 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18891 /* We get here if the algorithms that were not libcall-based
18892 were rep-prefix based and we are unable to use rep prefixes
18893 based on global register usage. Break out of the loop and
18894 use the heuristic below. */
18895 if (algs->size[i].max == 0)
18897 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
18899 enum stringop_alg candidate = algs->size[i].alg;
18901 if (candidate != libcall && ALG_USABLE_P (candidate))
18903 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
18904 the last non-libcall inline algorithm. */
18905 if (TARGET_INLINE_ALL_STRINGOPS)
18907 /* When the current size is best copied by a libcall, but we
18908 are still forced to inline, run the heuristic below
18909 that will pick code for medium-sized blocks. */
18910 if (alg != libcall)
18914 else if (ALG_USABLE_P (candidate))
18918 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
18920 /* When asked to inline the call anyway, try to pick a meaningful choice.
18921 We look for the maximal size of block that is faster to copy by hand and
18922 take blocks of at most that size, guessing that the average size will
18923 be roughly half of the block.
18925 If this turns out to be bad, we might simply specify the preferred
18926 choice in ix86_costs. */
18927 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18928 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
18931 enum stringop_alg alg;
18933 bool any_alg_usable_p = true;
18935 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
18937 enum stringop_alg candidate = algs->size[i].alg;
18938 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
18940 if (candidate != libcall && candidate
18941 && ALG_USABLE_P (candidate))
18942 max = algs->size[i].max;
18944 /* If there aren't any usable algorithms, then recursing on
18945 smaller sizes isn't going to find anything. Just return the
18946 simple byte-at-a-time copy loop. */
18947 if (!any_alg_usable_p)
18949 /* Pick something reasonable. */
18950 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18951 *dynamic_check = 128;
18952 return loop_1_byte;
18956 alg = decide_alg (count, max / 2, memset, dynamic_check);
18957 gcc_assert (*dynamic_check == -1);
18958 gcc_assert (alg != libcall);
18959 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
18960 *dynamic_check = max;
18963 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
18964 #undef ALG_USABLE_P
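/* For reference, a stringop_algs entry such as

     {libcall, {{256, loop}, {-1, rep_prefix_4_byte}}}

   (a hypothetical tuning, for illustration) would request a library
   call for blocks of unknown size, the simple loop for blocks up to
   256 bytes, and "rep movsl"/"rep stosl" for larger ones; max == -1
   terminates the list.  */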
18967 /* Decide on alignment. We know that the operand is already aligned to ALIGN
18968 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
18970 decide_alignment (int align,
18971 enum stringop_alg alg,
18974 int desired_align = 0;
18978 gcc_unreachable ();
18980 case unrolled_loop:
18981 desired_align = GET_MODE_SIZE (Pmode);
18983 case rep_prefix_8_byte:
18986 case rep_prefix_4_byte:
18987 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
18988 copying a whole cache line at once. */
18989 if (TARGET_PENTIUMPRO)
18994 case rep_prefix_1_byte:
18995 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
18996 copying a whole cache line at once. */
18997 if (TARGET_PENTIUMPRO)
19011 if (desired_align < align)
19012 desired_align = align;
19013 if (expected_size != -1 && expected_size < 4)
19014 desired_align = align;
19015 return desired_align;
19018 /* Return the smallest power of 2 greater than VAL. */
19020 smallest_pow2_greater_than (int val)
19028 /* Expand string move (memcpy) operation. Use i386 string operations when
19029 profitable. expand_setmem contains similar code. The code depends upon
19030 architecture, block size and alignment, but always has the same overall structure:
19033 1) Prologue guard: Conditional that jumps up to epilogues for small
19034 blocks that can be handled by the epilogue alone. This is faster, but
19035 also needed for correctness, since the prologue assumes the block is
19036 larger than the desired alignment.
19038 Optional dynamic check for size and libcall for large
19039 blocks is emitted here too, with -minline-stringops-dynamically.
19041 2) Prologue: copy first few bytes in order to get destination aligned
19042 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
19043 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
19044 We emit either a jump tree (for power-of-two-sized blocks) or a byte loop.
19046 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
19047 with specified algorithm.
19049 4) Epilogue: code copying tail of the block that is too small to be
19050 handled by main body (or up to size guarded by prologue guard). */
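/* As a plain-C sketch (illustration only), the emitted code has this
   shape:

     if (count < epilogue_size_needed)
       goto epilogue;                                  (1. guard)
     copy 1/2/4 bytes until dest reaches desired_align; (2. prologue)
     copy size_needed bytes per iteration;             (3. main body)
   epilogue:
     copy the count & (epilogue_size_needed - 1)
     remaining bytes;                                  (4. epilogue)
*/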
19053 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
19054 rtx expected_align_exp, rtx expected_size_exp)
19060 rtx jump_around_label = NULL;
19061 HOST_WIDE_INT align = 1;
19062 unsigned HOST_WIDE_INT count = 0;
19063 HOST_WIDE_INT expected_size = -1;
19064 int size_needed = 0, epilogue_size_needed;
19065 int desired_align = 0, align_bytes = 0;
19066 enum stringop_alg alg;
19068 bool need_zero_guard = false;
19070 if (CONST_INT_P (align_exp))
19071 align = INTVAL (align_exp);
19072 /* i386 can do misaligned access at a reasonably increased cost. */
19073 if (CONST_INT_P (expected_align_exp)
19074 && INTVAL (expected_align_exp) > align)
19075 align = INTVAL (expected_align_exp);
19076 /* ALIGN is the minimum of destination and source alignment, but we care here
19077 just about destination alignment. */
19078 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
19079 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
19081 if (CONST_INT_P (count_exp))
19082 count = expected_size = INTVAL (count_exp);
19083 if (CONST_INT_P (expected_size_exp) && count == 0)
19084 expected_size = INTVAL (expected_size_exp);
19086 /* Make sure we don't need to care about overflow later on. */
19087 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19090 /* Step 0: Decide on preferred algorithm, desired alignment and
19091 size of chunks to be copied by main loop. */
19093 alg = decide_alg (count, expected_size, false, &dynamic_check);
19094 desired_align = decide_alignment (align, alg, expected_size);
19096 if (!TARGET_ALIGN_STRINGOPS)
19097 align = desired_align;
19099 if (alg == libcall)
19101 gcc_assert (alg != no_stringop);
19103 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
19104 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19105 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
19110 gcc_unreachable ();
19112 need_zero_guard = true;
19113 size_needed = GET_MODE_SIZE (Pmode);
19115 case unrolled_loop:
19116 need_zero_guard = true;
19117 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
19119 case rep_prefix_8_byte:
19122 case rep_prefix_4_byte:
19125 case rep_prefix_1_byte:
19129 need_zero_guard = true;
19134 epilogue_size_needed = size_needed;
19136 /* Step 1: Prologue guard. */
19138 /* Alignment code needs count to be in register. */
19139 if (CONST_INT_P (count_exp) && desired_align > align)
19141 if (INTVAL (count_exp) > desired_align
19142 && INTVAL (count_exp) > size_needed)
19145 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19146 if (align_bytes <= 0)
19149 align_bytes = desired_align - align_bytes;
19151 if (align_bytes == 0)
19152 count_exp = force_reg (counter_mode (count_exp), count_exp);
19154 gcc_assert (desired_align >= 1 && align >= 1);
19156 /* Ensure that alignment prologue won't copy past end of block. */
19157 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19159 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19160 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19161 Make sure it is a power of 2. */
19162 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19166 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19168 /* If main algorithm works on QImode, no epilogue is needed.
19169 For small sizes just don't align anything. */
19170 if (size_needed == 1)
19171 desired_align = align;
19178 label = gen_label_rtx ();
19179 emit_cmp_and_jump_insns (count_exp,
19180 GEN_INT (epilogue_size_needed),
19181 LTU, 0, counter_mode (count_exp), 1, label);
19182 if (expected_size == -1 || expected_size < epilogue_size_needed)
19183 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19185 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19189 /* Emit code to decide at runtime whether a library call or the inline copy should be used. */
19191 if (dynamic_check != -1)
19193 if (CONST_INT_P (count_exp))
19195 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
19197 emit_block_move_via_libcall (dst, src, count_exp, false);
19198 count_exp = const0_rtx;
19204 rtx hot_label = gen_label_rtx ();
19205 jump_around_label = gen_label_rtx ();
19206 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19207 LEU, 0, GET_MODE (count_exp), 1, hot_label);
19208 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19209 emit_block_move_via_libcall (dst, src, count_exp, false);
19210 emit_jump (jump_around_label);
19211 emit_label (hot_label);
19215 /* Step 2: Alignment prologue. */
19217 if (desired_align > align)
19219 if (align_bytes == 0)
19221 /* Except for the first move in the epilogue, we no longer know
19222 the constant offset in aliasing info. It doesn't seem worth
19223 the pain to maintain it for the first move, so throw away the info early. */
19225 src = change_address (src, BLKmode, srcreg);
19226 dst = change_address (dst, BLKmode, destreg);
19227 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
19232 /* If we know how many bytes need to be stored before dst is
19233 sufficiently aligned, maintain aliasing info accurately. */
19234 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
19235 desired_align, align_bytes);
19236 count_exp = plus_constant (count_exp, -align_bytes);
19237 count -= align_bytes;
19239 if (need_zero_guard
19240 && (count < (unsigned HOST_WIDE_INT) size_needed
19241 || (align_bytes == 0
19242 && count < ((unsigned HOST_WIDE_INT) size_needed
19243 + desired_align - align))))
19245 /* It is possible that we copied enough so the main loop will not run at all. */
19247 gcc_assert (size_needed > 1);
19248 if (label == NULL_RTX)
19249 label = gen_label_rtx ();
19250 emit_cmp_and_jump_insns (count_exp,
19251 GEN_INT (size_needed),
19252 LTU, 0, counter_mode (count_exp), 1, label);
19253 if (expected_size == -1
19254 || expected_size < (desired_align - align) / 2 + size_needed)
19255 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19257 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19260 if (label && size_needed == 1)
19262 emit_label (label);
19263 LABEL_NUSES (label) = 1;
19265 epilogue_size_needed = 1;
19267 else if (label == NULL_RTX)
19268 epilogue_size_needed = size_needed;
19270 /* Step 3: Main loop. */
19276 gcc_unreachable ();
19278 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19279 count_exp, QImode, 1, expected_size);
19282 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19283 count_exp, Pmode, 1, expected_size);
19285 case unrolled_loop:
19286 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
19287 registers for 4 temporaries anyway. */
19288 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
19289 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
19292 case rep_prefix_8_byte:
19293 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19296 case rep_prefix_4_byte:
19297 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19300 case rep_prefix_1_byte:
19301 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
19305 /* Properly adjust the offsets of src and dest memory for aliasing. */
19306 if (CONST_INT_P (count_exp))
19308 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
19309 (count / size_needed) * size_needed);
19310 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19311 (count / size_needed) * size_needed);
19315 src = change_address (src, BLKmode, srcreg);
19316 dst = change_address (dst, BLKmode, destreg);
19319 /* Step 4: Epilogue to copy the remaining bytes. */
19323 /* When the main loop is done, COUNT_EXP might hold the original count,
19324 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19325 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19326 bytes. Compensate if needed. */
19328 if (size_needed < epilogue_size_needed)
19331 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19332 GEN_INT (size_needed - 1), count_exp, 1,
19334 if (tmp != count_exp)
19335 emit_move_insn (count_exp, tmp);
19337 emit_label (label);
19338 LABEL_NUSES (label) = 1;
19341 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19342 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
19343 epilogue_size_needed);
19344 if (jump_around_label)
19345 emit_label (jump_around_label);
19349 /* Helper function for memset. For QImode value 0xXY produce
19350 0xXYXYXYXY of the width specified by MODE. This is essentially
19351 a * 0x01010101, but we can do slightly better than
19352 synth_mult by unwinding the sequence by hand on CPUs with slow multiply. */
19355 promote_duplicated_reg (enum machine_mode mode, rtx val)
19357 enum machine_mode valmode = GET_MODE (val);
19359 int nops = mode == DImode ? 3 : 2;
19361 gcc_assert (mode == SImode || mode == DImode);
19362 if (val == const0_rtx)
19363 return copy_to_mode_reg (mode, const0_rtx);
19364 if (CONST_INT_P (val))
19366 HOST_WIDE_INT v = INTVAL (val) & 255;
19370 if (mode == DImode)
19371 v |= (v << 16) << 16;
19372 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
19375 if (valmode == VOIDmode)
19377 if (valmode != QImode)
19378 val = gen_lowpart (QImode, val);
19379 if (mode == QImode)
19381 if (!TARGET_PARTIAL_REG_STALL)
19383 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
19384 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
19385 <= (ix86_cost->shift_const + ix86_cost->add) * nops
19386 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
19388 rtx reg = convert_modes (mode, QImode, val, true);
19389 tmp = promote_duplicated_reg (mode, const1_rtx);
19390 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
19395 rtx reg = convert_modes (mode, QImode, val, true);
19397 if (!TARGET_PARTIAL_REG_STALL)
19398 if (mode == SImode)
19399 emit_insn (gen_movsi_insv_1 (reg, reg));
19401 emit_insn (gen_movdi_insv_1 (reg, reg));
19404 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
19405 NULL, 1, OPTAB_DIRECT);
19407 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19409 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
19410 NULL, 1, OPTAB_DIRECT);
19411 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
19412 if (mode == SImode)
19414 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
19415 NULL, 1, OPTAB_DIRECT);
19416 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
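/* Worked example of the shift-and-or ladder above (illustration):
   starting from val == 0xAB in SImode,

     v  = 0x000000AB;
     v |= v << 8;       now 0x0000ABAB
     v |= v << 16;      now 0xABABABAB

   and DImode adds one more step, v |= v << 32.  */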
19421 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
19422 that will be needed by the main loop copying SIZE_NEEDED chunks and by the
19423 prologue raising the alignment from ALIGN to DESIRED_ALIGN. */
19425 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
19430 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
19431 promoted_val = promote_duplicated_reg (DImode, val);
19432 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
19433 promoted_val = promote_duplicated_reg (SImode, val);
19434 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
19435 promoted_val = promote_duplicated_reg (HImode, val);
19437 promoted_val = val;
19439 return promoted_val;
19442 /* Expand string clear operation (bzero). Use i386 string operations when
19443 profitable. See expand_movmem comment for explanation of individual
19444 steps performed. */
19446 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
19447 rtx expected_align_exp, rtx expected_size_exp)
19452 rtx jump_around_label = NULL;
19453 HOST_WIDE_INT align = 1;
19454 unsigned HOST_WIDE_INT count = 0;
19455 HOST_WIDE_INT expected_size = -1;
19456 int size_needed = 0, epilogue_size_needed;
19457 int desired_align = 0, align_bytes = 0;
19458 enum stringop_alg alg;
19459 rtx promoted_val = NULL;
19460 bool force_loopy_epilogue = false;
19462 bool need_zero_guard = false;
19464 if (CONST_INT_P (align_exp))
19465 align = INTVAL (align_exp);
19466 /* i386 can do misaligned access at a reasonably increased cost. */
19467 if (CONST_INT_P (expected_align_exp)
19468 && INTVAL (expected_align_exp) > align)
19469 align = INTVAL (expected_align_exp);
19470 if (CONST_INT_P (count_exp))
19471 count = expected_size = INTVAL (count_exp);
19472 if (CONST_INT_P (expected_size_exp) && count == 0)
19473 expected_size = INTVAL (expected_size_exp);
19475 /* Make sure we don't need to care about overflow later on. */
19476 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
19479 /* Step 0: Decide on preferred algorithm, desired alignment and
19480 size of chunks to be copied by main loop. */
19482 alg = decide_alg (count, expected_size, true, &dynamic_check);
19483 desired_align = decide_alignment (align, alg, expected_size);
19485 if (!TARGET_ALIGN_STRINGOPS)
19486 align = desired_align;
19488 if (alg == libcall)
19490 gcc_assert (alg != no_stringop);
19492 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
19493 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
19498 gcc_unreachable ();
19500 need_zero_guard = true;
19501 size_needed = GET_MODE_SIZE (Pmode);
19503 case unrolled_loop:
19504 need_zero_guard = true;
19505 size_needed = GET_MODE_SIZE (Pmode) * 4;
19507 case rep_prefix_8_byte:
19510 case rep_prefix_4_byte:
19513 case rep_prefix_1_byte:
19517 need_zero_guard = true;
19521 epilogue_size_needed = size_needed;
19523 /* Step 1: Prologue guard. */
19525 /* Alignment code needs count to be in register. */
19526 if (CONST_INT_P (count_exp) && desired_align > align)
19528 if (INTVAL (count_exp) > desired_align
19529 && INTVAL (count_exp) > size_needed)
19532 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
19533 if (align_bytes <= 0)
19536 align_bytes = desired_align - align_bytes;
19538 if (align_bytes == 0)
19540 enum machine_mode mode = SImode;
19541 if (TARGET_64BIT && (count & ~0xffffffff))
19543 count_exp = force_reg (mode, count_exp);
19546 /* Do the cheap promotion to allow better CSE across the
19547 main loop and epilogue (i.e., one load of the big constant in
19548 front of all the code). */
19549 if (CONST_INT_P (val_exp))
19550 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19551 desired_align, align);
19552 /* Ensure that alignment prologue won't copy past end of block. */
19553 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
19555 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
19556 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
19557 Make sure it is a power of 2. */
19558 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
19560 /* To improve performance of small blocks, we jump around the VAL
19561 promoting code. This means that if the promoted VAL is not constant,
19562 we might not use it in the epilogue and have to use the byte loop variant. */
19564 if (epilogue_size_needed > 2 && !promoted_val)
19565 force_loopy_epilogue = true;
19568 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
19570 /* If main algorithm works on QImode, no epilogue is needed.
19571 For small sizes just don't align anything. */
19572 if (size_needed == 1)
19573 desired_align = align;
19580 label = gen_label_rtx ();
19581 emit_cmp_and_jump_insns (count_exp,
19582 GEN_INT (epilogue_size_needed),
19583 LTU, 0, counter_mode (count_exp), 1, label);
19584 if (expected_size == -1 || expected_size <= epilogue_size_needed)
19585 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19587 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19590 if (dynamic_check != -1)
19592 rtx hot_label = gen_label_rtx ();
19593 jump_around_label = gen_label_rtx ();
19594 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
19595 LEU, 0, counter_mode (count_exp), 1, hot_label);
19596 predict_jump (REG_BR_PROB_BASE * 90 / 100);
19597 set_storage_via_libcall (dst, count_exp, val_exp, false);
19598 emit_jump (jump_around_label);
19599 emit_label (hot_label);
19602 /* Step 2: Alignment prologue. */
19604 /* Do the expensive promotion once we branched off the small blocks. */
19606 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
19607 desired_align, align);
19608 gcc_assert (desired_align >= 1 && align >= 1);
19610 if (desired_align > align)
19612 if (align_bytes == 0)
19614 /* Except for the first move in the epilogue, we no longer know
19615 the constant offset in aliasing info. It doesn't seem worth
19616 the pain to maintain it for the first move, so throw away the info early. */
19618 dst = change_address (dst, BLKmode, destreg);
19619 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
19624 /* If we know how many bytes need to be stored before dst is
19625 sufficiently aligned, maintain aliasing info accurately. */
19626 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
19627 desired_align, align_bytes);
19628 count_exp = plus_constant (count_exp, -align_bytes);
19629 count -= align_bytes;
19631 if (need_zero_guard
19632 && (count < (unsigned HOST_WIDE_INT) size_needed
19633 || (align_bytes == 0
19634 && count < ((unsigned HOST_WIDE_INT) size_needed
19635 + desired_align - align))))
19637 /* It is possible that we copied enough so the main loop will not run at all. */
19639 gcc_assert (size_needed > 1);
19640 if (label == NULL_RTX)
19641 label = gen_label_rtx ();
19642 emit_cmp_and_jump_insns (count_exp,
19643 GEN_INT (size_needed),
19644 LTU, 0, counter_mode (count_exp), 1, label);
19645 if (expected_size == -1
19646 || expected_size < (desired_align - align) / 2 + size_needed)
19647 predict_jump (REG_BR_PROB_BASE * 20 / 100);
19649 predict_jump (REG_BR_PROB_BASE * 60 / 100);
19652 if (label && size_needed == 1)
19654 emit_label (label);
19655 LABEL_NUSES (label) = 1;
19657 promoted_val = val_exp;
19658 epilogue_size_needed = 1;
19660 else if (label == NULL_RTX)
19661 epilogue_size_needed = size_needed;
19663 /* Step 3: Main loop. */
19669 gcc_unreachable ();
19671 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19672 count_exp, QImode, 1, expected_size);
19675 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19676 count_exp, Pmode, 1, expected_size);
19678 case unrolled_loop:
19679 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
19680 count_exp, Pmode, 4, expected_size);
19682 case rep_prefix_8_byte:
19683 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19686 case rep_prefix_4_byte:
19687 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19690 case rep_prefix_1_byte:
19691 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
19695 /* Properly adjust the offset of dest memory for aliasing. */
19696 if (CONST_INT_P (count_exp))
19697 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
19698 (count / size_needed) * size_needed);
19700 dst = change_address (dst, BLKmode, destreg);
19702 /* Step 4: Epilogue to copy the remaining bytes. */
19706 /* When the main loop is done, COUNT_EXP might hold the original count,
19707 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
19708 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
19709 bytes. Compensate if needed. */
19711 if (size_needed < epilogue_size_needed)
19714 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
19715 GEN_INT (size_needed - 1), count_exp, 1,
19717 if (tmp != count_exp)
19718 emit_move_insn (count_exp, tmp);
19720 emit_label (label);
19721 LABEL_NUSES (label) = 1;
19724 if (count_exp != const0_rtx && epilogue_size_needed > 1)
19726 if (force_loopy_epilogue)
19727 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
19728 epilogue_size_needed);
19730 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
19731 epilogue_size_needed);
19733 if (jump_around_label)
19734 emit_label (jump_around_label);
19738 /* Expand the appropriate insns for doing strlen if not just doing repnz; scasb.
19741 out = result, initialized with the start address
19742 align_rtx = alignment of the address.
19743 scratch = scratch register, initialized with the startaddress when
19744 not aligned, otherwise undefined
19746 This is just the body. It needs the initializations mentioned above and
19747 some address computation at the end. These things are done in i386.md. */
19750 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
19754 rtx align_2_label = NULL_RTX;
19755 rtx align_3_label = NULL_RTX;
19756 rtx align_4_label = gen_label_rtx ();
19757 rtx end_0_label = gen_label_rtx ();
19759 rtx tmpreg = gen_reg_rtx (SImode);
19760 rtx scratch = gen_reg_rtx (SImode);
19764 if (CONST_INT_P (align_rtx))
19765 align = INTVAL (align_rtx);
19767 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
19769 /* Is there a known alignment and is it less than 4? */
19772 rtx scratch1 = gen_reg_rtx (Pmode);
19773 emit_move_insn (scratch1, out);
19774 /* Is there a known alignment and is it not 2? */
19777 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
19778 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
19780 /* Leave just the 3 lower bits. */
19781 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
19782 NULL_RTX, 0, OPTAB_WIDEN);
19784 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19785 Pmode, 1, align_4_label);
19786 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
19787 Pmode, 1, align_2_label);
19788 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
19789 Pmode, 1, align_3_label);
19793 /* Since the alignment is 2, we have to check 2 or 0 bytes;
19794 check whether it is aligned to 4 bytes. */
19796 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
19797 NULL_RTX, 0, OPTAB_WIDEN);
19799 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
19800 Pmode, 1, align_4_label);
19803 mem = change_address (src, QImode, out);
19805 /* Now compare the bytes. */
19807 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
19808 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
19809 QImode, 1, end_0_label);
19811 /* Increment the address. */
19812 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19814 /* Not needed with an alignment of 2 */
19817 emit_label (align_2_label);
19819 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19822 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19824 emit_label (align_3_label);
19827 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
19830 emit_insn (ix86_gen_add3 (out, out, const1_rtx));
19833 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
19834 align this loop. It only produces huge programs and does not help to converge. */
19836 emit_label (align_4_label);
19838 mem = change_address (src, SImode, out);
19839 emit_move_insn (scratch, mem);
19840 emit_insn (ix86_gen_add3 (out, out, GEN_INT (4)));
19842 /* This formula yields a nonzero result iff one of the bytes is zero.
19843 This saves three branches inside the loop and many cycles. */
19845 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
19846 emit_insn (gen_one_cmplsi2 (scratch, scratch));
19847 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
19848 emit_insn (gen_andsi3 (tmpreg, tmpreg,
19849 gen_int_mode (0x80808080, SImode)));
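/* This is the classic zero-byte test:

     (v - 0x01010101) & ~v & 0x80808080

   is nonzero iff some byte of v is zero.  Worked example (illustration):
   a little-endian load of "abc\0" gives v == 0x00636261;
   v - 0x01010101 == 0xFF626160 and ~v == 0xFF9C9D9E, whose AND is
   0xFF000100; masking with 0x80808080 leaves 0x80000000, nonzero,
   with the surviving 0x80 sitting in the byte that was zero.  */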
19850 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
19855 rtx reg = gen_reg_rtx (SImode);
19856 rtx reg2 = gen_reg_rtx (Pmode);
19857 emit_move_insn (reg, tmpreg);
19858 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
19860 /* If zero is not in the first two bytes, move two bytes forward. */
19861 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19862 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19863 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19864 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
19865 gen_rtx_IF_THEN_ELSE (SImode, tmp,
19868 /* Emit lea manually to avoid clobbering of flags. */
19869 emit_insn (gen_rtx_SET (SImode, reg2,
19870 gen_rtx_PLUS (Pmode, out, const2_rtx)));
19872 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19873 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
19874 emit_insn (gen_rtx_SET (VOIDmode, out,
19875 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
19881 rtx end_2_label = gen_label_rtx ();
19882 /* Is zero in the first two bytes? */
19884 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
19885 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
19886 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
19887 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
19888 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
19890 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
19891 JUMP_LABEL (tmp) = end_2_label;
19893 /* Not in the first two. Move two bytes forward. */
19894 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
19895 emit_insn (ix86_gen_add3 (out, out, const2_rtx));
19897 emit_label (end_2_label);
/* Avoid a branch when fixing up the byte position.  */
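/* Explanatory note (added; follows from the code below): at this point
   OUT points 4 bytes past the scanned word, already adjusted by +2 when
   the zero was in the upper half, and bit 7 of TMPREG's low byte is set
   exactly when the zero is the first byte of the remaining pair.
   Adding the byte to itself copies that bit into the carry flag, and
   the subtract-with-carry computes OUT - 3 - carry: OUT - 4 when the
   zero is the pair's first byte, OUT - 3 when it is the second.  */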
19902 tmpreg = gen_lowpart (QImode, tmpreg);
19903 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
19904 tmp = gen_rtx_REG (CCmode, FLAGS_REG);
19905 cmp = gen_rtx_LTU (VOIDmode, tmp, const0_rtx);
19906 emit_insn (ix86_gen_sub3_carry (out, out, GEN_INT (3), tmp, cmp));
19908 emit_label (end_0_label);
19911 /* Expand strlen. */
19914 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
19916 rtx addr, scratch1, scratch2, scratch3, scratch4;
/* The generic case of the strlen expander is long.  Avoid expanding
   it unless TARGET_INLINE_ALL_STRINGOPS.  */
19921 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19922 && !TARGET_INLINE_ALL_STRINGOPS
19923 && !optimize_insn_for_size_p ()
19924 && (!CONST_INT_P (align) || INTVAL (align) < 4))
19927 addr = force_reg (Pmode, XEXP (src, 0));
19928 scratch1 = gen_reg_rtx (Pmode);
19930 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
19931 && !optimize_insn_for_size_p ())
/* When the move and the subtraction are done here, some optimizers
   fail to combine a call like
     foo (strlen (bar), strlen (bar));
   they do compute the length just once when these instructions are
   emitted inside output_strlen_unroll ().  But since &bar[strlen (bar)]
   is often used, and this way uses one fewer register for the lifetime
   of output_strlen_unroll (), this is better.  */
19941 emit_move_insn (out, addr);
19943 ix86_expand_strlensi_unroll_1 (out, src, align);
19945 /* strlensi_unroll_1 returns the address of the zero at the end of
19946 the string, like memchr(), so compute the length by subtracting
19947 the start address. */
19948 emit_insn (ix86_gen_sub3 (out, out, addr));
19954 /* Can't use this if the user has appropriated eax, ecx, or edi. */
19955 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
19958 scratch2 = gen_reg_rtx (Pmode);
19959 scratch3 = gen_reg_rtx (Pmode);
19960 scratch4 = force_reg (Pmode, constm1_rtx);
19962 emit_move_insn (scratch3, addr);
19963 eoschar = force_reg (QImode, eoschar);
19965 src = replace_equiv_address_nv (src, scratch3);
19967 /* If .md starts supporting :P, this can be done in .md. */
19968 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
19969 scratch4), UNSPEC_SCAS);
19970 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
19971 emit_insn (ix86_gen_one_cmpl2 (scratch2, scratch1));
19972 emit_insn (ix86_gen_add3 (out, scratch2, constm1_rtx));
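/* Arithmetic note (illustrative): the scas-based scan starts its count
   register at -1 and decrements it once per byte examined, including
   the terminating zero, leaving -(n + 2) for a string of length n.
   Hence n = ~count - 1, which is what the one's complement and the
   add of -1 above compute.  */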
/* For a given symbol (function), construct code to compute the
   address of its PLT entry in the large x86-64 PIC model.  */
19980 construct_plt_address (rtx symbol)
19982 rtx tmp = gen_reg_rtx (Pmode);
19983 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
19985 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
19986 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
19988 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
19989 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
19994 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
19996 rtx pop, int sibcall)
19998 rtx use = NULL, call;
20000 if (pop == const0_rtx)
20002 gcc_assert (!TARGET_64BIT || !pop);
20004 if (TARGET_MACHO && !TARGET_64BIT)
20007 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
20008 fnaddr = machopic_indirect_call_target (fnaddr);
20013 /* Static functions and indirect calls don't need the pic register. */
20014 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
20015 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20016 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
20017 use_reg (&use, pic_offset_table_rtx);
20020 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
20022 rtx al = gen_rtx_REG (QImode, AX_REG);
20023 emit_move_insn (al, callarg2);
20024 use_reg (&use, al);
20027 if (ix86_cmodel == CM_LARGE_PIC
20029 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
20030 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
20031 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
20033 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
20034 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
20036 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
20037 fnaddr = gen_rtx_MEM (QImode, fnaddr);
20040 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
20042 call = gen_rtx_SET (VOIDmode, retval, call);
20045 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
20046 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
20047 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
20050 && ix86_cfun_abi () == MS_ABI
20051 && (!callarg2 || INTVAL (callarg2) != -2))
/* We need to represent that SI and DI registers are clobbered
   by the call.  */
20055 static int clobbered_registers[] = {
20056 XMM6_REG, XMM7_REG, XMM8_REG,
20057 XMM9_REG, XMM10_REG, XMM11_REG,
20058 XMM12_REG, XMM13_REG, XMM14_REG,
20059 XMM15_REG, SI_REG, DI_REG
20062 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
20063 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
20064 UNSPEC_MS_TO_SYSV_CALL);
20068 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
20069 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
20072 (SSE_REGNO_P (clobbered_registers[i])
20074 clobbered_registers[i]));
20076 call = gen_rtx_PARALLEL (VOIDmode,
20077 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
20081 call = emit_call_insn (call);
20083 CALL_INSN_FUNCTION_USAGE (call) = use;
/* Clear stack slot assignments remembered from previous functions.
   This is called from INIT_EXPANDERS once before RTL is emitted for each
   function.  */
20091 static struct machine_function *
20092 ix86_init_machine_status (void)
20094 struct machine_function *f;
20096 f = ggc_alloc_cleared_machine_function ();
20097 f->use_fast_prologue_epilogue_nregs = -1;
20098 f->tls_descriptor_call_expanded_p = 0;
20099 f->call_abi = ix86_abi;
20104 /* Return a MEM corresponding to a stack slot with mode MODE.
20105 Allocate a new slot if necessary.
20107 The RTL for a function can have several slots available: N is
20108 which slot to use. */
20111 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
20113 struct stack_local_entry *s;
20115 gcc_assert (n < MAX_386_STACK_LOCALS);
20117 /* Virtual slot is valid only before vregs are instantiated. */
20118 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
20120 for (s = ix86_stack_locals; s; s = s->next)
20121 if (s->mode == mode && s->n == n)
20122 return copy_rtx (s->rtl);
20124 s = ggc_alloc_stack_local_entry ();
20127 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
20129 s->next = ix86_stack_locals;
20130 ix86_stack_locals = s;
20134 /* Construct the SYMBOL_REF for the tls_get_addr function. */
20136 static GTY(()) rtx ix86_tls_symbol;
20138 ix86_tls_get_addr (void)
20141 if (!ix86_tls_symbol)
20143 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
20144 (TARGET_ANY_GNU_TLS
20146 ? "___tls_get_addr"
20147 : "__tls_get_addr");
20150 return ix86_tls_symbol;
20153 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
20155 static GTY(()) rtx ix86_tls_module_base_symbol;
20157 ix86_tls_module_base (void)
20160 if (!ix86_tls_module_base_symbol)
20162 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
20163 "_TLS_MODULE_BASE_");
20164 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
20165 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
20168 return ix86_tls_module_base_symbol;
20171 /* Calculate the length of the memory address in the instruction
20172 encoding. Does not include the one-byte modrm, opcode, or prefix. */
20175 memory_address_length (rtx addr)
20177 struct ix86_address parts;
20178 rtx base, index, disp;
20182 if (GET_CODE (addr) == PRE_DEC
20183 || GET_CODE (addr) == POST_INC
20184 || GET_CODE (addr) == PRE_MODIFY
20185 || GET_CODE (addr) == POST_MODIFY)
20188 ok = ix86_decompose_address (addr, &parts);
20191 if (parts.base && GET_CODE (parts.base) == SUBREG)
20192 parts.base = SUBREG_REG (parts.base);
20193 if (parts.index && GET_CODE (parts.index) == SUBREG)
20194 parts.index = SUBREG_REG (parts.index);
20197 index = parts.index;
/* Rule of thumb:
     - esp as the base always wants an index,
     - ebp as the base always wants a displacement,
     - r12 as the base always wants an index,
     - r13 as the base always wants a displacement.  */
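/* Illustrative encodings (not from the original sources), counting only
   the bytes this function reports, i.e. everything after the modrm
   byte: (%eax) needs no extra bytes; 4(%ebp) needs a disp8, so 1 byte;
   (%esp) needs a SIB byte, so 1 byte; 0x1000(%eax,%ebx,4) needs a SIB
   byte plus a disp32, so 5 bytes.  */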
20207 /* Register Indirect. */
20208 if (base && !index && !disp)
/* esp (for its index) and ebp (for its displacement) need
   the two-byte modrm form.  Similarly for r12 and r13 in 64-bit
   mode.  */
20214 && (addr == arg_pointer_rtx
20215 || addr == frame_pointer_rtx
20216 || REGNO (addr) == SP_REG
20217 || REGNO (addr) == BP_REG
20218 || REGNO (addr) == R12_REG
20219 || REGNO (addr) == R13_REG))
/* Direct Addressing.  In 64-bit mode mod 00 r/m 5
   is not disp32, but disp32(%rip), so for disp32
   a SIB byte is needed, unless print_operand_address
   optimizes it into disp32(%rip) or (%rip) is implied
   by UNSPEC.  */
20228 else if (disp && !base && !index)
20235 if (GET_CODE (disp) == CONST)
20236 symbol = XEXP (disp, 0);
20237 if (GET_CODE (symbol) == PLUS
20238 && CONST_INT_P (XEXP (symbol, 1)))
20239 symbol = XEXP (symbol, 0);
20241 if (GET_CODE (symbol) != LABEL_REF
20242 && (GET_CODE (symbol) != SYMBOL_REF
20243 || SYMBOL_REF_TLS_MODEL (symbol) != 0)
20244 && (GET_CODE (symbol) != UNSPEC
20245 || (XINT (symbol, 1) != UNSPEC_GOTPCREL
20246 && XINT (symbol, 1) != UNSPEC_GOTNTPOFF)))
20253 /* Find the length of the displacement constant. */
20256 if (base && satisfies_constraint_K (disp))
20261 /* ebp always wants a displacement. Similarly r13. */
20262 else if (base && REG_P (base)
20263 && (REGNO (base) == BP_REG || REGNO (base) == R13_REG))
20266 /* An index requires the two-byte modrm form.... */
20268 /* ...like esp (or r12), which always wants an index. */
20269 || base == arg_pointer_rtx
20270 || base == frame_pointer_rtx
20271 || (base && REG_P (base)
20272 && (REGNO (base) == SP_REG || REGNO (base) == R12_REG)))
/* Compute default value for "length_immediate" attribute.  When SHORTFORM
   is set, expect that the insn has an 8-bit immediate alternative.  */
20292 ix86_attr_length_immediate_default (rtx insn, int shortform)
20296 extract_insn_cached (insn);
20297 for (i = recog_data.n_operands - 1; i >= 0; --i)
20298 if (CONSTANT_P (recog_data.operand[i]))
20300 enum attr_mode mode = get_attr_mode (insn);
20303 if (shortform && CONST_INT_P (recog_data.operand[i]))
20305 HOST_WIDE_INT ival = INTVAL (recog_data.operand[i]);
20312 ival = trunc_int_for_mode (ival, HImode);
20315 ival = trunc_int_for_mode (ival, SImode);
20320 if (IN_RANGE (ival, -128, 127))
/* Immediates for DImode instructions are encoded as 32-bit sign-extended
   values.  */
20342 fatal_insn ("unknown insn mode", insn);
20347 /* Compute default value for "length_address" attribute. */
20349 ix86_attr_length_address_default (rtx insn)
20353 if (get_attr_type (insn) == TYPE_LEA)
20355 rtx set = PATTERN (insn), addr;
20357 if (GET_CODE (set) == PARALLEL)
20358 set = XVECEXP (set, 0, 0);
20360 gcc_assert (GET_CODE (set) == SET);
20362 addr = SET_SRC (set);
20363 if (TARGET_64BIT && get_attr_mode (insn) == MODE_SI)
20365 if (GET_CODE (addr) == ZERO_EXTEND)
20366 addr = XEXP (addr, 0);
20367 if (GET_CODE (addr) == SUBREG)
20368 addr = SUBREG_REG (addr);
20371 return memory_address_length (addr);
20374 extract_insn_cached (insn);
20375 for (i = recog_data.n_operands - 1; i >= 0; --i)
20376 if (MEM_P (recog_data.operand[i]))
20378 constrain_operands_cached (reload_completed);
20379 if (which_alternative != -1)
20381 const char *constraints = recog_data.constraints[i];
20382 int alt = which_alternative;
20384 while (*constraints == '=' || *constraints == '+')
20387 while (*constraints++ != ',')
20389 /* Skip ignored operands. */
20390 if (*constraints == 'X')
20393 return memory_address_length (XEXP (recog_data.operand[i], 0));
/* Compute default value for "length_vex" attribute.  It includes
   the 2- or 3-byte VEX prefix and 1 opcode byte.  */
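/* Illustrative examples (not from the original sources): an 0f-map insn
   such as vaddps %xmm1, %xmm2, %xmm3 fits the 2-byte (C5) VEX prefix,
   so this returns 2 + 1 = 3; the same insn with a DImode general
   register operand (REX.W) or a memory operand whose base or index is
   one of r8-r15 (REX.X/REX.B) needs the 3-byte (C4) prefix and
   returns 4.  */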
20402 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
/* Only the 0f opcode can use the 2-byte VEX prefix, and the VEX.W
   bit requires the 3-byte VEX prefix.  */
20409 if (!has_0f_opcode || has_vex_w)
/* We can always use the 2-byte VEX prefix in 32-bit mode.  */
20416 extract_insn_cached (insn);
20418 for (i = recog_data.n_operands - 1; i >= 0; --i)
20419 if (REG_P (recog_data.operand[i]))
/* The REX.W bit requires the 3-byte VEX prefix.  */
20422 if (GET_MODE (recog_data.operand[i]) == DImode
20423 && GENERAL_REG_P (recog_data.operand[i]))
/* The REX.X or REX.B bits require the 3-byte VEX prefix.  */
20429 if (MEM_P (recog_data.operand[i])
20430 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
20437 /* Return the maximum number of instructions a cpu can issue. */
20440 ix86_issue_rate (void)
20444 case PROCESSOR_PENTIUM:
20445 case PROCESSOR_ATOM:
20449 case PROCESSOR_PENTIUMPRO:
20450 case PROCESSOR_PENTIUM4:
20451 case PROCESSOR_ATHLON:
20453 case PROCESSOR_AMDFAM10:
20454 case PROCESSOR_NOCONA:
20455 case PROCESSOR_GENERIC32:
20456 case PROCESSOR_GENERIC64:
20457 case PROCESSOR_BDVER1:
20460 case PROCESSOR_CORE2:
20468 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
20469 by DEP_INSN and nothing set by DEP_INSN. */
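/* For example (illustrative): a CMP that sets only the flags (DEP_INSN)
   followed by a SETCC or Jcc that reads only those flags (INSN)
   satisfies this test; the Pentium cost adjustment below pairs such
   compares with the jump/setcc consuming them.  */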
20472 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
20476 /* Simplify the test for uninteresting insns. */
20477 if (insn_type != TYPE_SETCC
20478 && insn_type != TYPE_ICMOV
20479 && insn_type != TYPE_FCMOV
20480 && insn_type != TYPE_IBR)
20483 if ((set = single_set (dep_insn)) != 0)
20485 set = SET_DEST (set);
20488 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
20489 && XVECLEN (PATTERN (dep_insn), 0) == 2
20490 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
20491 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
20493 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
20499 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
20502 /* This test is true if the dependent insn reads the flags but
20503 not any other potentially set register. */
20504 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
20507 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
/* Return true iff USE_INSN has a memory address with operands set by
   SET_INSN.  */
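/* For example (illustrative): "mov %ebx, %eax" (SET_INSN) followed by
   "mov (%eax), %ecx" (USE_INSN) is such a dependency: the load's
   address register is written by the previous insn, which on Pentium
   triggers the Address Generation Interlock handled in
   ix86_adjust_cost.  */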
20517 ix86_agi_dependent (rtx set_insn, rtx use_insn)
20520 extract_insn_cached (use_insn);
20521 for (i = recog_data.n_operands - 1; i >= 0; --i)
20522 if (MEM_P (recog_data.operand[i]))
20524 rtx addr = XEXP (recog_data.operand[i], 0);
20525 return modified_in_p (addr, set_insn) != 0;
20531 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
20533 enum attr_type insn_type, dep_insn_type;
20534 enum attr_memory memory;
20536 int dep_insn_code_number;
20538 /* Anti and output dependencies have zero cost on all CPUs. */
20539 if (REG_NOTE_KIND (link) != 0)
20542 dep_insn_code_number = recog_memoized (dep_insn);
20544 /* If we can't recognize the insns, we can't really do anything. */
20545 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
20548 insn_type = get_attr_type (insn);
20549 dep_insn_type = get_attr_type (dep_insn);
20553 case PROCESSOR_PENTIUM:
20554 /* Address Generation Interlock adds a cycle of latency. */
20555 if (insn_type == TYPE_LEA)
20557 rtx addr = PATTERN (insn);
20559 if (GET_CODE (addr) == PARALLEL)
20560 addr = XVECEXP (addr, 0, 0);
20562 gcc_assert (GET_CODE (addr) == SET);
20564 addr = SET_SRC (addr);
20565 if (modified_in_p (addr, dep_insn))
20568 else if (ix86_agi_dependent (dep_insn, insn))
20571 /* ??? Compares pair with jump/setcc. */
20572 if (ix86_flags_dependent (insn, dep_insn, insn_type))
/* Floating point stores require the value to be ready one cycle
   earlier.  */
20576 if (insn_type == TYPE_FMOV
20577 && get_attr_memory (insn) == MEMORY_STORE
20578 && !ix86_agi_dependent (dep_insn, insn))
20582 case PROCESSOR_PENTIUMPRO:
20583 memory = get_attr_memory (insn);
20585 /* INT->FP conversion is expensive. */
20586 if (get_attr_fp_int_src (dep_insn))
20589 /* There is one cycle extra latency between an FP op and a store. */
20590 if (insn_type == TYPE_FMOV
20591 && (set = single_set (dep_insn)) != NULL_RTX
20592 && (set2 = single_set (insn)) != NULL_RTX
20593 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
20594 && MEM_P (SET_DEST (set2)))
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction, when the
   previous instruction is not needed to compute the address.  */
20600 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20601 && !ix86_agi_dependent (dep_insn, insn))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
20605 if (dep_insn_type == TYPE_IMOV
20606 || dep_insn_type == TYPE_FMOV)
20614 memory = get_attr_memory (insn);
/* The esp dependency is resolved before the instruction is really
   finished.  */
20618 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
20619 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
20622 /* INT->FP conversion is expensive. */
20623 if (get_attr_fp_int_src (dep_insn))
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction, when the
   previous instruction is not needed to compute the address.  */
20629 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20630 && !ix86_agi_dependent (dep_insn, insn))
/* Claim moves to take one cycle, as the core can issue one load
   at a time and the next load can start a cycle later.  */
20634 if (dep_insn_type == TYPE_IMOV
20635 || dep_insn_type == TYPE_FMOV)
20644 case PROCESSOR_ATHLON:
20646 case PROCESSOR_AMDFAM10:
20647 case PROCESSOR_BDVER1:
20648 case PROCESSOR_ATOM:
20649 case PROCESSOR_GENERIC32:
20650 case PROCESSOR_GENERIC64:
20651 memory = get_attr_memory (insn);
/* Show the reorder buffer's ability to hide the latency of a load by
   executing it in parallel with the previous instruction, when the
   previous instruction is not needed to compute the address.  */
20656 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
20657 && !ix86_agi_dependent (dep_insn, insn))
20659 enum attr_unit unit = get_attr_unit (insn);
/* Because of the difference between the length of the integer and
   floating unit pipeline preparation stages, memory operands for
   floating point are cheaper.

   ??? For Athlon the difference is most probably 2.  */
20667 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
20670 loadcost = TARGET_ATHLON ? 2 : 0;
20672 if (cost >= loadcost)
20685 /* How many alternative schedules to try. This should be as wide as the
20686 scheduling freedom in the DFA, but no wider. Making this value too
large results in extra work for the scheduler.  */
20690 ia32_multipass_dfa_lookahead (void)
20694 case PROCESSOR_PENTIUM:
20697 case PROCESSOR_PENTIUMPRO:
/* Compute the alignment given to a constant that is being placed in memory.
   EXP is the constant and ALIGN is the alignment that the object would
   ordinarily have.
   The value of this function is used instead of that alignment to align
   the object.  */
20714 ix86_constant_alignment (tree exp, int align)
20716 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
20717 || TREE_CODE (exp) == INTEGER_CST)
20719 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
20721 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
20724 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
20725 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
20726 return BITS_PER_WORD;
20731 /* Compute the alignment for a static variable.
20732 TYPE is the data type, and ALIGN is the alignment that
20733 the object would ordinarily have. The value of this function is used
20734 instead of that alignment to align the object. */
20737 ix86_data_alignment (tree type, int align)
20739 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
20741 if (AGGREGATE_TYPE_P (type)
20742 && TYPE_SIZE (type)
20743 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20744 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
20745 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
20746 && align < max_align)
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  */
20753 if (AGGREGATE_TYPE_P (type)
20754 && TYPE_SIZE (type)
20755 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20756 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
20757 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20761 if (TREE_CODE (type) == ARRAY_TYPE)
20763 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20765 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20768 else if (TREE_CODE (type) == COMPLEX_TYPE)
20771 if (TYPE_MODE (type) == DCmode && align < 64)
20773 if ((TYPE_MODE (type) == XCmode
20774 || TYPE_MODE (type) == TCmode) && align < 128)
20777 else if ((TREE_CODE (type) == RECORD_TYPE
20778 || TREE_CODE (type) == UNION_TYPE
20779 || TREE_CODE (type) == QUAL_UNION_TYPE)
20780 && TYPE_FIELDS (type))
20782 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20784 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20787 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20788 || TREE_CODE (type) == INTEGER_TYPE)
20790 if (TYPE_MODE (type) == DFmode && align < 64)
20792 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20799 /* Compute the alignment for a local variable or a stack slot. EXP is
20800 the data type or decl itself, MODE is the widest mode available and
20801 ALIGN is the alignment that the object would ordinarily have. The
value of this macro is used instead of that alignment to align the
object.  */
20806 ix86_local_alignment (tree exp, enum machine_mode mode,
20807 unsigned int align)
20811 if (exp && DECL_P (exp))
20813 type = TREE_TYPE (exp);
20822 /* Don't do dynamic stack realignment for long long objects with
20823 -mpreferred-stack-boundary=2. */
20826 && ix86_preferred_stack_boundary < 64
20827 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
20828 && (!type || !TYPE_USER_ALIGN (type))
20829 && (!decl || !DECL_USER_ALIGN (decl)))
/* If TYPE is NULL, we are allocating a stack slot for a caller-save
   register in MODE.  We will return the largest alignment of XF
   and DF.  */
20837 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
20838 align = GET_MODE_ALIGNMENT (DFmode);
/* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
   to a 16-byte boundary.  The exact wording is:

     An array uses the same alignment as its elements, except that a local or
     global array variable of length at least 16 bytes or
     a C99 variable-length array variable always has alignment of at least 16 bytes.

   This was added to allow use of aligned SSE instructions on arrays.  The
   rule is meant for static storage (where the compiler cannot do the
   analysis by itself).  We follow it for automatic variables only when
   convenient: we fully control everything in the function being compiled,
   and functions from other units cannot rely on the alignment.

   Exclude the va_list type.  It is the common case of a local array where
   we cannot benefit from the alignment.  */
20857 if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
20860 if (AGGREGATE_TYPE_P (type)
20861 && (TYPE_MAIN_VARIANT (type)
20862 != TYPE_MAIN_VARIANT (va_list_type_node))
20863 && TYPE_SIZE (type)
20864 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
20865 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
20866 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
20869 if (TREE_CODE (type) == ARRAY_TYPE)
20871 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
20873 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
20876 else if (TREE_CODE (type) == COMPLEX_TYPE)
20878 if (TYPE_MODE (type) == DCmode && align < 64)
20880 if ((TYPE_MODE (type) == XCmode
20881 || TYPE_MODE (type) == TCmode) && align < 128)
20884 else if ((TREE_CODE (type) == RECORD_TYPE
20885 || TREE_CODE (type) == UNION_TYPE
20886 || TREE_CODE (type) == QUAL_UNION_TYPE)
20887 && TYPE_FIELDS (type))
20889 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
20891 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
20894 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
20895 || TREE_CODE (type) == INTEGER_TYPE)
20898 if (TYPE_MODE (type) == DFmode && align < 64)
20900 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
20906 /* Compute the minimum required alignment for dynamic stack realignment
20907 purposes for a local variable, parameter or a stack slot. EXP is
20908 the data type or decl itself, MODE is its mode and ALIGN is the
20909 alignment that the object would ordinarily have. */
20912 ix86_minimum_alignment (tree exp, enum machine_mode mode,
20913 unsigned int align)
20917 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
20920 if (exp && DECL_P (exp))
20922 type = TREE_TYPE (exp);
20931 /* Don't do dynamic stack realignment for long long objects with
20932 -mpreferred-stack-boundary=2. */
20933 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
20934 && (!type || !TYPE_USER_ALIGN (type))
20935 && (!decl || !DECL_USER_ALIGN (decl)))
20941 /* Find a location for the static chain incoming to a nested function.
20942 This is a register, unless all free registers are used by arguments. */
20945 ix86_static_chain (const_tree fndecl, bool incoming_p)
20949 if (!DECL_STATIC_CHAIN (fndecl))
20954 /* We always use R10 in 64-bit mode. */
20960 /* By default in 32-bit mode we use ECX to pass the static chain. */
20963 fntype = TREE_TYPE (fndecl);
20964 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
20966 /* Fastcall functions use ecx/edx for arguments, which leaves
20967 us with EAX for the static chain. */
20970 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype)))
20972 /* Thiscall functions use ecx for arguments, which leaves
20973 us with EAX for the static chain. */
20976 else if (ix86_function_regparm (fntype, fndecl) == 3)
20978 /* For regparm 3, we have no free call-clobbered registers in
20979 which to store the static chain. In order to implement this,
20980 we have the trampoline push the static chain to the stack.
20981 However, we can't push a value below the return address when
20982 we call the nested function directly, so we have to use an
20983 alternate entry point. For this we use ESI, and have the
20984 alternate entry point push ESI, so that things appear the
20985 same once we're executing the nested function. */
20988 if (fndecl == current_function_decl)
20989 ix86_static_chain_on_stack = true;
20990 return gen_frame_mem (SImode,
20991 plus_constant (arg_pointer_rtx, -8));
20997 return gen_rtx_REG (Pmode, regno);
21000 /* Emit RTL insns to initialize the variable parts of a trampoline.
21001 FNDECL is the decl of the target address; M_TRAMP is a MEM for
21002 the trampoline, and CHAIN_VALUE is an RTX for the static chain
21003 to be passed to the target function. */
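/* Illustrative sketch (not from the original sources) of the ia32
   trampoline emitted below, for the register-static-chain case:

	offset 0:  one-byte opcode loading the chain register
	offset 1:  32-bit static chain value
	offset 5:  0xe9 (jmp rel32)
	offset 6:  32-bit displacement to the target function

   10 bytes in total.  The 64-bit variant instead materializes the
   target address in r11 and the chain in r10, ending with jmp *%r11.  */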
21006 ix86_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
21010 fnaddr = XEXP (DECL_RTL (fndecl), 0);
21017 /* Depending on the static chain location, either load a register
21018 with a constant, or push the constant to the stack. All of the
21019 instructions are the same size. */
21020 chain = ix86_static_chain (fndecl, true);
21023 if (REGNO (chain) == CX_REG)
21025 else if (REGNO (chain) == AX_REG)
21028 gcc_unreachable ();
21033 mem = adjust_address (m_tramp, QImode, 0);
21034 emit_move_insn (mem, gen_int_mode (opcode, QImode));
21036 mem = adjust_address (m_tramp, SImode, 1);
21037 emit_move_insn (mem, chain_value);
21039 /* Compute offset from the end of the jmp to the target function.
21040 In the case in which the trampoline stores the static chain on
21041 the stack, we need to skip the first insn which pushes the
21042 (call-saved) register static chain; this push is 1 byte. */
21043 disp = expand_binop (SImode, sub_optab, fnaddr,
21044 plus_constant (XEXP (m_tramp, 0),
21045 MEM_P (chain) ? 9 : 10),
21046 NULL_RTX, 1, OPTAB_DIRECT);
21048 mem = adjust_address (m_tramp, QImode, 5);
21049 emit_move_insn (mem, gen_int_mode (0xe9, QImode));
21051 mem = adjust_address (m_tramp, SImode, 6);
21052 emit_move_insn (mem, disp);
/* Load the function address into r11.  Try to load the address using
   the shorter movl instead of movabs.  We may want to support movq
   for kernel mode, but the kernel does not use trampolines at
   the moment.  */
21062 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
21064 fnaddr = copy_to_mode_reg (DImode, fnaddr);
21066 mem = adjust_address (m_tramp, HImode, offset);
21067 emit_move_insn (mem, gen_int_mode (0xbb41, HImode));
21069 mem = adjust_address (m_tramp, SImode, offset + 2);
21070 emit_move_insn (mem, gen_lowpart (SImode, fnaddr));
21075 mem = adjust_address (m_tramp, HImode, offset);
21076 emit_move_insn (mem, gen_int_mode (0xbb49, HImode));
21078 mem = adjust_address (m_tramp, DImode, offset + 2);
21079 emit_move_insn (mem, fnaddr);
21083 /* Load static chain using movabs to r10. */
21084 mem = adjust_address (m_tramp, HImode, offset);
21085 emit_move_insn (mem, gen_int_mode (0xba49, HImode));
21087 mem = adjust_address (m_tramp, DImode, offset + 2);
21088 emit_move_insn (mem, chain_value);
21091 /* Jump to r11; the last (unused) byte is a nop, only there to
21092 pad the write out to a single 32-bit store. */
21093 mem = adjust_address (m_tramp, SImode, offset);
21094 emit_move_insn (mem, gen_int_mode (0x90e3ff49, SImode));
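/* Informational note: stored little-endian, 0x90e3ff49 writes the bytes
   0x49 0xff 0xe3 0x90, i.e. a REX-prefixed "jmp *%r11" followed by the
   padding nop mentioned above.  */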
21097 gcc_assert (offset <= TRAMPOLINE_SIZE);
21100 #ifdef ENABLE_EXECUTE_STACK
21101 #ifdef CHECK_EXECUTE_STACK_ENABLED
21102 if (CHECK_EXECUTE_STACK_ENABLED)
21104 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
21105 LCT_NORMAL, VOIDmode, 1, XEXP (m_tramp, 0), Pmode);
21109 /* The following file contains several enumerations and data structures
21110 built from the definitions in i386-builtin-types.def. */
21112 #include "i386-builtin-types.inc"
21114 /* Table for the ix86 builtin non-function types. */
21115 static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
21117 /* Retrieve an element from the above table, building some of
21118 the types lazily. */
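/* For example (illustrative): a vector entry is built on first use from
   its base element type and machine mode via build_vector_type_for_mode,
   and a pointer entry from its pointee type via build_pointer_type, so
   only the types a compilation actually uses get constructed.  */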
21121 ix86_get_builtin_type (enum ix86_builtin_type tcode)
21123 unsigned int index;
21126 gcc_assert ((unsigned)tcode < ARRAY_SIZE(ix86_builtin_type_tab));
21128 type = ix86_builtin_type_tab[(int) tcode];
21132 gcc_assert (tcode > IX86_BT_LAST_PRIM);
21133 if (tcode <= IX86_BT_LAST_VECT)
21135 enum machine_mode mode;
21137 index = tcode - IX86_BT_LAST_PRIM - 1;
21138 itype = ix86_get_builtin_type (ix86_builtin_type_vect_base[index]);
21139 mode = ix86_builtin_type_vect_mode[index];
21141 type = build_vector_type_for_mode (itype, mode);
21147 index = tcode - IX86_BT_LAST_VECT - 1;
21148 if (tcode <= IX86_BT_LAST_PTR)
21149 quals = TYPE_UNQUALIFIED;
21151 quals = TYPE_QUAL_CONST;
21153 itype = ix86_get_builtin_type (ix86_builtin_type_ptr_base[index]);
21154 if (quals != TYPE_UNQUALIFIED)
21155 itype = build_qualified_type (itype, quals);
21157 type = build_pointer_type (itype);
21160 ix86_builtin_type_tab[(int) tcode] = type;
21164 /* Table for the ix86 builtin function types. */
21165 static GTY(()) tree ix86_builtin_func_type_tab[(int) IX86_BT_LAST_ALIAS + 1];
21167 /* Retrieve an element from the above table, building some of
21168 the types lazily. */
21171 ix86_get_builtin_func_type (enum ix86_builtin_func_type tcode)
21175 gcc_assert ((unsigned)tcode < ARRAY_SIZE (ix86_builtin_func_type_tab));
21177 type = ix86_builtin_func_type_tab[(int) tcode];
21181 if (tcode <= IX86_BT_LAST_FUNC)
21183 unsigned start = ix86_builtin_func_start[(int) tcode];
21184 unsigned after = ix86_builtin_func_start[(int) tcode + 1];
21185 tree rtype, atype, args = void_list_node;
21188 rtype = ix86_get_builtin_type (ix86_builtin_func_args[start]);
21189 for (i = after - 1; i > start; --i)
21191 atype = ix86_get_builtin_type (ix86_builtin_func_args[i]);
21192 args = tree_cons (NULL, atype, args);
21195 type = build_function_type (rtype, args);
21199 unsigned index = tcode - IX86_BT_LAST_FUNC - 1;
21200 enum ix86_builtin_func_type icode;
21202 icode = ix86_builtin_func_alias_base[index];
21203 type = ix86_get_builtin_func_type (icode);
21206 ix86_builtin_func_type_tab[(int) tcode] = type;
21211 /* Codes for all the SSE/MMX builtins. */
21214 IX86_BUILTIN_ADDPS,
21215 IX86_BUILTIN_ADDSS,
21216 IX86_BUILTIN_DIVPS,
21217 IX86_BUILTIN_DIVSS,
21218 IX86_BUILTIN_MULPS,
21219 IX86_BUILTIN_MULSS,
21220 IX86_BUILTIN_SUBPS,
21221 IX86_BUILTIN_SUBSS,
21223 IX86_BUILTIN_CMPEQPS,
21224 IX86_BUILTIN_CMPLTPS,
21225 IX86_BUILTIN_CMPLEPS,
21226 IX86_BUILTIN_CMPGTPS,
21227 IX86_BUILTIN_CMPGEPS,
21228 IX86_BUILTIN_CMPNEQPS,
21229 IX86_BUILTIN_CMPNLTPS,
21230 IX86_BUILTIN_CMPNLEPS,
21231 IX86_BUILTIN_CMPNGTPS,
21232 IX86_BUILTIN_CMPNGEPS,
21233 IX86_BUILTIN_CMPORDPS,
21234 IX86_BUILTIN_CMPUNORDPS,
21235 IX86_BUILTIN_CMPEQSS,
21236 IX86_BUILTIN_CMPLTSS,
21237 IX86_BUILTIN_CMPLESS,
21238 IX86_BUILTIN_CMPNEQSS,
21239 IX86_BUILTIN_CMPNLTSS,
21240 IX86_BUILTIN_CMPNLESS,
21241 IX86_BUILTIN_CMPNGTSS,
21242 IX86_BUILTIN_CMPNGESS,
21243 IX86_BUILTIN_CMPORDSS,
21244 IX86_BUILTIN_CMPUNORDSS,
21246 IX86_BUILTIN_COMIEQSS,
21247 IX86_BUILTIN_COMILTSS,
21248 IX86_BUILTIN_COMILESS,
21249 IX86_BUILTIN_COMIGTSS,
21250 IX86_BUILTIN_COMIGESS,
21251 IX86_BUILTIN_COMINEQSS,
21252 IX86_BUILTIN_UCOMIEQSS,
21253 IX86_BUILTIN_UCOMILTSS,
21254 IX86_BUILTIN_UCOMILESS,
21255 IX86_BUILTIN_UCOMIGTSS,
21256 IX86_BUILTIN_UCOMIGESS,
21257 IX86_BUILTIN_UCOMINEQSS,
21259 IX86_BUILTIN_CVTPI2PS,
21260 IX86_BUILTIN_CVTPS2PI,
21261 IX86_BUILTIN_CVTSI2SS,
21262 IX86_BUILTIN_CVTSI642SS,
21263 IX86_BUILTIN_CVTSS2SI,
21264 IX86_BUILTIN_CVTSS2SI64,
21265 IX86_BUILTIN_CVTTPS2PI,
21266 IX86_BUILTIN_CVTTSS2SI,
21267 IX86_BUILTIN_CVTTSS2SI64,
21269 IX86_BUILTIN_MAXPS,
21270 IX86_BUILTIN_MAXSS,
21271 IX86_BUILTIN_MINPS,
21272 IX86_BUILTIN_MINSS,
21274 IX86_BUILTIN_LOADUPS,
21275 IX86_BUILTIN_STOREUPS,
21276 IX86_BUILTIN_MOVSS,
21278 IX86_BUILTIN_MOVHLPS,
21279 IX86_BUILTIN_MOVLHPS,
21280 IX86_BUILTIN_LOADHPS,
21281 IX86_BUILTIN_LOADLPS,
21282 IX86_BUILTIN_STOREHPS,
21283 IX86_BUILTIN_STORELPS,
21285 IX86_BUILTIN_MASKMOVQ,
21286 IX86_BUILTIN_MOVMSKPS,
21287 IX86_BUILTIN_PMOVMSKB,
21289 IX86_BUILTIN_MOVNTPS,
21290 IX86_BUILTIN_MOVNTQ,
21292 IX86_BUILTIN_LOADDQU,
21293 IX86_BUILTIN_STOREDQU,
21295 IX86_BUILTIN_PACKSSWB,
21296 IX86_BUILTIN_PACKSSDW,
21297 IX86_BUILTIN_PACKUSWB,
21299 IX86_BUILTIN_PADDB,
21300 IX86_BUILTIN_PADDW,
21301 IX86_BUILTIN_PADDD,
21302 IX86_BUILTIN_PADDQ,
21303 IX86_BUILTIN_PADDSB,
21304 IX86_BUILTIN_PADDSW,
21305 IX86_BUILTIN_PADDUSB,
21306 IX86_BUILTIN_PADDUSW,
21307 IX86_BUILTIN_PSUBB,
21308 IX86_BUILTIN_PSUBW,
21309 IX86_BUILTIN_PSUBD,
21310 IX86_BUILTIN_PSUBQ,
21311 IX86_BUILTIN_PSUBSB,
21312 IX86_BUILTIN_PSUBSW,
21313 IX86_BUILTIN_PSUBUSB,
21314 IX86_BUILTIN_PSUBUSW,
21317 IX86_BUILTIN_PANDN,
21321 IX86_BUILTIN_PAVGB,
21322 IX86_BUILTIN_PAVGW,
21324 IX86_BUILTIN_PCMPEQB,
21325 IX86_BUILTIN_PCMPEQW,
21326 IX86_BUILTIN_PCMPEQD,
21327 IX86_BUILTIN_PCMPGTB,
21328 IX86_BUILTIN_PCMPGTW,
21329 IX86_BUILTIN_PCMPGTD,
21331 IX86_BUILTIN_PMADDWD,
21333 IX86_BUILTIN_PMAXSW,
21334 IX86_BUILTIN_PMAXUB,
21335 IX86_BUILTIN_PMINSW,
21336 IX86_BUILTIN_PMINUB,
21338 IX86_BUILTIN_PMULHUW,
21339 IX86_BUILTIN_PMULHW,
21340 IX86_BUILTIN_PMULLW,
21342 IX86_BUILTIN_PSADBW,
21343 IX86_BUILTIN_PSHUFW,
21345 IX86_BUILTIN_PSLLW,
21346 IX86_BUILTIN_PSLLD,
21347 IX86_BUILTIN_PSLLQ,
21348 IX86_BUILTIN_PSRAW,
21349 IX86_BUILTIN_PSRAD,
21350 IX86_BUILTIN_PSRLW,
21351 IX86_BUILTIN_PSRLD,
21352 IX86_BUILTIN_PSRLQ,
21353 IX86_BUILTIN_PSLLWI,
21354 IX86_BUILTIN_PSLLDI,
21355 IX86_BUILTIN_PSLLQI,
21356 IX86_BUILTIN_PSRAWI,
21357 IX86_BUILTIN_PSRADI,
21358 IX86_BUILTIN_PSRLWI,
21359 IX86_BUILTIN_PSRLDI,
21360 IX86_BUILTIN_PSRLQI,
21362 IX86_BUILTIN_PUNPCKHBW,
21363 IX86_BUILTIN_PUNPCKHWD,
21364 IX86_BUILTIN_PUNPCKHDQ,
21365 IX86_BUILTIN_PUNPCKLBW,
21366 IX86_BUILTIN_PUNPCKLWD,
21367 IX86_BUILTIN_PUNPCKLDQ,
21369 IX86_BUILTIN_SHUFPS,
21371 IX86_BUILTIN_RCPPS,
21372 IX86_BUILTIN_RCPSS,
21373 IX86_BUILTIN_RSQRTPS,
21374 IX86_BUILTIN_RSQRTPS_NR,
21375 IX86_BUILTIN_RSQRTSS,
21376 IX86_BUILTIN_RSQRTF,
21377 IX86_BUILTIN_SQRTPS,
21378 IX86_BUILTIN_SQRTPS_NR,
21379 IX86_BUILTIN_SQRTSS,
21381 IX86_BUILTIN_UNPCKHPS,
21382 IX86_BUILTIN_UNPCKLPS,
21384 IX86_BUILTIN_ANDPS,
21385 IX86_BUILTIN_ANDNPS,
21387 IX86_BUILTIN_XORPS,
21390 IX86_BUILTIN_LDMXCSR,
21391 IX86_BUILTIN_STMXCSR,
21392 IX86_BUILTIN_SFENCE,
21394 /* 3DNow! Original */
21395 IX86_BUILTIN_FEMMS,
21396 IX86_BUILTIN_PAVGUSB,
21397 IX86_BUILTIN_PF2ID,
21398 IX86_BUILTIN_PFACC,
21399 IX86_BUILTIN_PFADD,
21400 IX86_BUILTIN_PFCMPEQ,
21401 IX86_BUILTIN_PFCMPGE,
21402 IX86_BUILTIN_PFCMPGT,
21403 IX86_BUILTIN_PFMAX,
21404 IX86_BUILTIN_PFMIN,
21405 IX86_BUILTIN_PFMUL,
21406 IX86_BUILTIN_PFRCP,
21407 IX86_BUILTIN_PFRCPIT1,
21408 IX86_BUILTIN_PFRCPIT2,
21409 IX86_BUILTIN_PFRSQIT1,
21410 IX86_BUILTIN_PFRSQRT,
21411 IX86_BUILTIN_PFSUB,
21412 IX86_BUILTIN_PFSUBR,
21413 IX86_BUILTIN_PI2FD,
21414 IX86_BUILTIN_PMULHRW,
21416 /* 3DNow! Athlon Extensions */
21417 IX86_BUILTIN_PF2IW,
21418 IX86_BUILTIN_PFNACC,
21419 IX86_BUILTIN_PFPNACC,
21420 IX86_BUILTIN_PI2FW,
21421 IX86_BUILTIN_PSWAPDSI,
21422 IX86_BUILTIN_PSWAPDSF,
21425 IX86_BUILTIN_ADDPD,
21426 IX86_BUILTIN_ADDSD,
21427 IX86_BUILTIN_DIVPD,
21428 IX86_BUILTIN_DIVSD,
21429 IX86_BUILTIN_MULPD,
21430 IX86_BUILTIN_MULSD,
21431 IX86_BUILTIN_SUBPD,
21432 IX86_BUILTIN_SUBSD,
21434 IX86_BUILTIN_CMPEQPD,
21435 IX86_BUILTIN_CMPLTPD,
21436 IX86_BUILTIN_CMPLEPD,
21437 IX86_BUILTIN_CMPGTPD,
21438 IX86_BUILTIN_CMPGEPD,
21439 IX86_BUILTIN_CMPNEQPD,
21440 IX86_BUILTIN_CMPNLTPD,
21441 IX86_BUILTIN_CMPNLEPD,
21442 IX86_BUILTIN_CMPNGTPD,
21443 IX86_BUILTIN_CMPNGEPD,
21444 IX86_BUILTIN_CMPORDPD,
21445 IX86_BUILTIN_CMPUNORDPD,
21446 IX86_BUILTIN_CMPEQSD,
21447 IX86_BUILTIN_CMPLTSD,
21448 IX86_BUILTIN_CMPLESD,
21449 IX86_BUILTIN_CMPNEQSD,
21450 IX86_BUILTIN_CMPNLTSD,
21451 IX86_BUILTIN_CMPNLESD,
21452 IX86_BUILTIN_CMPORDSD,
21453 IX86_BUILTIN_CMPUNORDSD,
21455 IX86_BUILTIN_COMIEQSD,
21456 IX86_BUILTIN_COMILTSD,
21457 IX86_BUILTIN_COMILESD,
21458 IX86_BUILTIN_COMIGTSD,
21459 IX86_BUILTIN_COMIGESD,
21460 IX86_BUILTIN_COMINEQSD,
21461 IX86_BUILTIN_UCOMIEQSD,
21462 IX86_BUILTIN_UCOMILTSD,
21463 IX86_BUILTIN_UCOMILESD,
21464 IX86_BUILTIN_UCOMIGTSD,
21465 IX86_BUILTIN_UCOMIGESD,
21466 IX86_BUILTIN_UCOMINEQSD,
21468 IX86_BUILTIN_MAXPD,
21469 IX86_BUILTIN_MAXSD,
21470 IX86_BUILTIN_MINPD,
21471 IX86_BUILTIN_MINSD,
21473 IX86_BUILTIN_ANDPD,
21474 IX86_BUILTIN_ANDNPD,
21476 IX86_BUILTIN_XORPD,
21478 IX86_BUILTIN_SQRTPD,
21479 IX86_BUILTIN_SQRTSD,
21481 IX86_BUILTIN_UNPCKHPD,
21482 IX86_BUILTIN_UNPCKLPD,
21484 IX86_BUILTIN_SHUFPD,
21486 IX86_BUILTIN_LOADUPD,
21487 IX86_BUILTIN_STOREUPD,
21488 IX86_BUILTIN_MOVSD,
21490 IX86_BUILTIN_LOADHPD,
21491 IX86_BUILTIN_LOADLPD,
21493 IX86_BUILTIN_CVTDQ2PD,
21494 IX86_BUILTIN_CVTDQ2PS,
21496 IX86_BUILTIN_CVTPD2DQ,
21497 IX86_BUILTIN_CVTPD2PI,
21498 IX86_BUILTIN_CVTPD2PS,
21499 IX86_BUILTIN_CVTTPD2DQ,
21500 IX86_BUILTIN_CVTTPD2PI,
21502 IX86_BUILTIN_CVTPI2PD,
21503 IX86_BUILTIN_CVTSI2SD,
21504 IX86_BUILTIN_CVTSI642SD,
21506 IX86_BUILTIN_CVTSD2SI,
21507 IX86_BUILTIN_CVTSD2SI64,
21508 IX86_BUILTIN_CVTSD2SS,
21509 IX86_BUILTIN_CVTSS2SD,
21510 IX86_BUILTIN_CVTTSD2SI,
21511 IX86_BUILTIN_CVTTSD2SI64,
21513 IX86_BUILTIN_CVTPS2DQ,
21514 IX86_BUILTIN_CVTPS2PD,
21515 IX86_BUILTIN_CVTTPS2DQ,
21517 IX86_BUILTIN_MOVNTI,
21518 IX86_BUILTIN_MOVNTPD,
21519 IX86_BUILTIN_MOVNTDQ,
21521 IX86_BUILTIN_MOVQ128,
21524 IX86_BUILTIN_MASKMOVDQU,
21525 IX86_BUILTIN_MOVMSKPD,
21526 IX86_BUILTIN_PMOVMSKB128,
21528 IX86_BUILTIN_PACKSSWB128,
21529 IX86_BUILTIN_PACKSSDW128,
21530 IX86_BUILTIN_PACKUSWB128,
21532 IX86_BUILTIN_PADDB128,
21533 IX86_BUILTIN_PADDW128,
21534 IX86_BUILTIN_PADDD128,
21535 IX86_BUILTIN_PADDQ128,
21536 IX86_BUILTIN_PADDSB128,
21537 IX86_BUILTIN_PADDSW128,
21538 IX86_BUILTIN_PADDUSB128,
21539 IX86_BUILTIN_PADDUSW128,
21540 IX86_BUILTIN_PSUBB128,
21541 IX86_BUILTIN_PSUBW128,
21542 IX86_BUILTIN_PSUBD128,
21543 IX86_BUILTIN_PSUBQ128,
21544 IX86_BUILTIN_PSUBSB128,
21545 IX86_BUILTIN_PSUBSW128,
21546 IX86_BUILTIN_PSUBUSB128,
21547 IX86_BUILTIN_PSUBUSW128,
21549 IX86_BUILTIN_PAND128,
21550 IX86_BUILTIN_PANDN128,
21551 IX86_BUILTIN_POR128,
21552 IX86_BUILTIN_PXOR128,
21554 IX86_BUILTIN_PAVGB128,
21555 IX86_BUILTIN_PAVGW128,
21557 IX86_BUILTIN_PCMPEQB128,
21558 IX86_BUILTIN_PCMPEQW128,
21559 IX86_BUILTIN_PCMPEQD128,
21560 IX86_BUILTIN_PCMPGTB128,
21561 IX86_BUILTIN_PCMPGTW128,
21562 IX86_BUILTIN_PCMPGTD128,
21564 IX86_BUILTIN_PMADDWD128,
21566 IX86_BUILTIN_PMAXSW128,
21567 IX86_BUILTIN_PMAXUB128,
21568 IX86_BUILTIN_PMINSW128,
21569 IX86_BUILTIN_PMINUB128,
21571 IX86_BUILTIN_PMULUDQ,
21572 IX86_BUILTIN_PMULUDQ128,
21573 IX86_BUILTIN_PMULHUW128,
21574 IX86_BUILTIN_PMULHW128,
21575 IX86_BUILTIN_PMULLW128,
21577 IX86_BUILTIN_PSADBW128,
21578 IX86_BUILTIN_PSHUFHW,
21579 IX86_BUILTIN_PSHUFLW,
21580 IX86_BUILTIN_PSHUFD,
21582 IX86_BUILTIN_PSLLDQI128,
21583 IX86_BUILTIN_PSLLWI128,
21584 IX86_BUILTIN_PSLLDI128,
21585 IX86_BUILTIN_PSLLQI128,
21586 IX86_BUILTIN_PSRAWI128,
21587 IX86_BUILTIN_PSRADI128,
21588 IX86_BUILTIN_PSRLDQI128,
21589 IX86_BUILTIN_PSRLWI128,
21590 IX86_BUILTIN_PSRLDI128,
21591 IX86_BUILTIN_PSRLQI128,
21593 IX86_BUILTIN_PSLLDQ128,
21594 IX86_BUILTIN_PSLLW128,
21595 IX86_BUILTIN_PSLLD128,
21596 IX86_BUILTIN_PSLLQ128,
21597 IX86_BUILTIN_PSRAW128,
21598 IX86_BUILTIN_PSRAD128,
21599 IX86_BUILTIN_PSRLW128,
21600 IX86_BUILTIN_PSRLD128,
21601 IX86_BUILTIN_PSRLQ128,
21603 IX86_BUILTIN_PUNPCKHBW128,
21604 IX86_BUILTIN_PUNPCKHWD128,
21605 IX86_BUILTIN_PUNPCKHDQ128,
21606 IX86_BUILTIN_PUNPCKHQDQ128,
21607 IX86_BUILTIN_PUNPCKLBW128,
21608 IX86_BUILTIN_PUNPCKLWD128,
21609 IX86_BUILTIN_PUNPCKLDQ128,
21610 IX86_BUILTIN_PUNPCKLQDQ128,
21612 IX86_BUILTIN_CLFLUSH,
21613 IX86_BUILTIN_MFENCE,
21614 IX86_BUILTIN_LFENCE,
21616 IX86_BUILTIN_BSRSI,
21617 IX86_BUILTIN_BSRDI,
21618 IX86_BUILTIN_RDPMC,
21619 IX86_BUILTIN_RDTSC,
21620 IX86_BUILTIN_RDTSCP,
21621 IX86_BUILTIN_ROLQI,
21622 IX86_BUILTIN_ROLHI,
21623 IX86_BUILTIN_RORQI,
21624 IX86_BUILTIN_RORHI,
21627 IX86_BUILTIN_ADDSUBPS,
21628 IX86_BUILTIN_HADDPS,
21629 IX86_BUILTIN_HSUBPS,
21630 IX86_BUILTIN_MOVSHDUP,
21631 IX86_BUILTIN_MOVSLDUP,
21632 IX86_BUILTIN_ADDSUBPD,
21633 IX86_BUILTIN_HADDPD,
21634 IX86_BUILTIN_HSUBPD,
21635 IX86_BUILTIN_LDDQU,
21637 IX86_BUILTIN_MONITOR,
21638 IX86_BUILTIN_MWAIT,
21641 IX86_BUILTIN_PHADDW,
21642 IX86_BUILTIN_PHADDD,
21643 IX86_BUILTIN_PHADDSW,
21644 IX86_BUILTIN_PHSUBW,
21645 IX86_BUILTIN_PHSUBD,
21646 IX86_BUILTIN_PHSUBSW,
21647 IX86_BUILTIN_PMADDUBSW,
21648 IX86_BUILTIN_PMULHRSW,
21649 IX86_BUILTIN_PSHUFB,
21650 IX86_BUILTIN_PSIGNB,
21651 IX86_BUILTIN_PSIGNW,
21652 IX86_BUILTIN_PSIGND,
21653 IX86_BUILTIN_PALIGNR,
21654 IX86_BUILTIN_PABSB,
21655 IX86_BUILTIN_PABSW,
21656 IX86_BUILTIN_PABSD,
21658 IX86_BUILTIN_PHADDW128,
21659 IX86_BUILTIN_PHADDD128,
21660 IX86_BUILTIN_PHADDSW128,
21661 IX86_BUILTIN_PHSUBW128,
21662 IX86_BUILTIN_PHSUBD128,
21663 IX86_BUILTIN_PHSUBSW128,
21664 IX86_BUILTIN_PMADDUBSW128,
21665 IX86_BUILTIN_PMULHRSW128,
21666 IX86_BUILTIN_PSHUFB128,
21667 IX86_BUILTIN_PSIGNB128,
21668 IX86_BUILTIN_PSIGNW128,
21669 IX86_BUILTIN_PSIGND128,
21670 IX86_BUILTIN_PALIGNR128,
21671 IX86_BUILTIN_PABSB128,
21672 IX86_BUILTIN_PABSW128,
21673 IX86_BUILTIN_PABSD128,
21675 /* AMDFAM10 - SSE4A New Instructions. */
21676 IX86_BUILTIN_MOVNTSD,
21677 IX86_BUILTIN_MOVNTSS,
21678 IX86_BUILTIN_EXTRQI,
21679 IX86_BUILTIN_EXTRQ,
21680 IX86_BUILTIN_INSERTQI,
21681 IX86_BUILTIN_INSERTQ,
21684 IX86_BUILTIN_BLENDPD,
21685 IX86_BUILTIN_BLENDPS,
21686 IX86_BUILTIN_BLENDVPD,
21687 IX86_BUILTIN_BLENDVPS,
21688 IX86_BUILTIN_PBLENDVB128,
21689 IX86_BUILTIN_PBLENDW128,
21694 IX86_BUILTIN_INSERTPS128,
21696 IX86_BUILTIN_MOVNTDQA,
21697 IX86_BUILTIN_MPSADBW128,
21698 IX86_BUILTIN_PACKUSDW128,
21699 IX86_BUILTIN_PCMPEQQ,
21700 IX86_BUILTIN_PHMINPOSUW128,
21702 IX86_BUILTIN_PMAXSB128,
21703 IX86_BUILTIN_PMAXSD128,
21704 IX86_BUILTIN_PMAXUD128,
21705 IX86_BUILTIN_PMAXUW128,
21707 IX86_BUILTIN_PMINSB128,
21708 IX86_BUILTIN_PMINSD128,
21709 IX86_BUILTIN_PMINUD128,
21710 IX86_BUILTIN_PMINUW128,
21712 IX86_BUILTIN_PMOVSXBW128,
21713 IX86_BUILTIN_PMOVSXBD128,
21714 IX86_BUILTIN_PMOVSXBQ128,
21715 IX86_BUILTIN_PMOVSXWD128,
21716 IX86_BUILTIN_PMOVSXWQ128,
21717 IX86_BUILTIN_PMOVSXDQ128,
21719 IX86_BUILTIN_PMOVZXBW128,
21720 IX86_BUILTIN_PMOVZXBD128,
21721 IX86_BUILTIN_PMOVZXBQ128,
21722 IX86_BUILTIN_PMOVZXWD128,
21723 IX86_BUILTIN_PMOVZXWQ128,
21724 IX86_BUILTIN_PMOVZXDQ128,
21726 IX86_BUILTIN_PMULDQ128,
21727 IX86_BUILTIN_PMULLD128,
21729 IX86_BUILTIN_ROUNDPD,
21730 IX86_BUILTIN_ROUNDPS,
21731 IX86_BUILTIN_ROUNDSD,
21732 IX86_BUILTIN_ROUNDSS,
21734 IX86_BUILTIN_PTESTZ,
21735 IX86_BUILTIN_PTESTC,
21736 IX86_BUILTIN_PTESTNZC,
21738 IX86_BUILTIN_VEC_INIT_V2SI,
21739 IX86_BUILTIN_VEC_INIT_V4HI,
21740 IX86_BUILTIN_VEC_INIT_V8QI,
21741 IX86_BUILTIN_VEC_EXT_V2DF,
21742 IX86_BUILTIN_VEC_EXT_V2DI,
21743 IX86_BUILTIN_VEC_EXT_V4SF,
21744 IX86_BUILTIN_VEC_EXT_V4SI,
21745 IX86_BUILTIN_VEC_EXT_V8HI,
21746 IX86_BUILTIN_VEC_EXT_V2SI,
21747 IX86_BUILTIN_VEC_EXT_V4HI,
21748 IX86_BUILTIN_VEC_EXT_V16QI,
21749 IX86_BUILTIN_VEC_SET_V2DI,
21750 IX86_BUILTIN_VEC_SET_V4SF,
21751 IX86_BUILTIN_VEC_SET_V4SI,
21752 IX86_BUILTIN_VEC_SET_V8HI,
21753 IX86_BUILTIN_VEC_SET_V4HI,
21754 IX86_BUILTIN_VEC_SET_V16QI,
21756 IX86_BUILTIN_VEC_PACK_SFIX,
21759 IX86_BUILTIN_CRC32QI,
21760 IX86_BUILTIN_CRC32HI,
21761 IX86_BUILTIN_CRC32SI,
21762 IX86_BUILTIN_CRC32DI,
21764 IX86_BUILTIN_PCMPESTRI128,
21765 IX86_BUILTIN_PCMPESTRM128,
21766 IX86_BUILTIN_PCMPESTRA128,
21767 IX86_BUILTIN_PCMPESTRC128,
21768 IX86_BUILTIN_PCMPESTRO128,
21769 IX86_BUILTIN_PCMPESTRS128,
21770 IX86_BUILTIN_PCMPESTRZ128,
21771 IX86_BUILTIN_PCMPISTRI128,
21772 IX86_BUILTIN_PCMPISTRM128,
21773 IX86_BUILTIN_PCMPISTRA128,
21774 IX86_BUILTIN_PCMPISTRC128,
21775 IX86_BUILTIN_PCMPISTRO128,
21776 IX86_BUILTIN_PCMPISTRS128,
21777 IX86_BUILTIN_PCMPISTRZ128,
21779 IX86_BUILTIN_PCMPGTQ,
21781 /* AES instructions */
21782 IX86_BUILTIN_AESENC128,
21783 IX86_BUILTIN_AESENCLAST128,
21784 IX86_BUILTIN_AESDEC128,
21785 IX86_BUILTIN_AESDECLAST128,
21786 IX86_BUILTIN_AESIMC128,
21787 IX86_BUILTIN_AESKEYGENASSIST128,
21789 /* PCLMUL instruction */
21790 IX86_BUILTIN_PCLMULQDQ128,
21793 IX86_BUILTIN_ADDPD256,
21794 IX86_BUILTIN_ADDPS256,
21795 IX86_BUILTIN_ADDSUBPD256,
21796 IX86_BUILTIN_ADDSUBPS256,
21797 IX86_BUILTIN_ANDPD256,
21798 IX86_BUILTIN_ANDPS256,
21799 IX86_BUILTIN_ANDNPD256,
21800 IX86_BUILTIN_ANDNPS256,
21801 IX86_BUILTIN_BLENDPD256,
21802 IX86_BUILTIN_BLENDPS256,
21803 IX86_BUILTIN_BLENDVPD256,
21804 IX86_BUILTIN_BLENDVPS256,
21805 IX86_BUILTIN_DIVPD256,
21806 IX86_BUILTIN_DIVPS256,
21807 IX86_BUILTIN_DPPS256,
21808 IX86_BUILTIN_HADDPD256,
21809 IX86_BUILTIN_HADDPS256,
21810 IX86_BUILTIN_HSUBPD256,
21811 IX86_BUILTIN_HSUBPS256,
21812 IX86_BUILTIN_MAXPD256,
21813 IX86_BUILTIN_MAXPS256,
21814 IX86_BUILTIN_MINPD256,
21815 IX86_BUILTIN_MINPS256,
21816 IX86_BUILTIN_MULPD256,
21817 IX86_BUILTIN_MULPS256,
21818 IX86_BUILTIN_ORPD256,
21819 IX86_BUILTIN_ORPS256,
21820 IX86_BUILTIN_SHUFPD256,
21821 IX86_BUILTIN_SHUFPS256,
21822 IX86_BUILTIN_SUBPD256,
21823 IX86_BUILTIN_SUBPS256,
21824 IX86_BUILTIN_XORPD256,
21825 IX86_BUILTIN_XORPS256,
21826 IX86_BUILTIN_CMPSD,
21827 IX86_BUILTIN_CMPSS,
21828 IX86_BUILTIN_CMPPD,
21829 IX86_BUILTIN_CMPPS,
21830 IX86_BUILTIN_CMPPD256,
21831 IX86_BUILTIN_CMPPS256,
21832 IX86_BUILTIN_CVTDQ2PD256,
21833 IX86_BUILTIN_CVTDQ2PS256,
21834 IX86_BUILTIN_CVTPD2PS256,
21835 IX86_BUILTIN_CVTPS2DQ256,
21836 IX86_BUILTIN_CVTPS2PD256,
21837 IX86_BUILTIN_CVTTPD2DQ256,
21838 IX86_BUILTIN_CVTPD2DQ256,
21839 IX86_BUILTIN_CVTTPS2DQ256,
21840 IX86_BUILTIN_EXTRACTF128PD256,
21841 IX86_BUILTIN_EXTRACTF128PS256,
21842 IX86_BUILTIN_EXTRACTF128SI256,
21843 IX86_BUILTIN_VZEROALL,
21844 IX86_BUILTIN_VZEROUPPER,
21845 IX86_BUILTIN_VPERMILVARPD,
21846 IX86_BUILTIN_VPERMILVARPS,
21847 IX86_BUILTIN_VPERMILVARPD256,
21848 IX86_BUILTIN_VPERMILVARPS256,
21849 IX86_BUILTIN_VPERMILPD,
21850 IX86_BUILTIN_VPERMILPS,
21851 IX86_BUILTIN_VPERMILPD256,
21852 IX86_BUILTIN_VPERMILPS256,
21853 IX86_BUILTIN_VPERMIL2PD,
21854 IX86_BUILTIN_VPERMIL2PS,
21855 IX86_BUILTIN_VPERMIL2PD256,
21856 IX86_BUILTIN_VPERMIL2PS256,
21857 IX86_BUILTIN_VPERM2F128PD256,
21858 IX86_BUILTIN_VPERM2F128PS256,
21859 IX86_BUILTIN_VPERM2F128SI256,
21860 IX86_BUILTIN_VBROADCASTSS,
21861 IX86_BUILTIN_VBROADCASTSD256,
21862 IX86_BUILTIN_VBROADCASTSS256,
21863 IX86_BUILTIN_VBROADCASTPD256,
21864 IX86_BUILTIN_VBROADCASTPS256,
21865 IX86_BUILTIN_VINSERTF128PD256,
21866 IX86_BUILTIN_VINSERTF128PS256,
21867 IX86_BUILTIN_VINSERTF128SI256,
21868 IX86_BUILTIN_LOADUPD256,
21869 IX86_BUILTIN_LOADUPS256,
21870 IX86_BUILTIN_STOREUPD256,
21871 IX86_BUILTIN_STOREUPS256,
21872 IX86_BUILTIN_LDDQU256,
21873 IX86_BUILTIN_MOVNTDQ256,
21874 IX86_BUILTIN_MOVNTPD256,
21875 IX86_BUILTIN_MOVNTPS256,
21876 IX86_BUILTIN_LOADDQU256,
21877 IX86_BUILTIN_STOREDQU256,
21878 IX86_BUILTIN_MASKLOADPD,
21879 IX86_BUILTIN_MASKLOADPS,
21880 IX86_BUILTIN_MASKSTOREPD,
21881 IX86_BUILTIN_MASKSTOREPS,
21882 IX86_BUILTIN_MASKLOADPD256,
21883 IX86_BUILTIN_MASKLOADPS256,
21884 IX86_BUILTIN_MASKSTOREPD256,
21885 IX86_BUILTIN_MASKSTOREPS256,
21886 IX86_BUILTIN_MOVSHDUP256,
21887 IX86_BUILTIN_MOVSLDUP256,
21888 IX86_BUILTIN_MOVDDUP256,
21890 IX86_BUILTIN_SQRTPD256,
21891 IX86_BUILTIN_SQRTPS256,
21892 IX86_BUILTIN_SQRTPS_NR256,
21893 IX86_BUILTIN_RSQRTPS256,
21894 IX86_BUILTIN_RSQRTPS_NR256,
21896 IX86_BUILTIN_RCPPS256,
21898 IX86_BUILTIN_ROUNDPD256,
21899 IX86_BUILTIN_ROUNDPS256,
21901 IX86_BUILTIN_UNPCKHPD256,
21902 IX86_BUILTIN_UNPCKLPD256,
21903 IX86_BUILTIN_UNPCKHPS256,
21904 IX86_BUILTIN_UNPCKLPS256,
21906 IX86_BUILTIN_SI256_SI,
21907 IX86_BUILTIN_PS256_PS,
21908 IX86_BUILTIN_PD256_PD,
21909 IX86_BUILTIN_SI_SI256,
21910 IX86_BUILTIN_PS_PS256,
21911 IX86_BUILTIN_PD_PD256,
21913 IX86_BUILTIN_VTESTZPD,
21914 IX86_BUILTIN_VTESTCPD,
21915 IX86_BUILTIN_VTESTNZCPD,
21916 IX86_BUILTIN_VTESTZPS,
21917 IX86_BUILTIN_VTESTCPS,
21918 IX86_BUILTIN_VTESTNZCPS,
21919 IX86_BUILTIN_VTESTZPD256,
21920 IX86_BUILTIN_VTESTCPD256,
21921 IX86_BUILTIN_VTESTNZCPD256,
21922 IX86_BUILTIN_VTESTZPS256,
21923 IX86_BUILTIN_VTESTCPS256,
21924 IX86_BUILTIN_VTESTNZCPS256,
21925 IX86_BUILTIN_PTESTZ256,
21926 IX86_BUILTIN_PTESTC256,
21927 IX86_BUILTIN_PTESTNZC256,
21929 IX86_BUILTIN_MOVMSKPD256,
21930 IX86_BUILTIN_MOVMSKPS256,
21932 /* TFmode support builtins. */
21934 IX86_BUILTIN_HUGE_VALQ,
21935 IX86_BUILTIN_FABSQ,
21936 IX86_BUILTIN_COPYSIGNQ,
21938 /* Vectorizer support builtins. */
21939 IX86_BUILTIN_CPYSGNPS,
21940 IX86_BUILTIN_CPYSGNPD,
21942 IX86_BUILTIN_CVTUDQ2PS,
21944 IX86_BUILTIN_VEC_PERM_V2DF,
21945 IX86_BUILTIN_VEC_PERM_V4SF,
21946 IX86_BUILTIN_VEC_PERM_V2DI,
21947 IX86_BUILTIN_VEC_PERM_V4SI,
21948 IX86_BUILTIN_VEC_PERM_V8HI,
21949 IX86_BUILTIN_VEC_PERM_V16QI,
21950 IX86_BUILTIN_VEC_PERM_V2DI_U,
21951 IX86_BUILTIN_VEC_PERM_V4SI_U,
21952 IX86_BUILTIN_VEC_PERM_V8HI_U,
21953 IX86_BUILTIN_VEC_PERM_V16QI_U,
21954 IX86_BUILTIN_VEC_PERM_V4DF,
21955 IX86_BUILTIN_VEC_PERM_V8SF,
21957 /* FMA4 and XOP instructions. */
21958 IX86_BUILTIN_VFMADDSS,
21959 IX86_BUILTIN_VFMADDSD,
21960 IX86_BUILTIN_VFMADDPS,
21961 IX86_BUILTIN_VFMADDPD,
21962 IX86_BUILTIN_VFMSUBSS,
21963 IX86_BUILTIN_VFMSUBSD,
21964 IX86_BUILTIN_VFMSUBPS,
21965 IX86_BUILTIN_VFMSUBPD,
21966 IX86_BUILTIN_VFMADDSUBPS,
21967 IX86_BUILTIN_VFMADDSUBPD,
21968 IX86_BUILTIN_VFMSUBADDPS,
21969 IX86_BUILTIN_VFMSUBADDPD,
21970 IX86_BUILTIN_VFNMADDSS,
21971 IX86_BUILTIN_VFNMADDSD,
21972 IX86_BUILTIN_VFNMADDPS,
21973 IX86_BUILTIN_VFNMADDPD,
21974 IX86_BUILTIN_VFNMSUBSS,
21975 IX86_BUILTIN_VFNMSUBSD,
21976 IX86_BUILTIN_VFNMSUBPS,
21977 IX86_BUILTIN_VFNMSUBPD,
21978 IX86_BUILTIN_VFMADDPS256,
21979 IX86_BUILTIN_VFMADDPD256,
21980 IX86_BUILTIN_VFMSUBPS256,
21981 IX86_BUILTIN_VFMSUBPD256,
21982 IX86_BUILTIN_VFMADDSUBPS256,
21983 IX86_BUILTIN_VFMADDSUBPD256,
21984 IX86_BUILTIN_VFMSUBADDPS256,
21985 IX86_BUILTIN_VFMSUBADDPD256,
21986 IX86_BUILTIN_VFNMADDPS256,
  IX86_BUILTIN_VFNMADDPD256,
  IX86_BUILTIN_VFNMSUBPS256,
  IX86_BUILTIN_VFNMSUBPD256,

  IX86_BUILTIN_VPCMOV,
  IX86_BUILTIN_VPCMOV_V2DI,
  IX86_BUILTIN_VPCMOV_V4SI,
  IX86_BUILTIN_VPCMOV_V8HI,
  IX86_BUILTIN_VPCMOV_V16QI,
  IX86_BUILTIN_VPCMOV_V4SF,
  IX86_BUILTIN_VPCMOV_V2DF,
  IX86_BUILTIN_VPCMOV256,
  IX86_BUILTIN_VPCMOV_V4DI256,
  IX86_BUILTIN_VPCMOV_V8SI256,
  IX86_BUILTIN_VPCMOV_V16HI256,
  IX86_BUILTIN_VPCMOV_V32QI256,
  IX86_BUILTIN_VPCMOV_V8SF256,
  IX86_BUILTIN_VPCMOV_V4DF256,

  IX86_BUILTIN_VPPERM,

  IX86_BUILTIN_VPMACSSWW,
  IX86_BUILTIN_VPMACSWW,
  IX86_BUILTIN_VPMACSSWD,
  IX86_BUILTIN_VPMACSWD,
  IX86_BUILTIN_VPMACSSDD,
  IX86_BUILTIN_VPMACSDD,
  IX86_BUILTIN_VPMACSSDQL,
  IX86_BUILTIN_VPMACSSDQH,
  IX86_BUILTIN_VPMACSDQL,
  IX86_BUILTIN_VPMACSDQH,
  IX86_BUILTIN_VPMADCSSWD,
  IX86_BUILTIN_VPMADCSWD,

  IX86_BUILTIN_VPHADDBW,
  IX86_BUILTIN_VPHADDBD,
  IX86_BUILTIN_VPHADDBQ,
  IX86_BUILTIN_VPHADDWD,
  IX86_BUILTIN_VPHADDWQ,
  IX86_BUILTIN_VPHADDDQ,
  IX86_BUILTIN_VPHADDUBW,
  IX86_BUILTIN_VPHADDUBD,
  IX86_BUILTIN_VPHADDUBQ,
  IX86_BUILTIN_VPHADDUWD,
  IX86_BUILTIN_VPHADDUWQ,
  IX86_BUILTIN_VPHADDUDQ,
  IX86_BUILTIN_VPHSUBBW,
  IX86_BUILTIN_VPHSUBWD,
  IX86_BUILTIN_VPHSUBDQ,

  IX86_BUILTIN_VPROTB,
  IX86_BUILTIN_VPROTW,
  IX86_BUILTIN_VPROTD,
  IX86_BUILTIN_VPROTQ,
  IX86_BUILTIN_VPROTB_IMM,
  IX86_BUILTIN_VPROTW_IMM,
  IX86_BUILTIN_VPROTD_IMM,
  IX86_BUILTIN_VPROTQ_IMM,

  IX86_BUILTIN_VPSHLB,
  IX86_BUILTIN_VPSHLW,
  IX86_BUILTIN_VPSHLD,
  IX86_BUILTIN_VPSHLQ,
  IX86_BUILTIN_VPSHAB,
  IX86_BUILTIN_VPSHAW,
  IX86_BUILTIN_VPSHAD,
  IX86_BUILTIN_VPSHAQ,

  IX86_BUILTIN_VFRCZSS,
  IX86_BUILTIN_VFRCZSD,
  IX86_BUILTIN_VFRCZPS,
  IX86_BUILTIN_VFRCZPD,
  IX86_BUILTIN_VFRCZPS256,
  IX86_BUILTIN_VFRCZPD256,

  IX86_BUILTIN_VPCOMEQUB,
  IX86_BUILTIN_VPCOMNEUB,
  IX86_BUILTIN_VPCOMLTUB,
  IX86_BUILTIN_VPCOMLEUB,
  IX86_BUILTIN_VPCOMGTUB,
  IX86_BUILTIN_VPCOMGEUB,
  IX86_BUILTIN_VPCOMFALSEUB,
  IX86_BUILTIN_VPCOMTRUEUB,

  IX86_BUILTIN_VPCOMEQUW,
  IX86_BUILTIN_VPCOMNEUW,
  IX86_BUILTIN_VPCOMLTUW,
  IX86_BUILTIN_VPCOMLEUW,
  IX86_BUILTIN_VPCOMGTUW,
  IX86_BUILTIN_VPCOMGEUW,
  IX86_BUILTIN_VPCOMFALSEUW,
  IX86_BUILTIN_VPCOMTRUEUW,

  IX86_BUILTIN_VPCOMEQUD,
  IX86_BUILTIN_VPCOMNEUD,
  IX86_BUILTIN_VPCOMLTUD,
  IX86_BUILTIN_VPCOMLEUD,
  IX86_BUILTIN_VPCOMGTUD,
  IX86_BUILTIN_VPCOMGEUD,
  IX86_BUILTIN_VPCOMFALSEUD,
  IX86_BUILTIN_VPCOMTRUEUD,

  IX86_BUILTIN_VPCOMEQUQ,
  IX86_BUILTIN_VPCOMNEUQ,
  IX86_BUILTIN_VPCOMLTUQ,
  IX86_BUILTIN_VPCOMLEUQ,
  IX86_BUILTIN_VPCOMGTUQ,
  IX86_BUILTIN_VPCOMGEUQ,
  IX86_BUILTIN_VPCOMFALSEUQ,
  IX86_BUILTIN_VPCOMTRUEUQ,

  IX86_BUILTIN_VPCOMEQB,
  IX86_BUILTIN_VPCOMNEB,
  IX86_BUILTIN_VPCOMLTB,
  IX86_BUILTIN_VPCOMLEB,
  IX86_BUILTIN_VPCOMGTB,
  IX86_BUILTIN_VPCOMGEB,
  IX86_BUILTIN_VPCOMFALSEB,
  IX86_BUILTIN_VPCOMTRUEB,

  IX86_BUILTIN_VPCOMEQW,
  IX86_BUILTIN_VPCOMNEW,
  IX86_BUILTIN_VPCOMLTW,
  IX86_BUILTIN_VPCOMLEW,
  IX86_BUILTIN_VPCOMGTW,
  IX86_BUILTIN_VPCOMGEW,
  IX86_BUILTIN_VPCOMFALSEW,
  IX86_BUILTIN_VPCOMTRUEW,

  IX86_BUILTIN_VPCOMEQD,
  IX86_BUILTIN_VPCOMNED,
  IX86_BUILTIN_VPCOMLTD,
  IX86_BUILTIN_VPCOMLED,
  IX86_BUILTIN_VPCOMGTD,
  IX86_BUILTIN_VPCOMGED,
  IX86_BUILTIN_VPCOMFALSED,
  IX86_BUILTIN_VPCOMTRUED,

  IX86_BUILTIN_VPCOMEQQ,
  IX86_BUILTIN_VPCOMNEQ,
  IX86_BUILTIN_VPCOMLTQ,
  IX86_BUILTIN_VPCOMLEQ,
  IX86_BUILTIN_VPCOMGTQ,
  IX86_BUILTIN_VPCOMGEQ,
  IX86_BUILTIN_VPCOMFALSEQ,
  IX86_BUILTIN_VPCOMTRUEQ,

  /* LWP instructions.  */
  IX86_BUILTIN_LLWPCB,
  IX86_BUILTIN_SLWPCB,
  IX86_BUILTIN_LWPVAL32,
  IX86_BUILTIN_LWPVAL64,
  IX86_BUILTIN_LWPINS32,
  IX86_BUILTIN_LWPINS64,

  /* FSGSBASE instructions.  */
  IX86_BUILTIN_RDFSBASE32,
  IX86_BUILTIN_RDFSBASE64,
  IX86_BUILTIN_RDGSBASE32,
  IX86_BUILTIN_RDGSBASE64,
  IX86_BUILTIN_WRFSBASE32,
  IX86_BUILTIN_WRFSBASE64,
  IX86_BUILTIN_WRGSBASE32,
  IX86_BUILTIN_WRGSBASE64,

  /* RDRND instructions.  */
  IX86_BUILTIN_RDRAND16,
  IX86_BUILTIN_RDRAND32,
  IX86_BUILTIN_RDRAND64,

  /* F16C instructions.  */
  IX86_BUILTIN_CVTPH2PS,
  IX86_BUILTIN_CVTPH2PS256,
  IX86_BUILTIN_CVTPS2PH,
  IX86_BUILTIN_CVTPS2PH256,
  IX86_BUILTIN_MAX
};

/* Table for the ix86 builtin decls.  */
static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
/* Table of all of the builtin functions that are possible with different ISA's
   but are waiting to be built until a function is declared to use that
   ISA.  */
struct builtin_isa {
  const char *name;		/* function name */
  enum ix86_builtin_func_type tcode; /* type to use in the declaration */
  int isa;			/* isa_flags this builtin is defined for */
  bool const_p;			/* true if the declaration is constant */
  bool set_and_not_built_p;	/* true if recorded here but not yet built */
};

static struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save the MASK
   of which isa_flags to use in the ix86_builtins_isa array.  Stores the
   function decl in the ix86_builtins array.  Returns the function decl or
   NULL_TREE, if the builtin was not added.

   If the front end has a special hook for builtin functions, delay adding
   builtin functions that aren't in the current ISA until the ISA is changed
   with function specific optimization.  Doing so can save about 300K for the
   default compiler.  When the builtin is expanded, check at that time whether
   it is valid.

   If the front end doesn't have a special hook, record all builtins, even if
   they aren't in the current ISA, in case the user uses function specific
   options for a different ISA, so that we don't get scope errors if a builtin
   is added in the middle of a function scope.  */
static inline tree
def_builtin (int mask, const char *name, enum ix86_builtin_func_type tcode,
	     enum ix86_builtins code)
{
  tree decl = NULL_TREE;

  if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
    {
      ix86_builtins_isa[(int) code].isa = mask;

      mask &= ~OPTION_MASK_ISA_64BIT;
      if (mask == 0
	  || (mask & ix86_isa_flags) != 0
	  || (lang_hooks.builtin_function
	      == lang_hooks.builtin_function_ext_scope))
	{
	  /* The builtin is in the current ISA (or the front end can
	     declare builtins at extended scope): build the decl now.  */
	  tree type = ix86_get_builtin_func_type (tcode);
	  decl = add_builtin_function (name, type, code, BUILT_IN_MD,
				       NULL, NULL_TREE);
	  ix86_builtins[(int) code] = decl;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = false;
	}
      else
	{
	  /* Defer building the decl; record what is needed so that
	     ix86_add_new_builtins can create it later.  */
	  ix86_builtins[(int) code] = NULL_TREE;
	  ix86_builtins_isa[(int) code].tcode = tcode;
	  ix86_builtins_isa[(int) code].name = name;
	  ix86_builtins_isa[(int) code].const_p = false;
	  ix86_builtins_isa[(int) code].set_and_not_built_p = true;
	}
    }

  return decl;
}
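/* Illustration added for exposition (not part of the original source;
   the builtin name and enum value below are hypothetical):

     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE);

   If SSE2 is not in ix86_isa_flags and the front end lacks an
   extended-scope hook... more precisely, if it has one, a call like this
   only records the name and type in ix86_builtins_isa; the decl is
   materialized later by ix86_add_new_builtins once the ISA is enabled.  */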
/* Like def_builtin, but also marks the function decl "const".  */

static inline tree
def_builtin_const (int mask, const char *name,
		   enum ix86_builtin_func_type tcode, enum ix86_builtins code)
{
  tree decl = def_builtin (mask, name, tcode, code);
  if (decl)
    TREE_READONLY (decl) = 1;
  else
    ix86_builtins_isa[(int) code].const_p = true;

  return decl;
}
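/* Illustration added for exposition (hypothetical names): pure value
   computations are registered through this wrapper, e.g.

     def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example_add",
			V2DF_FTYPE_V2DF_V2DF, IX86_BUILTIN_EXAMPLE_ADD);

   Setting TREE_READONLY lets the middle end CSE or hoist calls to the
   builtin, since a "const" function depends only on its arguments.  */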
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we didn't use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].set_and_not_built_p)
	{
	  tree decl, type;

	  /* Don't define the builtin again.  */
	  ix86_builtins_isa[i].set_and_not_built_p = false;

	  type = ix86_get_builtin_func_type (ix86_builtins_isa[i].tcode);
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 type, i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}
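/* Note added for exposition: this is intended to run when the set of
   enabled ISA flags grows, e.g. after a function-specific target option
   turns on a new instruction set, roughly

     ix86_add_new_builtins (ix86_isa_flags);

   so that builtins deferred by def_builtin become declared at that
   point.  */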
/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap_comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS 1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
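/* Worked example added for exposition: hardware compare patterns often
   exist in only one direction, so a table entry can request the reversed
   comparison instead.  For instance, a "greater than" builtin can be
   listed with comparison LT plus an operand swap, expanding a > b as
   b < a; the *_SWAP function types in the tables below and the
   BUILTIN_DESC_SWAP_OPERANDS bit above exist for that purpose.  */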
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};
static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};
static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};
/* Special builtins with variable number of arguments.  */
static const struct builtin_description bdesc_special_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtsc, "__builtin_ia32_rdtsc", IX86_BUILTIN_RDTSC, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdtscp, "__builtin_ia32_rdtscp", IX86_BUILTIN_RDTSCP, UNKNOWN, (int) UINT64_FTYPE_PUNSIGNED },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },

  /* SSE or 3DNow!A  */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },

  /* SSE3 */
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },

  /* SSE4.1 */
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },

  /* SSE4A */
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },

  /* AVX */
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, "__builtin_ia32_vzeroupper", IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },

  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4sf, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv4df, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_dupv8sf, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v4df, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_v8sf, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },

  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_llwpcb, "__builtin_ia32_llwpcb", IX86_BUILTIN_LLWPCB, UNKNOWN, (int) VOID_FTYPE_PVOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_slwpcb, "__builtin_ia32_slwpcb", IX86_BUILTIN_SLWPCB, UNKNOWN, (int) PVOID_FTYPE_VOID },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvalsi3, "__builtin_ia32_lwpval32", IX86_BUILTIN_LWPVAL32, UNKNOWN, (int) VOID_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpvaldi3, "__builtin_ia32_lwpval64", IX86_BUILTIN_LWPVAL64, UNKNOWN, (int) VOID_FTYPE_UINT64_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinssi3, "__builtin_ia32_lwpins32", IX86_BUILTIN_LWPINS32, UNKNOWN, (int) UCHAR_FTYPE_UINT_UINT_UINT },
  { OPTION_MASK_ISA_LWP, CODE_FOR_lwp_lwpinsdi3, "__builtin_ia32_lwpins64", IX86_BUILTIN_LWPINS64, UNKNOWN, (int) UCHAR_FTYPE_UINT64_UINT_UINT },

  /* FSGSBASE */
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasesi, "__builtin_ia32_rdfsbase32", IX86_BUILTIN_RDFSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdfsbasedi, "__builtin_ia32_rdfsbase64", IX86_BUILTIN_RDFSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasesi, "__builtin_ia32_rdgsbase32", IX86_BUILTIN_RDGSBASE32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_rdgsbasedi, "__builtin_ia32_rdgsbase64", IX86_BUILTIN_RDGSBASE64, UNKNOWN, (int) UINT64_FTYPE_VOID },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasesi, "__builtin_ia32_wrfsbase32", IX86_BUILTIN_WRFSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrfsbasedi, "__builtin_ia32_wrfsbase64", IX86_BUILTIN_WRFSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasesi, "__builtin_ia32_wrgsbase32", IX86_BUILTIN_WRGSBASE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED },
  { OPTION_MASK_ISA_FSGSBASE | OPTION_MASK_ISA_64BIT, CODE_FOR_wrgsbasedi, "__builtin_ia32_wrgsbase64", IX86_BUILTIN_WRGSBASE64, UNKNOWN, (int) VOID_FTYPE_UINT64 },

  /* RDRND */
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandhi, "__builtin_ia32_rdrand16", IX86_BUILTIN_RDRAND16, UNKNOWN, (int) UINT16_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND, CODE_FOR_rdrandsi, "__builtin_ia32_rdrand32", IX86_BUILTIN_RDRAND32, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
  { OPTION_MASK_ISA_RDRND | OPTION_MASK_ISA_64BIT, CODE_FOR_rdranddi, "__builtin_ia32_rdrand64", IX86_BUILTIN_RDRAND64, UNKNOWN, (int) UINT64_FTYPE_VOID },
};
/* Builtins with variable number of arguments.  */
static const struct builtin_description bdesc_args[] =
{
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_bsr, "__builtin_ia32_bsrsi", IX86_BUILTIN_BSRSI, UNKNOWN, (int) INT_FTYPE_INT },
  { OPTION_MASK_ISA_64BIT, CODE_FOR_bsr_rex64, "__builtin_ia32_bsrdi", IX86_BUILTIN_BSRDI, UNKNOWN, (int) INT64_FTYPE_INT64 },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rdpmc, "__builtin_ia32_rdpmc", IX86_BUILTIN_RDPMC, UNKNOWN, (int) UINT64_FTYPE_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlqi3, "__builtin_ia32_rolqi", IX86_BUILTIN_ROLQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotlhi3, "__builtin_ia32_rolhi", IX86_BUILTIN_ROLHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", IX86_BUILTIN_RORQI, UNKNOWN, (int) UINT8_FTYPE_UINT8_INT },
  { ~OPTION_MASK_ISA_64BIT, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, UNKNOWN, (int) UINT16_FTYPE_UINT16_INT },

  /* MMX */
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },

  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
  { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },

  /* 3DNow! */
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },

  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  /* 3DNow!A */
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
  { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },

  /* SSE */
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_copysignv4sf3, "__builtin_ia32_copysignps", IX86_BUILTIN_CPYSGNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI },

  { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },

  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },

  /* SSE MMX or 3Dnow!A */
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },

  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },

  /* SSE2 */
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2df", IX86_BUILTIN_VEC_PERM_V2DF, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI },
  { OPTION_MASK_ISA_SSE, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4sf", IX86_BUILTIN_VEC_PERM_V4SF, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di", IX86_BUILTIN_VEC_PERM_V2DI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si", IX86_BUILTIN_VEC_PERM_V4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi", IX86_BUILTIN_VEC_PERM_V8HI, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi", IX86_BUILTIN_VEC_PERM_V16QI, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v2di_u", IX86_BUILTIN_VEC_PERM_V2DI_U, UNKNOWN, (int) V2UDI_FTYPE_V2UDI_V2UDI_V2UDI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4si_u", IX86_BUILTIN_VEC_PERM_V4SI_U, UNKNOWN, (int) V4USI_FTYPE_V4USI_V4USI_V4USI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8hi_u", IX86_BUILTIN_VEC_PERM_V8HI_U, UNKNOWN, (int) V8UHI_FTYPE_V8UHI_V8UHI_V8UHI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v16qi_u", IX86_BUILTIN_VEC_PERM_V16QI_U, UNKNOWN, (int) V16UQI_FTYPE_V16UQI_V16UQI_V16UQI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v4df", IX86_BUILTIN_VEC_PERM_V4DF, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_nothing, "__builtin_ia32_vec_perm_v8sf", IX86_BUILTIN_VEC_PERM_V8SF, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtudq2ps, "__builtin_ia32_cvtudq2ps", IX86_BUILTIN_CVTUDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
  { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
22799 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
22801 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
22802 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
22803 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
22804 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
22806 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlv1ti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22807 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22808 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22809 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22810 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22811 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22812 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22814 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrv1ti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT_CONVERT },
22815 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22816 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22817 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
22818 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22819 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22820 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
22822 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
22823 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
22824 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
22825 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
22827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
22828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
22831 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
22833 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
22834 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
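
  /* SSE2 MMX */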
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
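
  /* SSE3 */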
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },

  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
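
  /* SSSE3 */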
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },

  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT },
  { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT },
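
  /* SSE4.1 */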
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },

  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
  { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
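
  /* SSE4.1 rounding and ptest; these are guarded by OPTION_MASK_ISA_ROUND.  */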
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },

  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
  { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
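
  /* SSE4.2 */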
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
  { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_CRC32 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
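
  /* SSE4A */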
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
  { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
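
  /* AES */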
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },

  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
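
  /* PCLMUL */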
  { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
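
  /* AVX */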
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },

  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
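
  /* ABM */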
  { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 },
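
  /* F16C */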
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps, "__builtin_ia32_vcvtph2ps", IX86_BUILTIN_CVTPH2PS, UNKNOWN, (int) V4SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
  { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
};

/* FMA4 and XOP.  */
#define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT
#define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT
#define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT
#define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT
#define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF
#define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF
#define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF
#define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF
#define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI
#define MULTI_ARG_3_SI V4SI_FTYPE_V4SI_V4SI_V4SI
#define MULTI_ARG_3_SI_DI V4SI_FTYPE_V4SI_V4SI_V2DI
#define MULTI_ARG_3_HI V8HI_FTYPE_V8HI_V8HI_V8HI
#define MULTI_ARG_3_HI_SI V8HI_FTYPE_V8HI_V8HI_V4SI
#define MULTI_ARG_3_QI V16QI_FTYPE_V16QI_V16QI_V16QI
#define MULTI_ARG_3_DI2 V4DI_FTYPE_V4DI_V4DI_V4DI
#define MULTI_ARG_3_SI2 V8SI_FTYPE_V8SI_V8SI_V8SI
#define MULTI_ARG_3_HI2 V16HI_FTYPE_V16HI_V16HI_V16HI
#define MULTI_ARG_3_QI2 V32QI_FTYPE_V32QI_V32QI_V32QI
#define MULTI_ARG_2_SF V4SF_FTYPE_V4SF_V4SF
#define MULTI_ARG_2_DF V2DF_FTYPE_V2DF_V2DF
#define MULTI_ARG_2_DI V2DI_FTYPE_V2DI_V2DI
#define MULTI_ARG_2_SI V4SI_FTYPE_V4SI_V4SI
#define MULTI_ARG_2_HI V8HI_FTYPE_V8HI_V8HI
#define MULTI_ARG_2_QI V16QI_FTYPE_V16QI_V16QI
#define MULTI_ARG_2_DI_IMM V2DI_FTYPE_V2DI_SI
#define MULTI_ARG_2_SI_IMM V4SI_FTYPE_V4SI_SI
#define MULTI_ARG_2_HI_IMM V8HI_FTYPE_V8HI_SI
#define MULTI_ARG_2_QI_IMM V16QI_FTYPE_V16QI_SI
#define MULTI_ARG_2_DI_CMP V2DI_FTYPE_V2DI_V2DI_CMP
#define MULTI_ARG_2_SI_CMP V4SI_FTYPE_V4SI_V4SI_CMP
#define MULTI_ARG_2_HI_CMP V8HI_FTYPE_V8HI_V8HI_CMP
#define MULTI_ARG_2_QI_CMP V16QI_FTYPE_V16QI_V16QI_CMP
#define MULTI_ARG_2_SF_TF V4SF_FTYPE_V4SF_V4SF_TF
#define MULTI_ARG_2_DF_TF V2DF_FTYPE_V2DF_V2DF_TF
#define MULTI_ARG_2_DI_TF V2DI_FTYPE_V2DI_V2DI_TF
#define MULTI_ARG_2_SI_TF V4SI_FTYPE_V4SI_V4SI_TF
#define MULTI_ARG_2_HI_TF V8HI_FTYPE_V8HI_V8HI_TF
#define MULTI_ARG_2_QI_TF V16QI_FTYPE_V16QI_V16QI_TF
#define MULTI_ARG_1_SF V4SF_FTYPE_V4SF
#define MULTI_ARG_1_DF V2DF_FTYPE_V2DF
#define MULTI_ARG_1_SF2 V8SF_FTYPE_V8SF
#define MULTI_ARG_1_DF2 V4DF_FTYPE_V4DF
#define MULTI_ARG_1_DI V2DI_FTYPE_V2DI
#define MULTI_ARG_1_SI V4SI_FTYPE_V4SI
#define MULTI_ARG_1_HI V8HI_FTYPE_V8HI
#define MULTI_ARG_1_QI V16QI_FTYPE_V16QI
#define MULTI_ARG_1_SI_DI V2DI_FTYPE_V4SI
#define MULTI_ARG_1_HI_DI V2DI_FTYPE_V8HI
#define MULTI_ARG_1_HI_SI V4SI_FTYPE_V8HI
#define MULTI_ARG_1_QI_DI V2DI_FTYPE_V16QI
#define MULTI_ARG_1_QI_SI V4SI_FTYPE_V16QI
#define MULTI_ARG_1_QI_HI V8HI_FTYPE_V16QI
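
/* Each MULTI_ARG_* name above is an alias for one of the shared
   ix86_builtin_func_type codes, spelled in terms of operand count and
   element type so the table below stays readable.  For example,
   MULTI_ARG_3_SF (V4SF_FTYPE_V4SF_V4SF_V4SF) describes a three-operand
   V4SF builtin such as
     v4sf __builtin_ia32_vfmaddss (v4sf, v4sf, v4sf)
   while the *_CMP variants additionally carry the comparison code
   recorded in the entry's comparison field.  */
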
static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv4sf4, "__builtin_ia32_vfmaddss", IX86_BUILTIN_VFMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmaddv2df4, "__builtin_ia32_vfmaddsd", IX86_BUILTIN_VFMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4sf4, "__builtin_ia32_vfmaddps", IX86_BUILTIN_VFMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv2df4, "__builtin_ia32_vfmaddpd", IX86_BUILTIN_VFMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv4sf4, "__builtin_ia32_vfmsubss", IX86_BUILTIN_VFMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfmsubv2df4, "__builtin_ia32_vfmsubsd", IX86_BUILTIN_VFMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4sf4, "__builtin_ia32_vfmsubps", IX86_BUILTIN_VFMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv2df4, "__builtin_ia32_vfmsubpd", IX86_BUILTIN_VFMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv4sf4, "__builtin_ia32_vfnmaddss", IX86_BUILTIN_VFNMADDSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmaddv2df4, "__builtin_ia32_vfnmaddsd", IX86_BUILTIN_VFNMADDSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4sf4, "__builtin_ia32_vfnmaddps", IX86_BUILTIN_VFNMADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv2df4, "__builtin_ia32_vfnmaddpd", IX86_BUILTIN_VFNMADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv4sf4, "__builtin_ia32_vfnmsubss", IX86_BUILTIN_VFNMSUBSS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_vmfnmsubv2df4, "__builtin_ia32_vfnmsubsd", IX86_BUILTIN_VFNMSUBSD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4sf4, "__builtin_ia32_vfnmsubps", IX86_BUILTIN_VFNMSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv2df4, "__builtin_ia32_vfnmsubpd", IX86_BUILTIN_VFNMSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4sf4, "__builtin_ia32_vfmaddsubps", IX86_BUILTIN_VFMADDSUBPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv2df4, "__builtin_ia32_vfmaddsubpd", IX86_BUILTIN_VFMADDSUBPD, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4sf4, "__builtin_ia32_vfmsubaddps", IX86_BUILTIN_VFMSUBADDPS, UNKNOWN, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv2df4, "__builtin_ia32_vfmsubaddpd", IX86_BUILTIN_VFMSUBADDPD, UNKNOWN, (int)MULTI_ARG_3_DF },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv8sf4256, "__builtin_ia32_vfmaddps256", IX86_BUILTIN_VFMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddv4df4256, "__builtin_ia32_vfmaddpd256", IX86_BUILTIN_VFMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv8sf4256, "__builtin_ia32_vfmsubps256", IX86_BUILTIN_VFMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubv4df4256, "__builtin_ia32_vfmsubpd256", IX86_BUILTIN_VFMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv8sf4256, "__builtin_ia32_vfnmaddps256", IX86_BUILTIN_VFNMADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmaddv4df4256, "__builtin_ia32_vfnmaddpd256", IX86_BUILTIN_VFNMADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv8sf4256, "__builtin_ia32_vfnmsubps256", IX86_BUILTIN_VFNMSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fnmsubv4df4256, "__builtin_ia32_vfnmsubpd256", IX86_BUILTIN_VFNMSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },

  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv8sf4, "__builtin_ia32_vfmaddsubps256", IX86_BUILTIN_VFMADDSUBPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmaddsubv4df4, "__builtin_ia32_vfmaddsubpd256", IX86_BUILTIN_VFMADDSUBPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv8sf4, "__builtin_ia32_vfmsubaddps256", IX86_BUILTIN_VFMSUBADDPS256, UNKNOWN, (int)MULTI_ARG_3_SF2 },
  { OPTION_MASK_ISA_FMA4, CODE_FOR_fma4i_fmsubaddv4df4, "__builtin_ia32_vfmsubaddpd256", IX86_BUILTIN_VFMSUBADDPD256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
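
  /* XOP */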
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov", IX86_BUILTIN_VPCMOV, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2di, "__builtin_ia32_vpcmov_v2di", IX86_BUILTIN_VPCMOV_V2DI, UNKNOWN, (int)MULTI_ARG_3_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4si, "__builtin_ia32_vpcmov_v4si", IX86_BUILTIN_VPCMOV_V4SI, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8hi, "__builtin_ia32_vpcmov_v8hi", IX86_BUILTIN_VPCMOV_V8HI, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16qi, "__builtin_ia32_vpcmov_v16qi", IX86_BUILTIN_VPCMOV_V16QI, UNKNOWN, (int)MULTI_ARG_3_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v2df, "__builtin_ia32_vpcmov_v2df", IX86_BUILTIN_VPCMOV_V2DF, UNKNOWN, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4sf, "__builtin_ia32_vpcmov_v4sf", IX86_BUILTIN_VPCMOV_V4SF, UNKNOWN, (int)MULTI_ARG_3_SF },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov256", IX86_BUILTIN_VPCMOV256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4di256, "__builtin_ia32_vpcmov_v4di256", IX86_BUILTIN_VPCMOV_V4DI256, UNKNOWN, (int)MULTI_ARG_3_DI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8si256, "__builtin_ia32_vpcmov_v8si256", IX86_BUILTIN_VPCMOV_V8SI256, UNKNOWN, (int)MULTI_ARG_3_SI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v16hi256, "__builtin_ia32_vpcmov_v16hi256", IX86_BUILTIN_VPCMOV_V16HI256, UNKNOWN, (int)MULTI_ARG_3_HI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v32qi256, "__builtin_ia32_vpcmov_v32qi256", IX86_BUILTIN_VPCMOV_V32QI256, UNKNOWN, (int)MULTI_ARG_3_QI2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v4df256, "__builtin_ia32_vpcmov_v4df256", IX86_BUILTIN_VPCMOV_V4DF256, UNKNOWN, (int)MULTI_ARG_3_DF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcmov_v8sf256, "__builtin_ia32_vpcmov_v8sf256", IX86_BUILTIN_VPCMOV_V8SF256, UNKNOWN, (int)MULTI_ARG_3_SF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pperm, "__builtin_ia32_vpperm", IX86_BUILTIN_VPPERM, UNKNOWN, (int)MULTI_ARG_3_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssww, "__builtin_ia32_vpmacssww", IX86_BUILTIN_VPMACSSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsww, "__builtin_ia32_vpmacsww", IX86_BUILTIN_VPMACSWW, UNKNOWN, (int)MULTI_ARG_3_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsswd, "__builtin_ia32_vpmacsswd", IX86_BUILTIN_VPMACSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacswd, "__builtin_ia32_vpmacswd", IX86_BUILTIN_VPMACSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdd, "__builtin_ia32_vpmacssdd", IX86_BUILTIN_VPMACSSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdd, "__builtin_ia32_vpmacsdd", IX86_BUILTIN_VPMACSDD, UNKNOWN, (int)MULTI_ARG_3_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdql, "__builtin_ia32_vpmacssdql", IX86_BUILTIN_VPMACSSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacssdqh, "__builtin_ia32_vpmacssdqh", IX86_BUILTIN_VPMACSSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdql, "__builtin_ia32_vpmacsdql", IX86_BUILTIN_VPMACSDQL, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmacsdqh, "__builtin_ia32_vpmacsdqh", IX86_BUILTIN_VPMACSDQH, UNKNOWN, (int)MULTI_ARG_3_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcsswd, "__builtin_ia32_vpmadcsswd", IX86_BUILTIN_VPMADCSSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pmadcswd, "__builtin_ia32_vpmadcswd", IX86_BUILTIN_VPMADCSWD, UNKNOWN, (int)MULTI_ARG_3_HI_SI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv2di3, "__builtin_ia32_vprotq", IX86_BUILTIN_VPROTQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv4si3, "__builtin_ia32_vprotd", IX86_BUILTIN_VPROTD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv8hi3, "__builtin_ia32_vprotw", IX86_BUILTIN_VPROTW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vrotlv16qi3, "__builtin_ia32_vprotb", IX86_BUILTIN_VPROTB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv2di3, "__builtin_ia32_vprotqi", IX86_BUILTIN_VPROTQ_IMM, UNKNOWN, (int)MULTI_ARG_2_DI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv4si3, "__builtin_ia32_vprotdi", IX86_BUILTIN_VPROTD_IMM, UNKNOWN, (int)MULTI_ARG_2_SI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv8hi3, "__builtin_ia32_vprotwi", IX86_BUILTIN_VPROTW_IMM, UNKNOWN, (int)MULTI_ARG_2_HI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_rotlv16qi3, "__builtin_ia32_vprotbi", IX86_BUILTIN_VPROTB_IMM, UNKNOWN, (int)MULTI_ARG_2_QI_IMM },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv2di3, "__builtin_ia32_vpshaq", IX86_BUILTIN_VPSHAQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv4si3, "__builtin_ia32_vpshad", IX86_BUILTIN_VPSHAD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv8hi3, "__builtin_ia32_vpshaw", IX86_BUILTIN_VPSHAW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_ashlv16qi3, "__builtin_ia32_vpshab", IX86_BUILTIN_VPSHAB, UNKNOWN, (int)MULTI_ARG_2_QI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv2di3, "__builtin_ia32_vpshlq", IX86_BUILTIN_VPSHLQ, UNKNOWN, (int)MULTI_ARG_2_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv4si3, "__builtin_ia32_vpshld", IX86_BUILTIN_VPSHLD, UNKNOWN, (int)MULTI_ARG_2_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv8hi3, "__builtin_ia32_vpshlw", IX86_BUILTIN_VPSHLW, UNKNOWN, (int)MULTI_ARG_2_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_lshlv16qi3, "__builtin_ia32_vpshlb", IX86_BUILTIN_VPSHLB, UNKNOWN, (int)MULTI_ARG_2_QI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv4sf2, "__builtin_ia32_vfrczss", IX86_BUILTIN_VFRCZSS, UNKNOWN, (int)MULTI_ARG_2_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vmfrczv2df2, "__builtin_ia32_vfrczsd", IX86_BUILTIN_VFRCZSD, UNKNOWN, (int)MULTI_ARG_2_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4sf2, "__builtin_ia32_vfrczps", IX86_BUILTIN_VFRCZPS, UNKNOWN, (int)MULTI_ARG_1_SF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv2df2, "__builtin_ia32_vfrczpd", IX86_BUILTIN_VFRCZPD, UNKNOWN, (int)MULTI_ARG_1_DF },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv8sf2256, "__builtin_ia32_vfrczps256", IX86_BUILTIN_VFRCZPS256, UNKNOWN, (int)MULTI_ARG_1_SF2 },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_frczv4df2256, "__builtin_ia32_vfrczpd256", IX86_BUILTIN_VFRCZPD256, UNKNOWN, (int)MULTI_ARG_1_DF2 },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbw, "__builtin_ia32_vphaddbw", IX86_BUILTIN_VPHADDBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbd, "__builtin_ia32_vphaddbd", IX86_BUILTIN_VPHADDBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddbq, "__builtin_ia32_vphaddbq", IX86_BUILTIN_VPHADDBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwd, "__builtin_ia32_vphaddwd", IX86_BUILTIN_VPHADDWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddwq, "__builtin_ia32_vphaddwq", IX86_BUILTIN_VPHADDWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadddq, "__builtin_ia32_vphadddq", IX86_BUILTIN_VPHADDDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubw, "__builtin_ia32_vphaddubw", IX86_BUILTIN_VPHADDUBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubd, "__builtin_ia32_vphaddubd", IX86_BUILTIN_VPHADDUBD, UNKNOWN, (int)MULTI_ARG_1_QI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddubq, "__builtin_ia32_vphaddubq", IX86_BUILTIN_VPHADDUBQ, UNKNOWN, (int)MULTI_ARG_1_QI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwd, "__builtin_ia32_vphadduwd", IX86_BUILTIN_VPHADDUWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phadduwq, "__builtin_ia32_vphadduwq", IX86_BUILTIN_VPHADDUWQ, UNKNOWN, (int)MULTI_ARG_1_HI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phaddudq, "__builtin_ia32_vphaddudq", IX86_BUILTIN_VPHADDUDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubbw, "__builtin_ia32_vphsubbw", IX86_BUILTIN_VPHSUBBW, UNKNOWN, (int)MULTI_ARG_1_QI_HI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubwd, "__builtin_ia32_vphsubwd", IX86_BUILTIN_VPHSUBWD, UNKNOWN, (int)MULTI_ARG_1_HI_SI },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_phsubdq, "__builtin_ia32_vphsubdq", IX86_BUILTIN_VPHSUBDQ, UNKNOWN, (int)MULTI_ARG_1_SI_DI },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomeqb", IX86_BUILTIN_VPCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomneqb", IX86_BUILTIN_VPCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomltb", IX86_BUILTIN_VPCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomleb", IX86_BUILTIN_VPCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgtb", IX86_BUILTIN_VPCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv16qi3, "__builtin_ia32_vpcomgeb", IX86_BUILTIN_VPCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomeqw", IX86_BUILTIN_VPCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomnew", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomneqw", IX86_BUILTIN_VPCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomltw", IX86_BUILTIN_VPCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomlew", IX86_BUILTIN_VPCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgtw", IX86_BUILTIN_VPCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv8hi3, "__builtin_ia32_vpcomgew", IX86_BUILTIN_VPCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomeqd", IX86_BUILTIN_VPCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomned", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomneqd", IX86_BUILTIN_VPCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomltd", IX86_BUILTIN_VPCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomled", IX86_BUILTIN_VPCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomgtd", IX86_BUILTIN_VPCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv4si3, "__builtin_ia32_vpcomged", IX86_BUILTIN_VPCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomeqq", IX86_BUILTIN_VPCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomneqq", IX86_BUILTIN_VPCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomltq", IX86_BUILTIN_VPCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomleq", IX86_BUILTIN_VPCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgtq", IX86_BUILTIN_VPCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmpv2di3, "__builtin_ia32_vpcomgeq", IX86_BUILTIN_VPCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomequb", IX86_BUILTIN_VPCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomneub", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v16qi3, "__builtin_ia32_vpcomnequb", IX86_BUILTIN_VPCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomltub", IX86_BUILTIN_VPCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomleub", IX86_BUILTIN_VPCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgtub", IX86_BUILTIN_VPCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv16qi3, "__builtin_ia32_vpcomgeub", IX86_BUILTIN_VPCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomequw", IX86_BUILTIN_VPCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomneuw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v8hi3, "__builtin_ia32_vpcomnequw", IX86_BUILTIN_VPCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomltuw", IX86_BUILTIN_VPCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomleuw", IX86_BUILTIN_VPCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgtuw", IX86_BUILTIN_VPCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv8hi3, "__builtin_ia32_vpcomgeuw", IX86_BUILTIN_VPCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },

  { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomequd", IX86_BUILTIN_VPCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
23301 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomneud", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23302 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v4si3, "__builtin_ia32_vpcomnequd", IX86_BUILTIN_VPCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
23303 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomltud", IX86_BUILTIN_VPCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
23304 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomleud", IX86_BUILTIN_VPCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
23305 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgtud", IX86_BUILTIN_VPCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
23306 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv4si3, "__builtin_ia32_vpcomgeud", IX86_BUILTIN_VPCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
23308 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomequq", IX86_BUILTIN_VPCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
23309 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomneuq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23310 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_uns2v2di3, "__builtin_ia32_vpcomnequq", IX86_BUILTIN_VPCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
23311 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomltuq", IX86_BUILTIN_VPCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
23312 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomleuq", IX86_BUILTIN_VPCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
23313 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgtuq", IX86_BUILTIN_VPCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
23314 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_maskcmp_unsv2di3, "__builtin_ia32_vpcomgeuq", IX86_BUILTIN_VPCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
23316 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseb", IX86_BUILTIN_VPCOMFALSEB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23317 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalsew", IX86_BUILTIN_VPCOMFALSEW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23318 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalsed", IX86_BUILTIN_VPCOMFALSED, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23319 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseq", IX86_BUILTIN_VPCOMFALSEQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23320 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomfalseub", IX86_BUILTIN_VPCOMFALSEUB, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
23321 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomfalseuw", IX86_BUILTIN_VPCOMFALSEUW, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
23322 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomfalseud", IX86_BUILTIN_VPCOMFALSEUD, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
23323 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomfalseuq", IX86_BUILTIN_VPCOMFALSEUQ, (enum rtx_code) PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
23325 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueb", IX86_BUILTIN_VPCOMTRUEB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23326 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtruew", IX86_BUILTIN_VPCOMTRUEW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23327 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrued", IX86_BUILTIN_VPCOMTRUED, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23328 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueq", IX86_BUILTIN_VPCOMTRUEQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23329 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
23330 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
23331 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
23332 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
23334 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I },
23335 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I },
23336 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 },
23337 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 },
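/* A worked example of how a comparison row above is consumed (see
   ix86_expand_multi_arg_builtin below): the "__builtin_ia32_vpcomltb"
   entry pairs CODE_FOR_xop_maskcmpv16qi3 with the LT rtx code, so the
   expander builds a V16QI mask compare whose embedded comparison
   operator is LT.  Note that the vpcomneq* spellings deliberately
   reuse the IX86_BUILTIN_VPCOMNE* codes; they are aliases for the
   same NE comparison.  */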
23341 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
23342 in the current target ISA to allow the user to compile particular modules
23343 with different target specific options that differ from the command line options.  */
23346 ix86_init_mmx_sse_builtins (void)
23348 const struct builtin_description * d;
23349 enum ix86_builtin_func_type ftype;
23352 /* Add all special builtins with a variable number of operands.  */
23353 for (i = 0, d = bdesc_special_args;
23354 i < ARRAY_SIZE (bdesc_special_args);
23360 ftype = (enum ix86_builtin_func_type) d->flag;
23361 def_builtin (d->mask, d->name, ftype, d->code);
23364 /* Add all builtins with a variable number of operands.  */
23365 for (i = 0, d = bdesc_args;
23366 i < ARRAY_SIZE (bdesc_args);
23372 ftype = (enum ix86_builtin_func_type) d->flag;
23373 def_builtin_const (d->mask, d->name, ftype, d->code);
23376 /* pcmpestr[im] insns. */
23377 for (i = 0, d = bdesc_pcmpestr;
23378 i < ARRAY_SIZE (bdesc_pcmpestr);
23381 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23382 ftype = V16QI_FTYPE_V16QI_INT_V16QI_INT_INT;
23384 ftype = INT_FTYPE_V16QI_INT_V16QI_INT_INT;
23385 def_builtin_const (d->mask, d->name, ftype, d->code);
23388 /* pcmpistr[im] insns. */
23389 for (i = 0, d = bdesc_pcmpistr;
23390 i < ARRAY_SIZE (bdesc_pcmpistr);
23393 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23394 ftype = V16QI_FTYPE_V16QI_V16QI_INT;
23396 ftype = INT_FTYPE_V16QI_V16QI_INT;
23397 def_builtin_const (d->mask, d->name, ftype, d->code);
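  /* These string-compare builtins back the <smmintrin.h> SSE4.2
     intrinsics; e.g. _mm_cmpestri expands via
     __builtin_ia32_pcmpestri128 and _mm_cmpistri via
     __builtin_ia32_pcmpistri128.  */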
23400 /* comi/ucomi insns. */
23401 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23403 if (d->mask == OPTION_MASK_ISA_SSE2)
23404 ftype = INT_FTYPE_V2DF_V2DF;
23406 ftype = INT_FTYPE_V4SF_V4SF;
23407 def_builtin_const (d->mask, d->name, ftype, d->code);
23411 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr",
23412 VOID_FTYPE_UNSIGNED, IX86_BUILTIN_LDMXCSR);
23413 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr",
23414 UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
23416 /* SSE or 3DNow!A */
23417 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23418 "__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
23419 IX86_BUILTIN_MASKMOVQ);
23422 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu",
23423 VOID_FTYPE_V16QI_V16QI_PCHAR, IX86_BUILTIN_MASKMOVDQU);
23425 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush",
23426 VOID_FTYPE_PCVOID, IX86_BUILTIN_CLFLUSH);
23427 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence",
23428 VOID_FTYPE_VOID, IX86_BUILTIN_MFENCE);
23431 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor",
23432 VOID_FTYPE_PCVOID_UNSIGNED_UNSIGNED, IX86_BUILTIN_MONITOR);
23433 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait",
23434 VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
23437 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128",
23438 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
23439 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128",
23440 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
23441 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128",
23442 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
23443 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128",
23444 V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
23445 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128",
23446 V2DI_FTYPE_V2DI, IX86_BUILTIN_AESIMC128);
23447 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128",
23448 V2DI_FTYPE_V2DI_INT, IX86_BUILTIN_AESKEYGENASSIST128);
23451 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128",
23452 V2DI_FTYPE_V2DI_V2DI_INT, IX86_BUILTIN_PCLMULQDQ128);
23454 /* MMX access to the vec_init patterns. */
23455 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si",
23456 V2SI_FTYPE_INT_INT, IX86_BUILTIN_VEC_INIT_V2SI);
23458 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi",
23459 V4HI_FTYPE_HI_HI_HI_HI,
23460 IX86_BUILTIN_VEC_INIT_V4HI);
23462 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi",
23463 V8QI_FTYPE_QI_QI_QI_QI_QI_QI_QI_QI,
23464 IX86_BUILTIN_VEC_INIT_V8QI);
23466 /* Access to the vec_extract patterns. */
23467 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df",
23468 DOUBLE_FTYPE_V2DF_INT, IX86_BUILTIN_VEC_EXT_V2DF);
23469 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di",
23470 DI_FTYPE_V2DI_INT, IX86_BUILTIN_VEC_EXT_V2DI);
23471 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf",
23472 FLOAT_FTYPE_V4SF_INT, IX86_BUILTIN_VEC_EXT_V4SF);
23473 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si",
23474 SI_FTYPE_V4SI_INT, IX86_BUILTIN_VEC_EXT_V4SI);
23475 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
23476 HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
23478 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23479 "__builtin_ia32_vec_ext_v4hi",
23480 HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
23482 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si",
23483 SI_FTYPE_V2SI_INT, IX86_BUILTIN_VEC_EXT_V2SI);
23485 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi",
23486 QI_FTYPE_V16QI_INT, IX86_BUILTIN_VEC_EXT_V16QI);
23488 /* Access to the vec_set patterns. */
23489 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT,
23490 "__builtin_ia32_vec_set_v2di",
23491 V2DI_FTYPE_V2DI_DI_INT, IX86_BUILTIN_VEC_SET_V2DI);
23493 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf",
23494 V4SF_FTYPE_V4SF_FLOAT_INT, IX86_BUILTIN_VEC_SET_V4SF);
23496 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si",
23497 V4SI_FTYPE_V4SI_SI_INT, IX86_BUILTIN_VEC_SET_V4SI);
23499 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
23500 V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
23502 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
23503 "__builtin_ia32_vec_set_v4hi",
23504 V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
23506 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi",
23507 V16QI_FTYPE_V16QI_QI_INT, IX86_BUILTIN_VEC_SET_V16QI);
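  /* The vec_init/vec_ext/vec_set builtins above are what the intrinsic
     headers call under the hood; for instance _mm_set_pi32 in
     <mmintrin.h> wraps __builtin_ia32_vec_init_v2si, and
     _mm_extract_epi16 in <emmintrin.h> wraps
     __builtin_ia32_vec_ext_v8hi.  */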
23509 /* Add the FMA4 and XOP multi-arg instructions.  */
23510 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23515 ftype = (enum ix86_builtin_func_type) d->flag;
23516 def_builtin_const (d->mask, d->name, ftype, d->code);
23520 /* Internal method for ix86_init_builtins. */
23523 ix86_init_builtins_va_builtins_abi (void)
23525 tree ms_va_ref, sysv_va_ref;
23526 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23527 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23528 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23529 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23533 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23534 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23535 ms_va_ref = build_reference_type (ms_va_list_type_node);
23537 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23540 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23541 fnvoid_va_start_ms =
23542 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23543 fnvoid_va_end_sysv =
23544 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23545 fnvoid_va_start_sysv =
23546 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23548 fnvoid_va_copy_ms =
23549 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23551 fnvoid_va_copy_sysv =
23552 build_function_type_list (void_type_node, sysv_va_ref,
23553 sysv_va_ref, NULL_TREE);
23555 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23556 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23557 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23558 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23559 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23560 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23561 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23562 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23563 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23564 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23565 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23566 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
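/* A sketch of the intended use (hypothetical user code, not from this
   file): on a 64-bit SYSV target,

     void f (int n, ...) __attribute__ ((ms_abi));

   must use the Microsoft varargs layout, and va_start/va_end/va_copy
   inside such a function resolve to the __builtin_ms_va_* entry
   points registered above (and symmetrically for sysv_abi functions
   on Windows targets).  */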
23570 ix86_init_builtin_types (void)
23572 tree float128_type_node, float80_type_node;
23574 /* The __float80 type. */
23575 float80_type_node = long_double_type_node;
23576 if (TYPE_MODE (float80_type_node) != XFmode)
23578 /* The __float80 type. */
23579 float80_type_node = make_node (REAL_TYPE);
23581 TYPE_PRECISION (float80_type_node) = 80;
23582 layout_type (float80_type_node);
23584 lang_hooks.types.register_builtin_type (float80_type_node, "__float80");
23586 /* The __float128 type. */
23587 float128_type_node = make_node (REAL_TYPE);
23588 TYPE_PRECISION (float128_type_node) = 128;
23589 layout_type (float128_type_node);
23590 lang_hooks.types.register_builtin_type (float128_type_node, "__float128");
23592 /* This macro is built by i386-builtin-types.awk. */
23593 DEFINE_BUILTIN_PRIMITIVE_TYPES;
23597 ix86_init_builtins (void)
23601 ix86_init_builtin_types ();
23603 /* TFmode support builtins. */
23604 def_builtin_const (0, "__builtin_infq",
23605 FLOAT128_FTYPE_VOID, IX86_BUILTIN_INFQ);
23606 def_builtin_const (0, "__builtin_huge_valq",
23607 FLOAT128_FTYPE_VOID, IX86_BUILTIN_HUGE_VALQ);
23609 /* We will expand these into normal calls if SSE2 isn't available, since
23610 they are used by libgcc.  */
23611 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128);
23612 t = add_builtin_function ("__builtin_fabsq", t, IX86_BUILTIN_FABSQ,
23613 BUILT_IN_MD, "__fabstf2", NULL_TREE);
23614 TREE_READONLY (t) = 1;
23615 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = t;
23617 t = ix86_get_builtin_func_type (FLOAT128_FTYPE_FLOAT128_FLOAT128);
23618 t = add_builtin_function ("__builtin_copysignq", t, IX86_BUILTIN_COPYSIGNQ,
23619 BUILT_IN_MD, "__copysigntf3", NULL_TREE);
23620 TREE_READONLY (t) = 1;
23621 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = t;
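  /* E.g. "__float128 r = __builtin_fabsq (x);" expands inline when SSE2
     is available; otherwise the BUILT_IN_MD assembler names given above
     ("__fabstf2", "__copysigntf3") turn the expansion into ordinary
     libgcc calls (see the FABSQ/COPYSIGNQ handling in
     ix86_expand_builtin).  */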
23623 ix86_init_mmx_sse_builtins ();
23626 ix86_init_builtins_va_builtins_abi ();
23629 /* Return the ix86 builtin for CODE. */
23632 ix86_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
23634 if (code >= IX86_BUILTIN_MAX)
23635 return error_mark_node;
23637 return ix86_builtins[code];
23640 /* Errors in the source file can cause expand_expr to return const0_rtx
23641 where we expect a vector. To avoid crashing, use one of the vector
23642 clear instructions. */
23644 safe_vector_operand (rtx x, enum machine_mode mode)
23646 if (x == const0_rtx)
23647 x = CONST0_RTX (mode);
23651 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23654 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23657 tree arg0 = CALL_EXPR_ARG (exp, 0);
23658 tree arg1 = CALL_EXPR_ARG (exp, 1);
23659 rtx op0 = expand_normal (arg0);
23660 rtx op1 = expand_normal (arg1);
23661 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23662 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23663 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23665 if (VECTOR_MODE_P (mode0))
23666 op0 = safe_vector_operand (op0, mode0);
23667 if (VECTOR_MODE_P (mode1))
23668 op1 = safe_vector_operand (op1, mode1);
23670 if (optimize || !target
23671 || GET_MODE (target) != tmode
23672 || !insn_data[icode].operand[0].predicate (target, tmode))
23673 target = gen_reg_rtx (tmode);
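  /* Some builtins pass an int where the insn pattern wants a TImode
     operand (presumably the SSE2 128-bit shift forms); splice the
     SImode value into the low element of a V4SI register, zeroing the
     rest, and reinterpret the register as TImode.  */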
23675 if (GET_MODE (op1) == SImode && mode1 == TImode)
23677 rtx x = gen_reg_rtx (V4SImode);
23678 emit_insn (gen_sse2_loadd (x, op1));
23679 op1 = gen_lowpart (TImode, x);
23682 if (!insn_data[icode].operand[1].predicate (op0, mode0))
23683 op0 = copy_to_mode_reg (mode0, op0);
23684 if (!insn_data[icode].operand[2].predicate (op1, mode1))
23685 op1 = copy_to_mode_reg (mode1, op1);
23687 pat = GEN_FCN (icode) (target, op0, op1);
23696 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23699 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23700 enum ix86_builtin_func_type m_type,
23701 enum rtx_code sub_code)
23706 bool comparison_p = false;
23708 bool last_arg_constant = false;
23709 int num_memory = 0;
23712 enum machine_mode mode;
23715 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23719 case MULTI_ARG_4_DF2_DI_I:
23720 case MULTI_ARG_4_DF2_DI_I1:
23721 case MULTI_ARG_4_SF2_SI_I:
23722 case MULTI_ARG_4_SF2_SI_I1:
23724 last_arg_constant = true;
23727 case MULTI_ARG_3_SF:
23728 case MULTI_ARG_3_DF:
23729 case MULTI_ARG_3_SF2:
23730 case MULTI_ARG_3_DF2:
23731 case MULTI_ARG_3_DI:
23732 case MULTI_ARG_3_SI:
23733 case MULTI_ARG_3_SI_DI:
23734 case MULTI_ARG_3_HI:
23735 case MULTI_ARG_3_HI_SI:
23736 case MULTI_ARG_3_QI:
23737 case MULTI_ARG_3_DI2:
23738 case MULTI_ARG_3_SI2:
23739 case MULTI_ARG_3_HI2:
23740 case MULTI_ARG_3_QI2:
23744 case MULTI_ARG_2_SF:
23745 case MULTI_ARG_2_DF:
23746 case MULTI_ARG_2_DI:
23747 case MULTI_ARG_2_SI:
23748 case MULTI_ARG_2_HI:
23749 case MULTI_ARG_2_QI:
23753 case MULTI_ARG_2_DI_IMM:
23754 case MULTI_ARG_2_SI_IMM:
23755 case MULTI_ARG_2_HI_IMM:
23756 case MULTI_ARG_2_QI_IMM:
23758 last_arg_constant = true;
23761 case MULTI_ARG_1_SF:
23762 case MULTI_ARG_1_DF:
23763 case MULTI_ARG_1_SF2:
23764 case MULTI_ARG_1_DF2:
23765 case MULTI_ARG_1_DI:
23766 case MULTI_ARG_1_SI:
23767 case MULTI_ARG_1_HI:
23768 case MULTI_ARG_1_QI:
23769 case MULTI_ARG_1_SI_DI:
23770 case MULTI_ARG_1_HI_DI:
23771 case MULTI_ARG_1_HI_SI:
23772 case MULTI_ARG_1_QI_DI:
23773 case MULTI_ARG_1_QI_SI:
23774 case MULTI_ARG_1_QI_HI:
23778 case MULTI_ARG_2_DI_CMP:
23779 case MULTI_ARG_2_SI_CMP:
23780 case MULTI_ARG_2_HI_CMP:
23781 case MULTI_ARG_2_QI_CMP:
23783 comparison_p = true;
23786 case MULTI_ARG_2_SF_TF:
23787 case MULTI_ARG_2_DF_TF:
23788 case MULTI_ARG_2_DI_TF:
23789 case MULTI_ARG_2_SI_TF:
23790 case MULTI_ARG_2_HI_TF:
23791 case MULTI_ARG_2_QI_TF:
23797 gcc_unreachable ();
23800 if (optimize || !target
23801 || GET_MODE (target) != tmode
23802 || !insn_data[icode].operand[0].predicate (target, tmode))
23803 target = gen_reg_rtx (tmode);
23805 gcc_assert (nargs <= 4);
23807 for (i = 0; i < nargs; i++)
23809 tree arg = CALL_EXPR_ARG (exp, i);
23810 rtx op = expand_normal (arg);
23811 int adjust = (comparison_p) ? 1 : 0;
23812 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23814 if (last_arg_constant && i == nargs-1)
23816 if (!CONST_INT_P (op))
23818 error ("last argument must be an immediate");
23819 return gen_reg_rtx (tmode);
23824 if (VECTOR_MODE_P (mode))
23825 op = safe_vector_operand (op, mode);
23827 /* If we aren't optimizing, only allow one memory operand to be generated.  */
23829 if (memory_operand (op, mode))
23832 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23835 || !insn_data[icode].operand[i+adjust+1].predicate (op, mode)
23837 op = force_reg (mode, op);
23841 args[i].mode = mode;
23847 pat = GEN_FCN (icode) (target, args[0].op);
23852 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23853 GEN_INT ((int)sub_code));
23854 else if (! comparison_p)
23855 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23858 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23862 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23867 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23871 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op);
23875 gcc_unreachable ();
23885 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23886 insns with vec_merge. */
23889 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23893 tree arg0 = CALL_EXPR_ARG (exp, 0);
23894 rtx op1, op0 = expand_normal (arg0);
23895 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23896 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23898 if (optimize || !target
23899 || GET_MODE (target) != tmode
23900 || !insn_data[icode].operand[0].predicate (target, tmode))
23901 target = gen_reg_rtx (tmode);
23903 if (VECTOR_MODE_P (mode0))
23904 op0 = safe_vector_operand (op0, mode0);
23906 if ((optimize && !register_operand (op0, mode0))
23907 || !insn_data[icode].operand[1].predicate (op0, mode0))
23908 op0 = copy_to_mode_reg (mode0, op0);
23911 if (!insn_data[icode].operand[2].predicate (op1, mode0))
23912 op1 = copy_to_mode_reg (mode0, op1);
23914 pat = GEN_FCN (icode) (target, op0, op1);
23921 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
23924 ix86_expand_sse_compare (const struct builtin_description *d,
23925 tree exp, rtx target, bool swap)
23928 tree arg0 = CALL_EXPR_ARG (exp, 0);
23929 tree arg1 = CALL_EXPR_ARG (exp, 1);
23930 rtx op0 = expand_normal (arg0);
23931 rtx op1 = expand_normal (arg1);
23933 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23934 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23935 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23936 enum rtx_code comparison = d->comparison;
23938 if (VECTOR_MODE_P (mode0))
23939 op0 = safe_vector_operand (op0, mode0);
23940 if (VECTOR_MODE_P (mode1))
23941 op1 = safe_vector_operand (op1, mode1);
23943 /* Swap operands if we have a comparison that isn't available in hardware.  */
23947 rtx tmp = gen_reg_rtx (mode1);
23948 emit_move_insn (tmp, op1);
23953 if (optimize || !target
23954 || GET_MODE (target) != tmode
23955 || !insn_data[d->icode].operand[0].predicate (target, tmode))
23956 target = gen_reg_rtx (tmode);
23958 if ((optimize && !register_operand (op0, mode0))
23959 || !insn_data[d->icode].operand[1].predicate (op0, mode0))
23960 op0 = copy_to_mode_reg (mode0, op0);
23961 if ((optimize && !register_operand (op1, mode1))
23962 || !insn_data[d->icode].operand[2].predicate (op1, mode1))
23963 op1 = copy_to_mode_reg (mode1, op1);
23965 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23966 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23973 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
23976 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23980 tree arg0 = CALL_EXPR_ARG (exp, 0);
23981 tree arg1 = CALL_EXPR_ARG (exp, 1);
23982 rtx op0 = expand_normal (arg0);
23983 rtx op1 = expand_normal (arg1);
23984 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23985 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23986 enum rtx_code comparison = d->comparison;
23988 if (VECTOR_MODE_P (mode0))
23989 op0 = safe_vector_operand (op0, mode0);
23990 if (VECTOR_MODE_P (mode1))
23991 op1 = safe_vector_operand (op1, mode1);
23993 /* Swap operands if we have a comparison that isn't available in hardware.  */
23995 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24002 target = gen_reg_rtx (SImode);
24003 emit_move_insn (target, const0_rtx);
24004 target = gen_rtx_SUBREG (QImode, target, 0);
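  /* TARGET now names the low QImode part of a zeroed SImode register;
     the STRICT_LOW_PART set below writes only that byte, so the full
     SImode value returned via SUBREG_REG is exactly the 0/1 result of
     the comparison.  */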
24006 if ((optimize && !register_operand (op0, mode0))
24007 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24008 op0 = copy_to_mode_reg (mode0, op0);
24009 if ((optimize && !register_operand (op1, mode1))
24010 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24011 op1 = copy_to_mode_reg (mode1, op1);
24013 pat = GEN_FCN (d->icode) (op0, op1);
24017 emit_insn (gen_rtx_SET (VOIDmode,
24018 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24019 gen_rtx_fmt_ee (comparison, QImode,
24023 return SUBREG_REG (target);
24026 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24029 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24033 tree arg0 = CALL_EXPR_ARG (exp, 0);
24034 tree arg1 = CALL_EXPR_ARG (exp, 1);
24035 rtx op0 = expand_normal (arg0);
24036 rtx op1 = expand_normal (arg1);
24037 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24038 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24039 enum rtx_code comparison = d->comparison;
24041 if (VECTOR_MODE_P (mode0))
24042 op0 = safe_vector_operand (op0, mode0);
24043 if (VECTOR_MODE_P (mode1))
24044 op1 = safe_vector_operand (op1, mode1);
24046 target = gen_reg_rtx (SImode);
24047 emit_move_insn (target, const0_rtx);
24048 target = gen_rtx_SUBREG (QImode, target, 0);
24050 if ((optimize && !register_operand (op0, mode0))
24051 || !insn_data[d->icode].operand[0].predicate (op0, mode0))
24052 op0 = copy_to_mode_reg (mode0, op0);
24053 if ((optimize && !register_operand (op1, mode1))
24054 || !insn_data[d->icode].operand[1].predicate (op1, mode1))
24055 op1 = copy_to_mode_reg (mode1, op1);
24057 pat = GEN_FCN (d->icode) (op0, op1);
24061 emit_insn (gen_rtx_SET (VOIDmode,
24062 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24063 gen_rtx_fmt_ee (comparison, QImode,
24067 return SUBREG_REG (target);
24070 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24073 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24074 tree exp, rtx target)
24077 tree arg0 = CALL_EXPR_ARG (exp, 0);
24078 tree arg1 = CALL_EXPR_ARG (exp, 1);
24079 tree arg2 = CALL_EXPR_ARG (exp, 2);
24080 tree arg3 = CALL_EXPR_ARG (exp, 3);
24081 tree arg4 = CALL_EXPR_ARG (exp, 4);
24082 rtx scratch0, scratch1;
24083 rtx op0 = expand_normal (arg0);
24084 rtx op1 = expand_normal (arg1);
24085 rtx op2 = expand_normal (arg2);
24086 rtx op3 = expand_normal (arg3);
24087 rtx op4 = expand_normal (arg4);
24088 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24090 tmode0 = insn_data[d->icode].operand[0].mode;
24091 tmode1 = insn_data[d->icode].operand[1].mode;
24092 modev2 = insn_data[d->icode].operand[2].mode;
24093 modei3 = insn_data[d->icode].operand[3].mode;
24094 modev4 = insn_data[d->icode].operand[4].mode;
24095 modei5 = insn_data[d->icode].operand[5].mode;
24096 modeimm = insn_data[d->icode].operand[6].mode;
24098 if (VECTOR_MODE_P (modev2))
24099 op0 = safe_vector_operand (op0, modev2);
24100 if (VECTOR_MODE_P (modev4))
24101 op2 = safe_vector_operand (op2, modev4);
24103 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24104 op0 = copy_to_mode_reg (modev2, op0);
24105 if (!insn_data[d->icode].operand[3].predicate (op1, modei3))
24106 op1 = copy_to_mode_reg (modei3, op1);
24107 if ((optimize && !register_operand (op2, modev4))
24108 || !insn_data[d->icode].operand[4].predicate (op2, modev4))
24109 op2 = copy_to_mode_reg (modev4, op2);
24110 if (!insn_data[d->icode].operand[5].predicate (op3, modei5))
24111 op3 = copy_to_mode_reg (modei5, op3);
24113 if (!insn_data[d->icode].operand[6].predicate (op4, modeimm))
24115 error ("the fifth argument must be a 8-bit immediate");
24119 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24121 if (optimize || !target
24122 || GET_MODE (target) != tmode0
24123 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24124 target = gen_reg_rtx (tmode0);
24126 scratch1 = gen_reg_rtx (tmode1);
24128 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24130 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24132 if (optimize || !target
24133 || GET_MODE (target) != tmode1
24134 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24135 target = gen_reg_rtx (tmode1);
24137 scratch0 = gen_reg_rtx (tmode0);
24139 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24143 gcc_assert (d->flag);
24145 scratch0 = gen_reg_rtx (tmode0);
24146 scratch1 = gen_reg_rtx (tmode1);
24148 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24158 target = gen_reg_rtx (SImode);
24159 emit_move_insn (target, const0_rtx);
24160 target = gen_rtx_SUBREG (QImode, target, 0);
24163 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24164 gen_rtx_fmt_ee (EQ, QImode,
24165 gen_rtx_REG ((enum machine_mode) d->flag,
24168 return SUBREG_REG (target);
24175 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24178 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24179 tree exp, rtx target)
24182 tree arg0 = CALL_EXPR_ARG (exp, 0);
24183 tree arg1 = CALL_EXPR_ARG (exp, 1);
24184 tree arg2 = CALL_EXPR_ARG (exp, 2);
24185 rtx scratch0, scratch1;
24186 rtx op0 = expand_normal (arg0);
24187 rtx op1 = expand_normal (arg1);
24188 rtx op2 = expand_normal (arg2);
24189 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24191 tmode0 = insn_data[d->icode].operand[0].mode;
24192 tmode1 = insn_data[d->icode].operand[1].mode;
24193 modev2 = insn_data[d->icode].operand[2].mode;
24194 modev3 = insn_data[d->icode].operand[3].mode;
24195 modeimm = insn_data[d->icode].operand[4].mode;
24197 if (VECTOR_MODE_P (modev2))
24198 op0 = safe_vector_operand (op0, modev2);
24199 if (VECTOR_MODE_P (modev3))
24200 op1 = safe_vector_operand (op1, modev3);
24202 if (!insn_data[d->icode].operand[2].predicate (op0, modev2))
24203 op0 = copy_to_mode_reg (modev2, op0);
24204 if ((optimize && !register_operand (op1, modev3))
24205 || !insn_data[d->icode].operand[3].predicate (op1, modev3))
24206 op1 = copy_to_mode_reg (modev3, op1);
24208 if (!insn_data[d->icode].operand[4].predicate (op2, modeimm))
24210 error ("the third argument must be a 8-bit immediate");
24214 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24216 if (optimize || !target
24217 || GET_MODE (target) != tmode0
24218 || !insn_data[d->icode].operand[0].predicate (target, tmode0))
24219 target = gen_reg_rtx (tmode0);
24221 scratch1 = gen_reg_rtx (tmode1);
24223 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24225 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24227 if (optimize || !target
24228 || GET_MODE (target) != tmode1
24229 || !insn_data[d->icode].operand[1].predicate (target, tmode1))
24230 target = gen_reg_rtx (tmode1);
24232 scratch0 = gen_reg_rtx (tmode0);
24234 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24238 gcc_assert (d->flag);
24240 scratch0 = gen_reg_rtx (tmode0);
24241 scratch1 = gen_reg_rtx (tmode1);
24243 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24253 target = gen_reg_rtx (SImode);
24254 emit_move_insn (target, const0_rtx);
24255 target = gen_rtx_SUBREG (QImode, target, 0);
24258 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24259 gen_rtx_fmt_ee (EQ, QImode,
24260 gen_rtx_REG ((enum machine_mode) d->flag,
24263 return SUBREG_REG (target);
24269 /* Subroutine of ix86_expand_builtin to take care of insns with
24270 a variable number of operands.  */
24273 ix86_expand_args_builtin (const struct builtin_description *d,
24274 tree exp, rtx target)
24276 rtx pat, real_target;
24277 unsigned int i, nargs;
24278 unsigned int nargs_constant = 0;
24279 int num_memory = 0;
24283 enum machine_mode mode;
24285 bool last_arg_count = false;
24286 enum insn_code icode = d->icode;
24287 const struct insn_data_d *insn_p = &insn_data[icode];
24288 enum machine_mode tmode = insn_p->operand[0].mode;
24289 enum machine_mode rmode = VOIDmode;
24291 enum rtx_code comparison = d->comparison;
24293 switch ((enum ix86_builtin_func_type) d->flag)
24295 case INT_FTYPE_V8SF_V8SF_PTEST:
24296 case INT_FTYPE_V4DI_V4DI_PTEST:
24297 case INT_FTYPE_V4DF_V4DF_PTEST:
24298 case INT_FTYPE_V4SF_V4SF_PTEST:
24299 case INT_FTYPE_V2DI_V2DI_PTEST:
24300 case INT_FTYPE_V2DF_V2DF_PTEST:
24301 return ix86_expand_sse_ptest (d, exp, target);
24302 case FLOAT128_FTYPE_FLOAT128:
24303 case FLOAT_FTYPE_FLOAT:
24304 case INT_FTYPE_INT:
24305 case UINT64_FTYPE_INT:
24306 case UINT16_FTYPE_UINT16:
24307 case INT64_FTYPE_INT64:
24308 case INT64_FTYPE_V4SF:
24309 case INT64_FTYPE_V2DF:
24310 case INT_FTYPE_V16QI:
24311 case INT_FTYPE_V8QI:
24312 case INT_FTYPE_V8SF:
24313 case INT_FTYPE_V4DF:
24314 case INT_FTYPE_V4SF:
24315 case INT_FTYPE_V2DF:
24316 case V16QI_FTYPE_V16QI:
24317 case V8SI_FTYPE_V8SF:
24318 case V8SI_FTYPE_V4SI:
24319 case V8HI_FTYPE_V8HI:
24320 case V8HI_FTYPE_V16QI:
24321 case V8QI_FTYPE_V8QI:
24322 case V8SF_FTYPE_V8SF:
24323 case V8SF_FTYPE_V8SI:
24324 case V8SF_FTYPE_V4SF:
24325 case V8SF_FTYPE_V8HI:
24326 case V4SI_FTYPE_V4SI:
24327 case V4SI_FTYPE_V16QI:
24328 case V4SI_FTYPE_V4SF:
24329 case V4SI_FTYPE_V8SI:
24330 case V4SI_FTYPE_V8HI:
24331 case V4SI_FTYPE_V4DF:
24332 case V4SI_FTYPE_V2DF:
24333 case V4HI_FTYPE_V4HI:
24334 case V4DF_FTYPE_V4DF:
24335 case V4DF_FTYPE_V4SI:
24336 case V4DF_FTYPE_V4SF:
24337 case V4DF_FTYPE_V2DF:
24338 case V4SF_FTYPE_V4SF:
24339 case V4SF_FTYPE_V4SI:
24340 case V4SF_FTYPE_V8SF:
24341 case V4SF_FTYPE_V4DF:
24342 case V4SF_FTYPE_V8HI:
24343 case V4SF_FTYPE_V2DF:
24344 case V2DI_FTYPE_V2DI:
24345 case V2DI_FTYPE_V16QI:
24346 case V2DI_FTYPE_V8HI:
24347 case V2DI_FTYPE_V4SI:
24348 case V2DF_FTYPE_V2DF:
24349 case V2DF_FTYPE_V4SI:
24350 case V2DF_FTYPE_V4DF:
24351 case V2DF_FTYPE_V4SF:
24352 case V2DF_FTYPE_V2SI:
24353 case V2SI_FTYPE_V2SI:
24354 case V2SI_FTYPE_V4SF:
24355 case V2SI_FTYPE_V2SF:
24356 case V2SI_FTYPE_V2DF:
24357 case V2SF_FTYPE_V2SF:
24358 case V2SF_FTYPE_V2SI:
24361 case V4SF_FTYPE_V4SF_VEC_MERGE:
24362 case V2DF_FTYPE_V2DF_VEC_MERGE:
24363 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24364 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24365 case V16QI_FTYPE_V16QI_V16QI:
24366 case V16QI_FTYPE_V8HI_V8HI:
24367 case V8QI_FTYPE_V8QI_V8QI:
24368 case V8QI_FTYPE_V4HI_V4HI:
24369 case V8HI_FTYPE_V8HI_V8HI:
24370 case V8HI_FTYPE_V16QI_V16QI:
24371 case V8HI_FTYPE_V4SI_V4SI:
24372 case V8SF_FTYPE_V8SF_V8SF:
24373 case V8SF_FTYPE_V8SF_V8SI:
24374 case V4SI_FTYPE_V4SI_V4SI:
24375 case V4SI_FTYPE_V8HI_V8HI:
24376 case V4SI_FTYPE_V4SF_V4SF:
24377 case V4SI_FTYPE_V2DF_V2DF:
24378 case V4HI_FTYPE_V4HI_V4HI:
24379 case V4HI_FTYPE_V8QI_V8QI:
24380 case V4HI_FTYPE_V2SI_V2SI:
24381 case V4DF_FTYPE_V4DF_V4DF:
24382 case V4DF_FTYPE_V4DF_V4DI:
24383 case V4SF_FTYPE_V4SF_V4SF:
24384 case V4SF_FTYPE_V4SF_V4SI:
24385 case V4SF_FTYPE_V4SF_V2SI:
24386 case V4SF_FTYPE_V4SF_V2DF:
24387 case V4SF_FTYPE_V4SF_DI:
24388 case V4SF_FTYPE_V4SF_SI:
24389 case V2DI_FTYPE_V2DI_V2DI:
24390 case V2DI_FTYPE_V16QI_V16QI:
24391 case V2DI_FTYPE_V4SI_V4SI:
24392 case V2DI_FTYPE_V2DI_V16QI:
24393 case V2DI_FTYPE_V2DF_V2DF:
24394 case V2SI_FTYPE_V2SI_V2SI:
24395 case V2SI_FTYPE_V4HI_V4HI:
24396 case V2SI_FTYPE_V2SF_V2SF:
24397 case V2DF_FTYPE_V2DF_V2DF:
24398 case V2DF_FTYPE_V2DF_V4SF:
24399 case V2DF_FTYPE_V2DF_V2DI:
24400 case V2DF_FTYPE_V2DF_DI:
24401 case V2DF_FTYPE_V2DF_SI:
24402 case V2SF_FTYPE_V2SF_V2SF:
24403 case V1DI_FTYPE_V1DI_V1DI:
24404 case V1DI_FTYPE_V8QI_V8QI:
24405 case V1DI_FTYPE_V2SI_V2SI:
24406 if (comparison == UNKNOWN)
24407 return ix86_expand_binop_builtin (icode, exp, target);
24410 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24411 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24412 gcc_assert (comparison != UNKNOWN);
24416 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24417 case V8HI_FTYPE_V8HI_SI_COUNT:
24418 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24419 case V4SI_FTYPE_V4SI_SI_COUNT:
24420 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24421 case V4HI_FTYPE_V4HI_SI_COUNT:
24422 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24423 case V2DI_FTYPE_V2DI_SI_COUNT:
24424 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24425 case V2SI_FTYPE_V2SI_SI_COUNT:
24426 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24427 case V1DI_FTYPE_V1DI_SI_COUNT:
24429 last_arg_count = true;
24431 case UINT64_FTYPE_UINT64_UINT64:
24432 case UINT_FTYPE_UINT_UINT:
24433 case UINT_FTYPE_UINT_USHORT:
24434 case UINT_FTYPE_UINT_UCHAR:
24435 case UINT16_FTYPE_UINT16_INT:
24436 case UINT8_FTYPE_UINT8_INT:
24439 case V2DI_FTYPE_V2DI_INT_CONVERT:
24442 nargs_constant = 1;
24444 case V8HI_FTYPE_V8HI_INT:
24445 case V8HI_FTYPE_V8SF_INT:
24446 case V8HI_FTYPE_V4SF_INT:
24447 case V8SF_FTYPE_V8SF_INT:
24448 case V4SI_FTYPE_V4SI_INT:
24449 case V4SI_FTYPE_V8SI_INT:
24450 case V4HI_FTYPE_V4HI_INT:
24451 case V4DF_FTYPE_V4DF_INT:
24452 case V4SF_FTYPE_V4SF_INT:
24453 case V4SF_FTYPE_V8SF_INT:
24454 case V2DI_FTYPE_V2DI_INT:
24455 case V2DF_FTYPE_V2DF_INT:
24456 case V2DF_FTYPE_V4DF_INT:
24458 nargs_constant = 1;
24460 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24461 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24462 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24463 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24464 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24467 case V16QI_FTYPE_V16QI_V16QI_INT:
24468 case V8HI_FTYPE_V8HI_V8HI_INT:
24469 case V8SI_FTYPE_V8SI_V8SI_INT:
24470 case V8SI_FTYPE_V8SI_V4SI_INT:
24471 case V8SF_FTYPE_V8SF_V8SF_INT:
24472 case V8SF_FTYPE_V8SF_V4SF_INT:
24473 case V4SI_FTYPE_V4SI_V4SI_INT:
24474 case V4DF_FTYPE_V4DF_V4DF_INT:
24475 case V4DF_FTYPE_V4DF_V2DF_INT:
24476 case V4SF_FTYPE_V4SF_V4SF_INT:
24477 case V2DI_FTYPE_V2DI_V2DI_INT:
24478 case V2DF_FTYPE_V2DF_V2DF_INT:
24480 nargs_constant = 1;
24482 case V2DI_FTYPE_V2DI_V2DI_INT_CONVERT:
24485 nargs_constant = 1;
24487 case V1DI_FTYPE_V1DI_V1DI_INT_CONVERT:
24490 nargs_constant = 1;
24492 case V2DI_FTYPE_V2DI_UINT_UINT:
24494 nargs_constant = 2;
24496 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
24497 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
24498 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
24499 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
24501 nargs_constant = 1;
24503 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24505 nargs_constant = 2;
24508 gcc_unreachable ();
24511 gcc_assert (nargs <= ARRAY_SIZE (args));
24513 if (comparison != UNKNOWN)
24515 gcc_assert (nargs == 2);
24516 return ix86_expand_sse_compare (d, exp, target, swap);
24519 if (rmode == VOIDmode || rmode == tmode)
24523 || GET_MODE (target) != tmode
24524 || !insn_p->operand[0].predicate (target, tmode))
24525 target = gen_reg_rtx (tmode);
24526 real_target = target;
24530 target = gen_reg_rtx (rmode);
24531 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24534 for (i = 0; i < nargs; i++)
24536 tree arg = CALL_EXPR_ARG (exp, i);
24537 rtx op = expand_normal (arg);
24538 enum machine_mode mode = insn_p->operand[i + 1].mode;
24539 bool match = insn_p->operand[i + 1].predicate (op, mode);
24541 if (last_arg_count && (i + 1) == nargs)
24543 /* SIMD shift insns take either an 8-bit immediate or a
24544 register as the count, but the builtin functions take an int as
24545 the count.  If the count doesn't match, we put it in a register.  */
24548 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24549 if (!insn_p->operand[i + 1].predicate (op, mode))
24550 op = copy_to_reg (op);
24553 else if ((nargs - i) <= nargs_constant)
24558 case CODE_FOR_sse4_1_roundpd:
24559 case CODE_FOR_sse4_1_roundps:
24560 case CODE_FOR_sse4_1_roundsd:
24561 case CODE_FOR_sse4_1_roundss:
24562 case CODE_FOR_sse4_1_blendps:
24563 case CODE_FOR_avx_blendpd256:
24564 case CODE_FOR_avx_vpermilv4df:
24565 case CODE_FOR_avx_roundpd256:
24566 case CODE_FOR_avx_roundps256:
24567 error ("the last argument must be a 4-bit immediate");
24570 case CODE_FOR_sse4_1_blendpd:
24571 case CODE_FOR_avx_vpermilv2df:
24572 case CODE_FOR_xop_vpermil2v2df3:
24573 case CODE_FOR_xop_vpermil2v4sf3:
24574 case CODE_FOR_xop_vpermil2v4df3:
24575 case CODE_FOR_xop_vpermil2v8sf3:
24576 error ("the last argument must be a 2-bit immediate");
24579 case CODE_FOR_avx_vextractf128v4df:
24580 case CODE_FOR_avx_vextractf128v8sf:
24581 case CODE_FOR_avx_vextractf128v8si:
24582 case CODE_FOR_avx_vinsertf128v4df:
24583 case CODE_FOR_avx_vinsertf128v8sf:
24584 case CODE_FOR_avx_vinsertf128v8si:
24585 error ("the last argument must be a 1-bit immediate");
24588 case CODE_FOR_avx_cmpsdv2df3:
24589 case CODE_FOR_avx_cmpssv4sf3:
24590 case CODE_FOR_avx_cmppdv2df3:
24591 case CODE_FOR_avx_cmppsv4sf3:
24592 case CODE_FOR_avx_cmppdv4df3:
24593 case CODE_FOR_avx_cmppsv8sf3:
24594 error ("the last argument must be a 5-bit immediate");
24598 switch (nargs_constant)
24601 if ((nargs - i) == nargs_constant)
24603 error ("the next to last argument must be an 8-bit immediate");
24607 error ("the last argument must be an 8-bit immediate");
24610 gcc_unreachable ();
24617 if (VECTOR_MODE_P (mode))
24618 op = safe_vector_operand (op, mode);
24620 /* If we aren't optimizing, only allow one memory operand to be generated.  */
24622 if (memory_operand (op, mode))
24625 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24627 if (optimize || !match || num_memory > 1)
24628 op = copy_to_mode_reg (mode, op);
24632 op = copy_to_reg (op);
24633 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24638 args[i].mode = mode;
24644 pat = GEN_FCN (icode) (real_target, args[0].op);
24647 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24650 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24654 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24655 args[2].op, args[3].op);
24658 gcc_unreachable ();
24668 /* Subroutine of ix86_expand_builtin to take care of special insns
24669 with a variable number of operands.  */
24672 ix86_expand_special_args_builtin (const struct builtin_description *d,
24673 tree exp, rtx target)
24677 unsigned int i, nargs, arg_adjust, memory;
24681 enum machine_mode mode;
24683 enum insn_code icode = d->icode;
24684 bool last_arg_constant = false;
24685 const struct insn_data_d *insn_p = &insn_data[icode];
24686 enum machine_mode tmode = insn_p->operand[0].mode;
24687 enum { load, store } klass;
24689 switch ((enum ix86_builtin_func_type) d->flag)
24691 case VOID_FTYPE_VOID:
24692 emit_insn (GEN_FCN (icode) (target));
24694 case VOID_FTYPE_UINT64:
24695 case VOID_FTYPE_UNSIGNED:
24701 case UINT64_FTYPE_VOID:
24702 case UNSIGNED_FTYPE_VOID:
24703 case UINT16_FTYPE_VOID:
24708 case UINT64_FTYPE_PUNSIGNED:
24709 case V2DI_FTYPE_PV2DI:
24710 case V32QI_FTYPE_PCCHAR:
24711 case V16QI_FTYPE_PCCHAR:
24712 case V8SF_FTYPE_PCV4SF:
24713 case V8SF_FTYPE_PCFLOAT:
24714 case V4SF_FTYPE_PCFLOAT:
24715 case V4DF_FTYPE_PCV2DF:
24716 case V4DF_FTYPE_PCDOUBLE:
24717 case V2DF_FTYPE_PCDOUBLE:
24718 case VOID_FTYPE_PVOID:
24723 case VOID_FTYPE_PV2SF_V4SF:
24724 case VOID_FTYPE_PV4DI_V4DI:
24725 case VOID_FTYPE_PV2DI_V2DI:
24726 case VOID_FTYPE_PCHAR_V32QI:
24727 case VOID_FTYPE_PCHAR_V16QI:
24728 case VOID_FTYPE_PFLOAT_V8SF:
24729 case VOID_FTYPE_PFLOAT_V4SF:
24730 case VOID_FTYPE_PDOUBLE_V4DF:
24731 case VOID_FTYPE_PDOUBLE_V2DF:
24732 case VOID_FTYPE_PULONGLONG_ULONGLONG:
24733 case VOID_FTYPE_PINT_INT:
24736 /* Reserve memory operand for target. */
24737 memory = ARRAY_SIZE (args);
24739 case V4SF_FTYPE_V4SF_PCV2SF:
24740 case V2DF_FTYPE_V2DF_PCDOUBLE:
24745 case V8SF_FTYPE_PCV8SF_V8SF:
24746 case V4DF_FTYPE_PCV4DF_V4DF:
24747 case V4SF_FTYPE_PCV4SF_V4SF:
24748 case V2DF_FTYPE_PCV2DF_V2DF:
24753 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24754 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24755 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24756 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24759 /* Reserve memory operand for target. */
24760 memory = ARRAY_SIZE (args);
24762 case VOID_FTYPE_UINT_UINT_UINT:
24763 case VOID_FTYPE_UINT64_UINT_UINT:
24764 case UCHAR_FTYPE_UINT_UINT_UINT:
24765 case UCHAR_FTYPE_UINT64_UINT_UINT:
24768 memory = ARRAY_SIZE (args);
24769 last_arg_constant = true;
24772 gcc_unreachable ();
24775 gcc_assert (nargs <= ARRAY_SIZE (args));
24777 if (klass == store)
24779 arg = CALL_EXPR_ARG (exp, 0);
24780 op = expand_normal (arg);
24781 gcc_assert (target == 0);
24783 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24785 target = force_reg (tmode, op);
24793 || GET_MODE (target) != tmode
24794 || !insn_p->operand[0].predicate (target, tmode))
24795 target = gen_reg_rtx (tmode);
24798 for (i = 0; i < nargs; i++)
24800 enum machine_mode mode = insn_p->operand[i + 1].mode;
24803 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24804 op = expand_normal (arg);
24805 match = insn_p->operand[i + 1].predicate (op, mode);
24807 if (last_arg_constant && (i + 1) == nargs)
24811 if (icode == CODE_FOR_lwp_lwpvalsi3
24812 || icode == CODE_FOR_lwp_lwpinssi3
24813 || icode == CODE_FOR_lwp_lwpvaldi3
24814 || icode == CODE_FOR_lwp_lwpinsdi3)
24815 error ("the last argument must be a 32-bit immediate");
24817 error ("the last argument must be an 8-bit immediate");
24825 /* This must be the memory operand. */
24826 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24827 gcc_assert (GET_MODE (op) == mode
24828 || GET_MODE (op) == VOIDmode);
24832 /* This must be a register.  */
24833 if (VECTOR_MODE_P (mode))
24834 op = safe_vector_operand (op, mode);
24836 gcc_assert (GET_MODE (op) == mode
24837 || GET_MODE (op) == VOIDmode);
24838 op = copy_to_mode_reg (mode, op);
24843 args[i].mode = mode;
24849 pat = GEN_FCN (icode) (target);
24852 pat = GEN_FCN (icode) (target, args[0].op);
24855 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24858 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
24861 gcc_unreachable ();
24867 return klass == store ? 0 : target;
24870 /* Return the integer constant in ARG. Constrain it to be in the range
24871 of the subparts of VEC_TYPE; issue an error if not. */
24874 get_element_number (tree vec_type, tree arg)
24876 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24878 if (!host_integerp (arg, 1)
24879 || (elt = tree_low_cst (arg, 1), elt > max))
24881 error ("selector must be an integer constant in the range 0..%wi", max);
24888 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24889 ix86_expand_vector_init. We DO have language-level syntax for this, in
24890 the form of (type){ init-list }. Except that since we can't place emms
24891 instructions from inside the compiler, we can't allow the use of MMX
24892 registers unless the user explicitly asks for it. So we do *not* define
24893 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24894 we have builtins invoked by mmintrin.h that give us license to emit
24895 these sorts of instructions. */
24898 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24900 enum machine_mode tmode = TYPE_MODE (type);
24901 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24902 int i, n_elt = GET_MODE_NUNITS (tmode);
24903 rtvec v = rtvec_alloc (n_elt);
24905 gcc_assert (VECTOR_MODE_P (tmode));
24906 gcc_assert (call_expr_nargs (exp) == n_elt);
24908 for (i = 0; i < n_elt; ++i)
24910 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24911 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24914 if (!target || !register_operand (target, tmode))
24915 target = gen_reg_rtx (tmode);
24917 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
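  /* E.g. _mm_set_pi16 (w3, w2, w1, w0) from <mmintrin.h> arrives here
     as __builtin_ia32_vec_init_v4hi (w0, w1, w2, w3), with
     n_elt == 4.  */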
24921 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24922 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24923 had a language-level syntax for referencing vector elements. */
24926 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24928 enum machine_mode tmode, mode0;
24933 arg0 = CALL_EXPR_ARG (exp, 0);
24934 arg1 = CALL_EXPR_ARG (exp, 1);
24936 op0 = expand_normal (arg0);
24937 elt = get_element_number (TREE_TYPE (arg0), arg1);
24939 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24940 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24941 gcc_assert (VECTOR_MODE_P (mode0));
24943 op0 = force_reg (mode0, op0);
24945 if (optimize || !target || !register_operand (target, tmode))
24946 target = gen_reg_rtx (tmode);
24948 ix86_expand_vector_extract (true, target, op0, elt);
24953 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24954 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24955 a language-level syntax for referencing vector elements. */
24958 ix86_expand_vec_set_builtin (tree exp)
24960 enum machine_mode tmode, mode1;
24961 tree arg0, arg1, arg2;
24963 rtx op0, op1, target;
24965 arg0 = CALL_EXPR_ARG (exp, 0);
24966 arg1 = CALL_EXPR_ARG (exp, 1);
24967 arg2 = CALL_EXPR_ARG (exp, 2);
24969 tmode = TYPE_MODE (TREE_TYPE (arg0));
24970 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24971 gcc_assert (VECTOR_MODE_P (tmode));
24973 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24974 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24975 elt = get_element_number (TREE_TYPE (arg0), arg2);
24977 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24978 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24980 op0 = force_reg (tmode, op0);
24981 op1 = force_reg (mode1, op1);
24983 /* OP0 is the source of these builtin functions and shouldn't be
24984 modified. Create a copy, use it and return it as target. */
24985 target = gen_reg_rtx (tmode);
24986 emit_move_insn (target, op0);
24987 ix86_expand_vector_set (true, target, op1, elt);
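  /* E.g. _mm_insert_epi16 from <emmintrin.h> lands here via
     __builtin_ia32_vec_set_v8hi; the copy above is what keeps the
     intrinsic non-destructive on its source operand.  */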
24992 /* Expand an expression EXP that calls a built-in function,
24993 with result going to TARGET if that's convenient
24994 (and in mode MODE if that's convenient).
24995 SUBTARGET may be used as the target for computing one of EXP's operands.
24996 IGNORE is nonzero if the value is to be ignored. */
24999 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
25000 enum machine_mode mode ATTRIBUTE_UNUSED,
25001 int ignore ATTRIBUTE_UNUSED)
25003 const struct builtin_description *d;
25005 enum insn_code icode;
25006 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
25007 tree arg0, arg1, arg2;
25008 rtx op0, op1, op2, pat;
25009 enum machine_mode mode0, mode1, mode2;
25010 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
25012 /* Determine whether the builtin function is available under the current ISA.
25013 Originally the builtin was not created if it wasn't applicable to the
25014 current ISA based on the command line switches. With function specific
25015 options, we need to check in the context of the function making the call
25016 whether it is supported. */
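  /* For example, a function compiled with
     __attribute__ ((target ("sse4.2"))) may call the SSE4.2 builtins
     even if the translation unit as a whole was not built with
     -msse4.2; the check below diagnoses calls made outside any such
     ISA context.  */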
25017 if (ix86_builtins_isa[fcode].isa
25018 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
25020 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
25021 NULL, NULL, false);
25024 error ("%qE needs unknown isa option", fndecl);
25027 gcc_assert (opts != NULL);
25028 error ("%qE needs isa option %s", fndecl, opts);
25036 case IX86_BUILTIN_MASKMOVQ:
25037 case IX86_BUILTIN_MASKMOVDQU:
25038 icode = (fcode == IX86_BUILTIN_MASKMOVQ
25039 ? CODE_FOR_mmx_maskmovq
25040 : CODE_FOR_sse2_maskmovdqu);
25041 /* Note the arg order is different from the operand order. */
25042 arg1 = CALL_EXPR_ARG (exp, 0);
25043 arg2 = CALL_EXPR_ARG (exp, 1);
25044 arg0 = CALL_EXPR_ARG (exp, 2);
25045 op0 = expand_normal (arg0);
25046 op1 = expand_normal (arg1);
25047 op2 = expand_normal (arg2);
25048 mode0 = insn_data[icode].operand[0].mode;
25049 mode1 = insn_data[icode].operand[1].mode;
25050 mode2 = insn_data[icode].operand[2].mode;
25052 op0 = force_reg (Pmode, op0);
25053 op0 = gen_rtx_MEM (mode1, op0);
25055 if (!insn_data[icode].operand[0].predicate (op0, mode0))
25056 op0 = copy_to_mode_reg (mode0, op0);
25057 if (!insn_data[icode].operand[1].predicate (op1, mode1))
25058 op1 = copy_to_mode_reg (mode1, op1);
25059 if (!insn_data[icode].operand[2].predicate (op2, mode2))
25060 op2 = copy_to_mode_reg (mode2, op2);
25061 pat = GEN_FCN (icode) (op0, op1, op2);
25067 case IX86_BUILTIN_LDMXCSR:
25068 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25069 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25070 emit_move_insn (target, op0);
25071 emit_insn (gen_sse_ldmxcsr (target));
25074 case IX86_BUILTIN_STMXCSR:
25075 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25076 emit_insn (gen_sse_stmxcsr (target));
25077 return copy_to_mode_reg (SImode, target);
25079 case IX86_BUILTIN_CLFLUSH:
25080 arg0 = CALL_EXPR_ARG (exp, 0);
25081 op0 = expand_normal (arg0);
25082 icode = CODE_FOR_sse2_clflush;
25083 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25084 op0 = copy_to_mode_reg (Pmode, op0);
25086 emit_insn (gen_sse2_clflush (op0));
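    /* The next two cases expand the MONITOR/MWAIT pair, exposed by
       <pmmintrin.h> as _mm_monitor and _mm_mwait.  */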
25089 case IX86_BUILTIN_MONITOR:
25090 arg0 = CALL_EXPR_ARG (exp, 0);
25091 arg1 = CALL_EXPR_ARG (exp, 1);
25092 arg2 = CALL_EXPR_ARG (exp, 2);
25093 op0 = expand_normal (arg0);
25094 op1 = expand_normal (arg1);
25095 op2 = expand_normal (arg2);
25097 op0 = copy_to_mode_reg (Pmode, op0);
25099 op1 = copy_to_mode_reg (SImode, op1);
25101 op2 = copy_to_mode_reg (SImode, op2);
25102 emit_insn (ix86_gen_monitor (op0, op1, op2));
25105 case IX86_BUILTIN_MWAIT:
25106 arg0 = CALL_EXPR_ARG (exp, 0);
25107 arg1 = CALL_EXPR_ARG (exp, 1);
25108 op0 = expand_normal (arg0);
25109 op1 = expand_normal (arg1);
25111 op0 = copy_to_mode_reg (SImode, op0);
25113 op1 = copy_to_mode_reg (SImode, op1);
25114 emit_insn (gen_sse3_mwait (op0, op1));
25117 case IX86_BUILTIN_VEC_INIT_V2SI:
25118 case IX86_BUILTIN_VEC_INIT_V4HI:
25119 case IX86_BUILTIN_VEC_INIT_V8QI:
25120 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25122 case IX86_BUILTIN_VEC_EXT_V2DF:
25123 case IX86_BUILTIN_VEC_EXT_V2DI:
25124 case IX86_BUILTIN_VEC_EXT_V4SF:
25125 case IX86_BUILTIN_VEC_EXT_V4SI:
25126 case IX86_BUILTIN_VEC_EXT_V8HI:
25127 case IX86_BUILTIN_VEC_EXT_V2SI:
25128 case IX86_BUILTIN_VEC_EXT_V4HI:
25129 case IX86_BUILTIN_VEC_EXT_V16QI:
25130 return ix86_expand_vec_ext_builtin (exp, target);
25132 case IX86_BUILTIN_VEC_SET_V2DI:
25133 case IX86_BUILTIN_VEC_SET_V4SF:
25134 case IX86_BUILTIN_VEC_SET_V4SI:
25135 case IX86_BUILTIN_VEC_SET_V8HI:
25136 case IX86_BUILTIN_VEC_SET_V4HI:
25137 case IX86_BUILTIN_VEC_SET_V16QI:
25138 return ix86_expand_vec_set_builtin (exp);
25140 case IX86_BUILTIN_VEC_PERM_V2DF:
25141 case IX86_BUILTIN_VEC_PERM_V4SF:
25142 case IX86_BUILTIN_VEC_PERM_V2DI:
25143 case IX86_BUILTIN_VEC_PERM_V4SI:
25144 case IX86_BUILTIN_VEC_PERM_V8HI:
25145 case IX86_BUILTIN_VEC_PERM_V16QI:
25146 case IX86_BUILTIN_VEC_PERM_V2DI_U:
25147 case IX86_BUILTIN_VEC_PERM_V4SI_U:
25148 case IX86_BUILTIN_VEC_PERM_V8HI_U:
25149 case IX86_BUILTIN_VEC_PERM_V16QI_U:
25150 case IX86_BUILTIN_VEC_PERM_V4DF:
25151 case IX86_BUILTIN_VEC_PERM_V8SF:
25152 return ix86_expand_vec_perm_builtin (exp);
25154 case IX86_BUILTIN_INFQ:
25155 case IX86_BUILTIN_HUGE_VALQ:
25157 REAL_VALUE_TYPE inf;
25161 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25163 tmp = validize_mem (force_const_mem (mode, tmp));
25166 target = gen_reg_rtx (mode);
25168 emit_move_insn (target, tmp);
25172 case IX86_BUILTIN_LLWPCB:
25173 arg0 = CALL_EXPR_ARG (exp, 0);
25174 op0 = expand_normal (arg0);
25175 icode = CODE_FOR_lwp_llwpcb;
25176 if (!insn_data[icode].operand[0].predicate (op0, Pmode))
25177 op0 = copy_to_mode_reg (Pmode, op0);
25178 emit_insn (gen_lwp_llwpcb (op0));
25181 case IX86_BUILTIN_SLWPCB:
25182 icode = CODE_FOR_lwp_slwpcb;
25184 || !insn_data[icode].operand[0].predicate (target, Pmode))
25185 target = gen_reg_rtx (Pmode);
25186 emit_insn (gen_lwp_slwpcb (target));
25193 for (i = 0, d = bdesc_special_args;
25194 i < ARRAY_SIZE (bdesc_special_args);
25196 if (d->code == fcode)
25197 return ix86_expand_special_args_builtin (d, exp, target);
25199 for (i = 0, d = bdesc_args;
25200 i < ARRAY_SIZE (bdesc_args);
25202 if (d->code == fcode)
25205 case IX86_BUILTIN_FABSQ:
25206 case IX86_BUILTIN_COPYSIGNQ:
25208 /* Emit a normal call if SSE2 isn't available. */
25209 return expand_call (exp, target, ignore);
25211 return ix86_expand_args_builtin (d, exp, target);
25214 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25215 if (d->code == fcode)
25216 return ix86_expand_sse_comi (d, exp, target);
25218 for (i = 0, d = bdesc_pcmpestr;
25219 i < ARRAY_SIZE (bdesc_pcmpestr);
25221 if (d->code == fcode)
25222 return ix86_expand_sse_pcmpestr (d, exp, target);
25224 for (i = 0, d = bdesc_pcmpistr;
25225 i < ARRAY_SIZE (bdesc_pcmpistr);
25227 if (d->code == fcode)
25228 return ix86_expand_sse_pcmpistr (d, exp, target);
25230 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25231 if (d->code == fcode)
25232 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25233 (enum ix86_builtin_func_type)
25234 d->flag, d->comparison);
25236 gcc_unreachable ();
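/* Illustrative sketch, not part of the original file: the dispatch loops
   above all follow one pattern -- scan a static table of builtin
   descriptors for a matching code and hand the call to its expander.
   The types below are hypothetical stand-ins for builtin_description.  */

struct sketch_bdesc
{
  unsigned code;                 /* IX86_BUILTIN_* code */
  void *(*expand) (void *exp);   /* expander callback */
};

static void *
sketch_dispatch (const struct sketch_bdesc *table, unsigned n,
                 unsigned fcode, void *exp)
{
  unsigned i;

  for (i = 0; i < n; i++)
    if (table[i].code == fcode)
      return table[i].expand (exp);
  return 0;  /* no match; the caller falls through to the next table */
}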
25239 /* Returns a function decl for a vectorized version of the builtin function
25240 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25241 if it is not available. */
25244 ix86_builtin_vectorized_function (tree fndecl, tree type_out,
25247 enum machine_mode in_mode, out_mode;
25249 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
25251 if (TREE_CODE (type_out) != VECTOR_TYPE
25252 || TREE_CODE (type_in) != VECTOR_TYPE
25253 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
25256 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25257 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25258 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25259 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25263 case BUILT_IN_SQRT:
25264 if (out_mode == DFmode && out_n == 2
25265 && in_mode == DFmode && in_n == 2)
25266 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25269 case BUILT_IN_SQRTF:
25270 if (out_mode == SFmode && out_n == 4
25271 && in_mode == SFmode && in_n == 4)
25272 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25275 case BUILT_IN_LRINT:
25276 if (out_mode == SImode && out_n == 4
25277 && in_mode == DFmode && in_n == 2)
25278 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25281 case BUILT_IN_LRINTF:
25282 if (out_mode == SImode && out_n == 4
25283 && in_mode == SFmode && in_n == 4)
25284 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25287 case BUILT_IN_COPYSIGN:
25288 if (out_mode == DFmode && out_n == 2
25289 && in_mode == DFmode && in_n == 2)
25290 return ix86_builtins[IX86_BUILTIN_CPYSGNPD];
25293 case BUILT_IN_COPYSIGNF:
25294 if (out_mode == SFmode && out_n == 4
25295 && in_mode == SFmode && in_n == 4)
25296 return ix86_builtins[IX86_BUILTIN_CPYSGNPS];
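/* Worked example for the mapping above (an illustrative note, not code
   from the original file): when the vectorizer asks for BUILT_IN_SQRT
   with a V2DF result and a V2DF argument, the switch returns the decl
   for IX86_BUILTIN_SQRTPD, so a loop such as

     for (i = 0; i < n; i++) a[i] = sqrt (b[i]);   // double a[], b[]

   can be expanded with one sqrtpd per two lanes.  */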
25303 /* Dispatch to a handler for a vectorization library. */
25304 if (ix86_veclib_handler)
25305 return ix86_veclib_handler ((enum built_in_function) fn, type_out,
25311 /* Handler for an SVML-style interface to
25312 a library with vectorized intrinsics. */
25315 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25318 tree fntype, new_fndecl, args;
25321 enum machine_mode el_mode, in_mode;
25324 /* The SVML is suitable for unsafe math only. */
25325 if (!flag_unsafe_math_optimizations)
25328 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25329 n = TYPE_VECTOR_SUBPARTS (type_out);
25330 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25331 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25332 if (el_mode != in_mode
25340 case BUILT_IN_LOG10:
25342 case BUILT_IN_TANH:
25344 case BUILT_IN_ATAN:
25345 case BUILT_IN_ATAN2:
25346 case BUILT_IN_ATANH:
25347 case BUILT_IN_CBRT:
25348 case BUILT_IN_SINH:
25350 case BUILT_IN_ASINH:
25351 case BUILT_IN_ASIN:
25352 case BUILT_IN_COSH:
25354 case BUILT_IN_ACOSH:
25355 case BUILT_IN_ACOS:
25356 if (el_mode != DFmode || n != 2)
25360 case BUILT_IN_EXPF:
25361 case BUILT_IN_LOGF:
25362 case BUILT_IN_LOG10F:
25363 case BUILT_IN_POWF:
25364 case BUILT_IN_TANHF:
25365 case BUILT_IN_TANF:
25366 case BUILT_IN_ATANF:
25367 case BUILT_IN_ATAN2F:
25368 case BUILT_IN_ATANHF:
25369 case BUILT_IN_CBRTF:
25370 case BUILT_IN_SINHF:
25371 case BUILT_IN_SINF:
25372 case BUILT_IN_ASINHF:
25373 case BUILT_IN_ASINF:
25374 case BUILT_IN_COSHF:
25375 case BUILT_IN_COSF:
25376 case BUILT_IN_ACOSHF:
25377 case BUILT_IN_ACOSF:
25378 if (el_mode != SFmode || n != 4)
25386 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25388 if (fn == BUILT_IN_LOGF)
25389 strcpy (name, "vmlsLn4");
25390 else if (fn == BUILT_IN_LOG)
25391 strcpy (name, "vmldLn2");
25394 sprintf (name, "vmls%s", bname+10);
25395 name[strlen (name)-1] = '4';
25398 sprintf (name, "vmld%s2", bname+10);
25400 /* Convert to uppercase. */
25404 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25405 args = TREE_CHAIN (args))
25409 fntype = build_function_type_list (type_out, type_in, NULL);
25411 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25413 /* Build a function declaration for the vectorized function. */
25414 new_fndecl = build_decl (BUILTINS_LOCATION,
25415 FUNCTION_DECL, get_identifier (name), fntype);
25416 TREE_PUBLIC (new_fndecl) = 1;
25417 DECL_EXTERNAL (new_fndecl) = 1;
25418 DECL_IS_NOVOPS (new_fndecl) = 1;
25419 TREE_READONLY (new_fndecl) = 1;
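/* Illustrative sketch of the SVML name mangling above (standalone C,
   not part of the original file; sketch_svml_name is a hypothetical
   helper).  "__builtin_sinf" becomes "vmlsSin4" and "__builtin_sin"
   becomes "vmldSin2"; log and logf are special-cased above.  */
#include <stdio.h>
#include <string.h>

static void
sketch_svml_name (const char *bname, int is_float, char name[20])
{
  if (is_float)
    {
      /* Skip the "__builtin_" prefix (10 chars), then turn the
         trailing 'f' into the vector width '4'.  */
      sprintf (name, "vmls%s", bname + 10);
      name[strlen (name) - 1] = '4';
    }
  else
    sprintf (name, "vmld%s2", bname + 10);

  /* Convert the first letter of the stem to uppercase.  */
  name[4] &= ~0x20;
}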
25424 /* Handler for an ACML-style interface to
25425 a library with vectorized intrinsics. */
25428 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25430 char name[20] = "__vr.._";
25431 tree fntype, new_fndecl, args;
25434 enum machine_mode el_mode, in_mode;
25437 /* The ACML is 64-bit only and suitable for unsafe math only, as
25438 it does not correctly support parts of IEEE with the required
25439 precision, such as denormals. */
25441 || !flag_unsafe_math_optimizations)
25444 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25445 n = TYPE_VECTOR_SUBPARTS (type_out);
25446 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25447 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25448 if (el_mode != in_mode
25458 case BUILT_IN_LOG2:
25459 case BUILT_IN_LOG10:
25462 if (el_mode != DFmode
25467 case BUILT_IN_SINF:
25468 case BUILT_IN_COSF:
25469 case BUILT_IN_EXPF:
25470 case BUILT_IN_POWF:
25471 case BUILT_IN_LOGF:
25472 case BUILT_IN_LOG2F:
25473 case BUILT_IN_LOG10F:
25476 if (el_mode != SFmode
25485 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25486 sprintf (name + 7, "%s", bname+10);
25489 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25490 args = TREE_CHAIN (args))
25494 fntype = build_function_type_list (type_out, type_in, NULL);
25496 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25498 /* Build a function declaration for the vectorized function. */
25499 new_fndecl = build_decl (BUILTINS_LOCATION,
25500 FUNCTION_DECL, get_identifier (name), fntype);
25501 TREE_PUBLIC (new_fndecl) = 1;
25502 DECL_EXTERNAL (new_fndecl) = 1;
25503 DECL_IS_NOVOPS (new_fndecl) = 1;
25504 TREE_READONLY (new_fndecl) = 1;
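/* Worked example for the ACML mangling above (an illustrative note;
   the lines filling in the ".." are elided here): the template
   "__vr.._" is completed with the element type and vector width, so
   "__builtin_sin" maps to "__vrd2_sin" and "__builtin_sinf" to
   "__vrs4_sinf".  */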
25510 /* Returns a decl of a function that implements conversion of an integer vector
25511 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE
25512 are the types involved when converting according to CODE.
25513 Return NULL_TREE if it is not available. */
25516 ix86_vectorize_builtin_conversion (unsigned int code,
25517 tree dest_type, tree src_type)
25525 switch (TYPE_MODE (src_type))
25528 switch (TYPE_MODE (dest_type))
25531 return (TYPE_UNSIGNED (src_type)
25532 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS]
25533 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25535 return (TYPE_UNSIGNED (src_type)
25537 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]);
25543 switch (TYPE_MODE (dest_type))
25546 return (TYPE_UNSIGNED (src_type)
25548 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]);
25557 case FIX_TRUNC_EXPR:
25558 switch (TYPE_MODE (dest_type))
25561 switch (TYPE_MODE (src_type))
25564 return (TYPE_UNSIGNED (dest_type)
25566 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]);
25568 return (TYPE_UNSIGNED (dest_type)
25570 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]);
25577 switch (TYPE_MODE (src_type))
25580 return (TYPE_UNSIGNED (dest_type)
25582 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]);
25599 /* Returns a decl for a target-specific builtin that implements the
25600 reciprocal of the function, or NULL_TREE if it is not available. */
25603 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25604 bool sqrt ATTRIBUTE_UNUSED)
25606 if (! (TARGET_SSE_MATH && !optimize_insn_for_size_p ()
25607 && flag_finite_math_only && !flag_trapping_math
25608 && flag_unsafe_math_optimizations))
25612 /* Machine dependent builtins. */
25615 /* Vectorized version of sqrt to rsqrt conversion. */
25616 case IX86_BUILTIN_SQRTPS_NR:
25617 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25623 /* Normal builtins. */
25626 /* Sqrt to rsqrt conversion. */
25627 case BUILT_IN_SQRTF:
25628 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25635 /* Helper for avx_vpermilps256_operand et al. This is also used by
25636 the expansion functions to turn the parallel back into a mask.
25637 The return value is 0 for no match and the imm8+1 for a match. */
25640 avx_vpermilp_parallel (rtx par, enum machine_mode mode)
25642 unsigned i, nelt = GET_MODE_NUNITS (mode);
25644 unsigned char ipar[8];
25646 if (XVECLEN (par, 0) != (int) nelt)
25649 /* Validate that all of the elements are constants, and not totally
25650 out of range. Copy the data into an integral array to make the
25651 subsequent checks easier. */
25652 for (i = 0; i < nelt; ++i)
25654 rtx er = XVECEXP (par, 0, i);
25655 unsigned HOST_WIDE_INT ei;
25657 if (!CONST_INT_P (er))
25668 /* In the 256-bit DFmode case, we can only move elements within a 128-bit lane. */
25670 for (i = 0; i < 2; ++i)
25674 mask |= ipar[i] << i;
25676 for (i = 2; i < 4; ++i)
25680 mask |= (ipar[i] - 2) << i;
25685 /* In the 256-bit SFmode case, we have full freedom of movement
25686 within the low 128-bit lane, but the high 128-bit lane must
25687 mirror the exact same pattern. */
25688 for (i = 0; i < 4; ++i)
25689 if (ipar[i] + 4 != ipar[i + 4])
25696 /* In the 128-bit case, we have full freedom in the placement of
25697 the elements from the source operand. */
25698 for (i = 0; i < nelt; ++i)
25699 mask |= ipar[i] << (i * (nelt / 2));
25703 gcc_unreachable ();
25706 /* Make sure success has a non-zero value by adding one. */
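/* Illustrative sketch of the 128-bit SFmode packing above (standalone
   C, not part of the original file).  Each of the four elements
   selects a source lane with a 2-bit field of the vpermilps
   immediate; the identity permutation {0, 1, 2, 3} packs to 0xE4.  */
#include <assert.h>

static unsigned char
sketch_vpermilps_imm8 (const unsigned char perm[4])
{
  unsigned char mask = 0;
  unsigned i;

  for (i = 0; i < 4; ++i)
    {
      assert (perm[i] < 4);          /* in range, as validated above */
      mask |= perm[i] << (i * 2);    /* nelt == 4, so i * (nelt / 2) */
    }
  return mask;
}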
25710 /* Helper for avx_vperm2f128_v4df_operand et al. This is also used by
25711 the expansion functions to turn the parallel back into a mask.
25712 The return value is 0 for no match and the imm8+1 for a match. */
25715 avx_vperm2f128_parallel (rtx par, enum machine_mode mode)
25717 unsigned i, nelt = GET_MODE_NUNITS (mode), nelt2 = nelt / 2;
25719 unsigned char ipar[8];
25721 if (XVECLEN (par, 0) != (int) nelt)
25724 /* Validate that all of the elements are constants, and not totally
25725 out of range. Copy the data into an integral array to make the
25726 subsequent checks easier. */
25727 for (i = 0; i < nelt; ++i)
25729 rtx er = XVECEXP (par, 0, i);
25730 unsigned HOST_WIDE_INT ei;
25732 if (!CONST_INT_P (er))
25735 if (ei >= 2 * nelt)
25740 /* Validate that each half of the permute selects consecutive elements. */
25741 for (i = 0; i < nelt2 - 1; ++i)
25742 if (ipar[i] + 1 != ipar[i + 1])
25744 for (i = nelt2; i < nelt - 1; ++i)
25745 if (ipar[i] + 1 != ipar[i + 1])
25748 /* Reconstruct the mask. */
25749 for (i = 0; i < 2; ++i)
25751 unsigned e = ipar[i * nelt2];
25755 mask |= e << (i * 4);
25758 /* Make sure success has a non-zero value by adding one. */
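/* Illustrative sketch of the vperm2f128 mask reconstruction above
   (standalone C, not part of the original file).  Each half of the
   destination picks one of the four source halves with a 4-bit field;
   e.g. for V4DF, the parallel {0, 1, 4, 5} -- low half of op0, low
   half of op1 -- packs to 0x20.  */
static unsigned char
sketch_vperm2f128_imm8 (const unsigned char *ipar, unsigned nelt)
{
  unsigned nelt2 = nelt / 2;
  unsigned char mask = 0;
  unsigned i;

  for (i = 0; i < 2; ++i)
    {
      unsigned e = ipar[i * nelt2] / nelt2;   /* which source half */
      mask |= e << (i * 4);
    }
  return mask;
}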
25763 /* Store OPERAND to the memory after reload is completed. This means
25764 that we can't easily use assign_stack_local. */
25766 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25770 gcc_assert (reload_completed);
25771 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25773 result = gen_rtx_MEM (mode,
25774 gen_rtx_PLUS (Pmode,
25776 GEN_INT (-RED_ZONE_SIZE)));
25777 emit_move_insn (result, operand);
25779 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25785 operand = gen_lowpart (DImode, operand);
25789 gen_rtx_SET (VOIDmode,
25790 gen_rtx_MEM (DImode,
25791 gen_rtx_PRE_DEC (DImode,
25792 stack_pointer_rtx)),
25796 gcc_unreachable ();
25798 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25807 split_di (&operand, 1, operands, operands + 1);
25809 gen_rtx_SET (VOIDmode,
25810 gen_rtx_MEM (SImode,
25811 gen_rtx_PRE_DEC (Pmode,
25812 stack_pointer_rtx)),
25815 gen_rtx_SET (VOIDmode,
25816 gen_rtx_MEM (SImode,
25817 gen_rtx_PRE_DEC (Pmode,
25818 stack_pointer_rtx)),
25823 /* Store HImodes as SImodes. */
25824 operand = gen_lowpart (SImode, operand);
25828 gen_rtx_SET (VOIDmode,
25829 gen_rtx_MEM (GET_MODE (operand),
25830 gen_rtx_PRE_DEC (SImode,
25831 stack_pointer_rtx)),
25835 gcc_unreachable ();
25837 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25842 /* Free operand from the memory. */
25844 ix86_free_from_memory (enum machine_mode mode)
25846 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25850 if (mode == DImode || TARGET_64BIT)
25854 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25855 to a pop or add instruction if registers are available. */
25856 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25857 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25862 /* Implement TARGET_IRA_COVER_CLASSES. If -mfpmath=sse, we prefer
25863 SSE_REGS to FLOAT_REGS if their costs for a pseudo are the
25865 static const reg_class_t *
25866 i386_ira_cover_classes (void)
25868 static const reg_class_t sse_fpmath_classes[] = {
25869 GENERAL_REGS, SSE_REGS, MMX_REGS, FLOAT_REGS, LIM_REG_CLASSES
25871 static const reg_class_t no_sse_fpmath_classes[] = {
25872 GENERAL_REGS, FLOAT_REGS, MMX_REGS, SSE_REGS, LIM_REG_CLASSES
25875 return TARGET_SSE_MATH ? sse_fpmath_classes : no_sse_fpmath_classes;
25878 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25879 QImode must go into class Q_REGS.
25880 Narrow ALL_REGS to GENERAL_REGS. This lets movsf and
25881 movdf do mem-to-mem moves through integer regs. */
25883 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25885 enum machine_mode mode = GET_MODE (x);
25887 /* We're only allowed to return a subclass of CLASS. Many of the
25888 following checks fail for NO_REGS, so eliminate that early. */
25889 if (regclass == NO_REGS)
25892 /* All classes can load zeros. */
25893 if (x == CONST0_RTX (mode))
25896 /* Force constants into memory if we are loading a (nonzero) constant into
25897 an MMX or SSE register. This is because there are no MMX/SSE instructions
25898 to load from a constant. */
25900 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25903 /* Prefer SSE regs only, if we can use them for math. */
25904 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25905 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25907 /* Floating-point constants need more complex checks. */
25908 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25910 /* General regs can load everything. */
25911 if (reg_class_subset_p (regclass, GENERAL_REGS))
25914 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25915 zero above. We only want to wind up preferring 80387 registers if
25916 we plan on doing computation with them. */
25918 && standard_80387_constant_p (x))
25920 /* Limit class to non-sse. */
25921 if (regclass == FLOAT_SSE_REGS)
25923 if (regclass == FP_TOP_SSE_REGS)
25925 if (regclass == FP_SECOND_SSE_REGS)
25926 return FP_SECOND_REG;
25927 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25934 /* Generally when we see PLUS here, it's the function invariant
25935 (plus soft-fp const_int), which can only be computed into general regs. */
25937 if (GET_CODE (x) == PLUS)
25938 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25940 /* QImode constants are easy to load, but non-constant QImode data
25941 must go into Q_REGS. */
25942 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25944 if (reg_class_subset_p (regclass, Q_REGS))
25946 if (reg_class_subset_p (Q_REGS, regclass))
25954 /* Discourage putting floating-point values in SSE registers unless
25955 SSE math is being used, and likewise for the 387 registers. */
25957 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25959 enum machine_mode mode = GET_MODE (x);
25961 /* Restrict the output reload class to the register bank that we are doing
25962 math on. If we would like not to return a subset of CLASS, reject this
25963 alternative: if reload cannot do this, it will still use its choice. */
25965 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25966 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25968 if (X87_FLOAT_MODE_P (mode))
25970 if (regclass == FP_TOP_SSE_REGS)
25972 else if (regclass == FP_SECOND_SSE_REGS)
25973 return FP_SECOND_REG;
25975 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25982 ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
25983 enum machine_mode mode,
25984 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25986 /* QImode spills from non-QI registers require an
25987 intermediate register on 32-bit targets. */
25988 if (!in_p && mode == QImode && !TARGET_64BIT
25989 && (rclass == GENERAL_REGS
25990 || rclass == LEGACY_REGS
25991 || rclass == INDEX_REGS))
26000 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
26001 regno = true_regnum (x);
26003 /* Return Q_REGS if the operand is in memory. */
26011 /* If we are copying between general and FP registers, we need a memory
26012 location. The same is true for SSE and MMX registers.
26014 To optimize register_move_cost performance, allow the inline variant.
26016 The macro can't work reliably when one of the CLASSES is a class containing
26017 registers from multiple units (SSE, MMX, integer). We avoid this by never
26018 combining those units in a single alternative in the machine description.
26019 Ensure that this constraint holds to avoid unexpected surprises.
26021 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
26022 enforce these sanity checks. */
26025 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26026 enum machine_mode mode, int strict)
26028 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
26029 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
26030 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
26031 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
26032 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
26033 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
26035 gcc_assert (!strict);
26039 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
26042 /* ??? This is a lie. We do have moves between mmx/general, and for
26043 mmx/sse2. But by saying we need secondary memory we discourage the
26044 register allocator from using the mmx registers unless needed. */
26045 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
26048 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26050 /* SSE1 doesn't have any direct moves from other classes. */
26054 /* If the target says that inter-unit moves are more expensive
26055 than moving through memory, then don't generate them. */
26056 if (!TARGET_INTER_UNIT_MOVES)
26059 /* Between SSE and general, we have moves no larger than word size. */
26060 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
26068 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
26069 enum machine_mode mode, int strict)
26071 return inline_secondary_memory_needed (class1, class2, mode, strict);
26074 /* Return true if the registers in CLASS cannot represent the change from
26075 modes FROM to TO. */
26078 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
26079 enum reg_class regclass)
26084 /* x87 registers can't do subreg at all, as all values are reformatted
26085 to extended precision. */
26086 if (MAYBE_FLOAT_CLASS_P (regclass))
26089 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
26091 /* Vector registers do not support QI or HImode loads. If we don't
26092 disallow a change to these modes, reload will assume it's ok to
26093 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
26094 the vec_dupv4hi pattern. */
26095 if (GET_MODE_SIZE (from) < 4)
26098 /* Vector registers do not support subreg with nonzero offsets, which
26099 are otherwise valid for integer registers. Since we can't see
26100 whether we have a nonzero offset from here, prohibit all
26101 nonparadoxical subregs changing size. */
26102 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
26109 /* Return the cost of moving data of mode M between a
26110 register and memory. A value of 2 is the default; this cost is
26111 relative to those in `REGISTER_MOVE_COST'.
26113 This function is used extensively by register_move_cost, which is used to
26114 build tables at startup. Make it inline in this case.
26115 When IN is 2, return the maximum of the in and out move costs.
26117 If moving between registers and memory is more expensive than
26118 between two registers, you should define this macro to express the relative cost.
26121 Model also the increased moving costs of QImode registers in non Q_REGS classes. */
26125 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
26129 if (FLOAT_CLASS_P (regclass))
26147 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
26148 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
26150 if (SSE_CLASS_P (regclass))
26153 switch (GET_MODE_SIZE (mode))
26168 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
26169 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
26171 if (MMX_CLASS_P (regclass))
26174 switch (GET_MODE_SIZE (mode))
26186 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
26187 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
26189 switch (GET_MODE_SIZE (mode))
26192 if (Q_CLASS_P (regclass) || TARGET_64BIT)
26195 return ix86_cost->int_store[0];
26196 if (TARGET_PARTIAL_REG_DEPENDENCY
26197 && optimize_function_for_speed_p (cfun))
26198 cost = ix86_cost->movzbl_load;
26200 cost = ix86_cost->int_load[0];
26202 return MAX (cost, ix86_cost->int_store[0]);
26208 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
26210 return ix86_cost->movzbl_load;
26212 return ix86_cost->int_store[0] + 4;
26217 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
26218 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
26220 /* Compute the number of 32-bit moves needed. TFmode is moved as XFmode. */
26221 if (mode == TFmode)
26224 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
26226 cost = ix86_cost->int_load[2];
26228 cost = ix86_cost->int_store[2];
26229 return (cost * (((int) GET_MODE_SIZE (mode)
26230 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
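/* Illustrative sketch of the multi-word cost computation above
   (standalone C, not part of the original file): the per-word base
   cost is scaled by the number of word-sized moves, rounding the mode
   size up to whole words.  */
static int
sketch_multiword_move_cost (int base_cost, int mode_size, int word_size)
{
  int nwords = (mode_size + word_size - 1) / word_size;  /* ceiling */
  return base_cost * nwords;
}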
26235 ix86_memory_move_cost (enum machine_mode mode, reg_class_t regclass,
26238 return inline_memory_move_cost (mode, (enum reg_class) regclass, in ? 1 : 0);
26242 /* Return the cost of moving data from a register in class CLASS1 to
26243 one in class CLASS2.
26245 It is not required that the cost always equal 2 when FROM is the same as TO;
26246 on some machines it is expensive to move between registers if they are not
26247 general registers. */
26250 ix86_register_move_cost (enum machine_mode mode, reg_class_t class1_i,
26251 reg_class_t class2_i)
26253 enum reg_class class1 = (enum reg_class) class1_i;
26254 enum reg_class class2 = (enum reg_class) class2_i;
26256 /* In case we require secondary memory, compute the cost of the store followed
26257 by the load. In order to avoid bad register allocation choices, we need
26258 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
26260 if (inline_secondary_memory_needed (class1, class2, mode, 0))
26264 cost += inline_memory_move_cost (mode, class1, 2);
26265 cost += inline_memory_move_cost (mode, class2, 2);
26267 /* In the case of copying from a general purpose register we may emit multiple
26268 stores followed by a single load, causing a memory size mismatch stall.
26269 Count this as an arbitrarily high cost of 20. */
26270 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
26273 /* In the case of FP/MMX moves, the registers actually overlap, and we
26274 have to switch modes in order to treat them differently. */
26275 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
26276 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
26282 /* Moves between SSE/MMX and integer unit are expensive. */
26283 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
26284 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
26286 /* ??? By keeping the returned value relatively high, we limit the number
26287 of moves between integer and MMX/SSE registers for all targets.
26288 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
26289 where integer modes in MMX/SSE registers are not tieable
26290 because of missing QImode and HImode moves to, from or between
26291 MMX/SSE registers. */
26292 return MAX (8, ix86_cost->mmxsse_to_integer);
26294 if (MAYBE_FLOAT_CLASS_P (class1))
26295 return ix86_cost->fp_move;
26296 if (MAYBE_SSE_CLASS_P (class1))
26297 return ix86_cost->sse_move;
26298 if (MAYBE_MMX_CLASS_P (class1))
26299 return ix86_cost->mmx_move;
26303 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26306 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26308 /* The flags registers can hold only CCmode values, and CCmode values can live only there. */
26309 if (CC_REGNO_P (regno))
26310 return GET_MODE_CLASS (mode) == MODE_CC;
26311 if (GET_MODE_CLASS (mode) == MODE_CC
26312 || GET_MODE_CLASS (mode) == MODE_RANDOM
26313 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26315 if (FP_REGNO_P (regno))
26316 return VALID_FP_MODE_P (mode);
26317 if (SSE_REGNO_P (regno))
26319 /* We implement the move patterns for all vector modes into and
26320 out of SSE registers, even when no operation instructions
26321 are available. OImode move is available only when AVX is enabled. */
26323 return ((TARGET_AVX && mode == OImode)
26324 || VALID_AVX256_REG_MODE (mode)
26325 || VALID_SSE_REG_MODE (mode)
26326 || VALID_SSE2_REG_MODE (mode)
26327 || VALID_MMX_REG_MODE (mode)
26328 || VALID_MMX_REG_MODE_3DNOW (mode));
26330 if (MMX_REGNO_P (regno))
26332 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26333 so if the register is available at all, then we can move data of
26334 the given mode into or out of it. */
26335 return (VALID_MMX_REG_MODE (mode)
26336 || VALID_MMX_REG_MODE_3DNOW (mode));
26339 if (mode == QImode)
26341 /* Take care with QImode values - they can be in non-QI regs,
26342 but then they do cause partial register stalls. */
26343 if (regno <= BX_REG || TARGET_64BIT)
26345 if (!TARGET_PARTIAL_REG_STALL)
26347 return reload_in_progress || reload_completed;
26349 /* We handle both integer and floats in the general purpose registers. */
26350 else if (VALID_INT_MODE_P (mode))
26352 else if (VALID_FP_MODE_P (mode))
26354 else if (VALID_DFP_MODE_P (mode))
26356 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26357 on to use that value in smaller contexts, this can easily force a
26358 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26359 supporting DImode, allow it. */
26360 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26366 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26367 tieable integer mode. */
26370 ix86_tieable_integer_mode_p (enum machine_mode mode)
26379 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26382 return TARGET_64BIT;
26389 /* Return true if MODE1 is accessible in a register that can hold MODE2
26390 without copying. That is, all register classes that can hold MODE2
26391 can also hold MODE1. */
26394 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26396 if (mode1 == mode2)
26399 if (ix86_tieable_integer_mode_p (mode1)
26400 && ix86_tieable_integer_mode_p (mode2))
26403 /* MODE2 being XFmode implies fp stack or general regs, which means we
26404 can tie any smaller floating point modes to it. Note that we do not
26405 tie this with TFmode. */
26406 if (mode2 == XFmode)
26407 return mode1 == SFmode || mode1 == DFmode;
26409 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26410 that we can tie it with SFmode. */
26411 if (mode2 == DFmode)
26412 return mode1 == SFmode;
26414 /* If MODE2 is only appropriate for an SSE register, then tie with
26415 any other mode acceptable to SSE registers. */
26416 if (GET_MODE_SIZE (mode2) == 16
26417 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26418 return (GET_MODE_SIZE (mode1) == 16
26419 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26421 /* If MODE2 is appropriate for an MMX register, then tie
26422 with any other mode acceptable to MMX registers. */
26423 if (GET_MODE_SIZE (mode2) == 8
26424 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26425 return (GET_MODE_SIZE (mode1) == 8
26426 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26431 /* Compute a (partial) cost for rtx X. Return true if the complete
26432 cost has been computed, and false if subexpressions should be
26433 scanned. In either case, *TOTAL contains the cost result. */
26436 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26438 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26439 enum machine_mode mode = GET_MODE (x);
26440 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26448 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26450 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26452 else if (flag_pic && SYMBOLIC_CONST (x)
26454 || (GET_CODE (x) != LABEL_REF
26455 && (GET_CODE (x) != SYMBOL_REF
26456 || !SYMBOL_REF_LOCAL_P (x)))))
26463 if (mode == VOIDmode)
26466 switch (standard_80387_constant_p (x))
26471 default: /* Other constants */
26476 /* Start with (MEM (SYMBOL_REF)), since that's where
26477 it'll probably end up. Add a penalty for size. */
26478 *total = (COSTS_N_INSNS (1)
26479 + (flag_pic != 0 && !TARGET_64BIT)
26480 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26486 /* Zero extension is often completely free on x86_64, so make
26487 it as cheap as possible. */
26488 if (TARGET_64BIT && mode == DImode
26489 && GET_MODE (XEXP (x, 0)) == SImode)
26491 else if (TARGET_ZERO_EXTEND_WITH_AND)
26492 *total = cost->add;
26494 *total = cost->movzx;
26498 *total = cost->movsx;
26502 if (CONST_INT_P (XEXP (x, 1))
26503 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26505 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26508 *total = cost->add;
26511 if ((value == 2 || value == 3)
26512 && cost->lea <= cost->shift_const)
26514 *total = cost->lea;
26524 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26526 if (CONST_INT_P (XEXP (x, 1)))
26528 if (INTVAL (XEXP (x, 1)) > 32)
26529 *total = cost->shift_const + COSTS_N_INSNS (2);
26531 *total = cost->shift_const * 2;
26535 if (GET_CODE (XEXP (x, 1)) == AND)
26536 *total = cost->shift_var * 2;
26538 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26543 if (CONST_INT_P (XEXP (x, 1)))
26544 *total = cost->shift_const;
26546 *total = cost->shift_var;
26551 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26553 /* ??? SSE scalar cost should be used here. */
26554 *total = cost->fmul;
26557 else if (X87_FLOAT_MODE_P (mode))
26559 *total = cost->fmul;
26562 else if (FLOAT_MODE_P (mode))
26564 /* ??? SSE vector cost should be used here. */
26565 *total = cost->fmul;
26570 rtx op0 = XEXP (x, 0);
26571 rtx op1 = XEXP (x, 1);
26573 if (CONST_INT_P (XEXP (x, 1)))
26575 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26576 for (nbits = 0; value != 0; value &= value - 1)
26580 /* This is arbitrary. */
26583 /* Compute costs correctly for widening multiplication. */
26584 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26585 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26586 == GET_MODE_SIZE (mode))
26588 int is_mulwiden = 0;
26589 enum machine_mode inner_mode = GET_MODE (op0);
26591 if (GET_CODE (op0) == GET_CODE (op1))
26592 is_mulwiden = 1, op1 = XEXP (op1, 0);
26593 else if (CONST_INT_P (op1))
26595 if (GET_CODE (op0) == SIGN_EXTEND)
26596 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26599 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26603 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26606 *total = (cost->mult_init[MODE_INDEX (mode)]
26607 + nbits * cost->mult_bit
26608 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26617 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26618 /* ??? SSE cost should be used here. */
26619 *total = cost->fdiv;
26620 else if (X87_FLOAT_MODE_P (mode))
26621 *total = cost->fdiv;
26622 else if (FLOAT_MODE_P (mode))
26623 /* ??? SSE vector cost should be used here. */
26624 *total = cost->fdiv;
26626 *total = cost->divide[MODE_INDEX (mode)];
26630 if (GET_MODE_CLASS (mode) == MODE_INT
26631 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26633 if (GET_CODE (XEXP (x, 0)) == PLUS
26634 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26635 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26636 && CONSTANT_P (XEXP (x, 1)))
26638 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26639 if (val == 2 || val == 4 || val == 8)
26641 *total = cost->lea;
26642 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26643 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26644 outer_code, speed);
26645 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26649 else if (GET_CODE (XEXP (x, 0)) == MULT
26650 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26652 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26653 if (val == 2 || val == 4 || val == 8)
26655 *total = cost->lea;
26656 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26657 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26661 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26663 *total = cost->lea;
26664 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26665 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26666 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26673 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26675 /* ??? SSE cost should be used here. */
26676 *total = cost->fadd;
26679 else if (X87_FLOAT_MODE_P (mode))
26681 *total = cost->fadd;
26684 else if (FLOAT_MODE_P (mode))
26686 /* ??? SSE vector cost should be used here. */
26687 *total = cost->fadd;
26695 if (!TARGET_64BIT && mode == DImode)
26697 *total = (cost->add * 2
26698 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26699 << (GET_MODE (XEXP (x, 0)) != DImode))
26700 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26701 << (GET_MODE (XEXP (x, 1)) != DImode)));
26707 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26709 /* ??? SSE cost should be used here. */
26710 *total = cost->fchs;
26713 else if (X87_FLOAT_MODE_P (mode))
26715 *total = cost->fchs;
26718 else if (FLOAT_MODE_P (mode))
26720 /* ??? SSE vector cost should be used here. */
26721 *total = cost->fchs;
26727 if (!TARGET_64BIT && mode == DImode)
26728 *total = cost->add * 2;
26730 *total = cost->add;
26734 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26735 && XEXP (XEXP (x, 0), 1) == const1_rtx
26736 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26737 && XEXP (x, 1) == const0_rtx)
26739 /* This kind of construct is implemented using test[bwl].
26740 Treat it as if we had an AND. */
26741 *total = (cost->add
26742 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26743 + rtx_cost (const1_rtx, outer_code, speed));
26749 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26754 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26755 /* ??? SSE cost should be used here. */
26756 *total = cost->fabs;
26757 else if (X87_FLOAT_MODE_P (mode))
26758 *total = cost->fabs;
26759 else if (FLOAT_MODE_P (mode))
26760 /* ??? SSE vector cost should be used here. */
26761 *total = cost->fabs;
26765 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26766 /* ??? SSE cost should be used here. */
26767 *total = cost->fsqrt;
26768 else if (X87_FLOAT_MODE_P (mode))
26769 *total = cost->fsqrt;
26770 else if (FLOAT_MODE_P (mode))
26771 /* ??? SSE vector cost should be used here. */
26772 *total = cost->fsqrt;
26776 if (XINT (x, 1) == UNSPEC_TP)
26783 case VEC_DUPLICATE:
26784 /* ??? Assume all of these vector manipulation patterns are
26785 recognizable, in which case they all pretty much have the same cost. */
26787 *total = COSTS_N_INSNS (1);
26797 static int current_machopic_label_num;
26799 /* Given a symbol name and its associated stub, write out the
26800 definition of the stub. */
26803 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26805 unsigned int length;
26806 char *binder_name, *symbol_name, lazy_ptr_name[32];
26807 int label = ++current_machopic_label_num;
26809 /* For 64-bit we shouldn't get here. */
26810 gcc_assert (!TARGET_64BIT);
26812 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26813 symb = targetm.strip_name_encoding (symb);
26815 length = strlen (stub);
26816 binder_name = XALLOCAVEC (char, length + 32);
26817 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26819 length = strlen (symb);
26820 symbol_name = XALLOCAVEC (char, length + 32);
26821 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26823 sprintf (lazy_ptr_name, "L%d$lz", label);
26826 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26828 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26830 fprintf (file, "%s:\n", stub);
26831 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26835 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26836 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26837 fprintf (file, "\tjmp\t*%%edx\n");
26840 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26842 fprintf (file, "%s:\n", binder_name);
26846 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26847 fputs ("\tpushl\t%eax\n", file);
26850 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26852 fputs ("\tjmp\tdyld_stub_binding_helper\n", file);
26854 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26855 fprintf (file, "%s:\n", lazy_ptr_name);
26856 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26857 fprintf (file, ASM_LONG "%s\n", binder_name);
26859 #endif /* TARGET_MACHO */
26861 /* Order the registers for the register allocator. */
26864 x86_order_regs_for_local_alloc (void)
26869 /* First allocate the local general purpose registers. */
26870 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26871 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26872 reg_alloc_order [pos++] = i;
26874 /* Global general purpose registers. */
26875 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26876 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26877 reg_alloc_order [pos++] = i;
26879 /* x87 registers come first in case we are doing FP math using them. */
26881 if (!TARGET_SSE_MATH)
26882 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26883 reg_alloc_order [pos++] = i;
26885 /* SSE registers. */
26886 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26887 reg_alloc_order [pos++] = i;
26888 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26889 reg_alloc_order [pos++] = i;
26891 /* x87 registers. */
26892 if (TARGET_SSE_MATH)
26893 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26894 reg_alloc_order [pos++] = i;
26896 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26897 reg_alloc_order [pos++] = i;
26899 /* Initialize the rest of the array, as we do not allocate some registers at all. */
26901 while (pos < FIRST_PSEUDO_REGISTER)
26902 reg_alloc_order [pos++] = 0;
26905 /* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
26906 struct attribute_spec.handler. */
26908 ix86_handle_abi_attribute (tree *node, tree name,
26909 tree args ATTRIBUTE_UNUSED,
26910 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26912 if (TREE_CODE (*node) != FUNCTION_TYPE
26913 && TREE_CODE (*node) != METHOD_TYPE
26914 && TREE_CODE (*node) != FIELD_DECL
26915 && TREE_CODE (*node) != TYPE_DECL)
26917 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26919 *no_add_attrs = true;
26924 warning (OPT_Wattributes, "%qE attribute only available for 64-bit",
26926 *no_add_attrs = true;
26930 /* The ms_abi and sysv_abi attributes are mutually exclusive. */
26931 if (is_attribute_p ("ms_abi", name))
26933 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26935 error ("ms_abi and sysv_abi attributes are not compatible");
26940 else if (is_attribute_p ("sysv_abi", name))
26942 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26944 error ("ms_abi and sysv_abi attributes are not compatible");
26953 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26954 struct attribute_spec.handler. */
26956 ix86_handle_struct_attribute (tree *node, tree name,
26957 tree args ATTRIBUTE_UNUSED,
26958 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26961 if (DECL_P (*node))
26963 if (TREE_CODE (*node) == TYPE_DECL)
26964 type = &TREE_TYPE (*node);
26969 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26970 || TREE_CODE (*type) == UNION_TYPE)))
26972 warning (OPT_Wattributes, "%qE attribute ignored",
26974 *no_add_attrs = true;
26977 else if ((is_attribute_p ("ms_struct", name)
26978 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26979 || ((is_attribute_p ("gcc_struct", name)
26980 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26982 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26984 *no_add_attrs = true;
26991 ix86_handle_fndecl_attribute (tree *node, tree name,
26992 tree args ATTRIBUTE_UNUSED,
26993 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26995 if (TREE_CODE (*node) != FUNCTION_DECL)
26997 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26999 *no_add_attrs = true;
27003 #ifndef HAVE_AS_IX86_SWAP
27005 sorry ("ms_hook_prologue attribute needs assembler swap suffix support");
27012 ix86_ms_bitfield_layout_p (const_tree record_type)
27014 return ((TARGET_MS_BITFIELD_LAYOUT
27015 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
27016 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
27019 /* Returns an expression indicating where the this parameter is
27020 located on entry to the FUNCTION. */
27023 x86_this_parameter (tree function)
27025 tree type = TREE_TYPE (function);
27026 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
27031 const int *parm_regs;
27033 if (ix86_function_type_abi (type) == MS_ABI)
27034 parm_regs = x86_64_ms_abi_int_parameter_registers;
27036 parm_regs = x86_64_int_parameter_registers;
27037 return gen_rtx_REG (DImode, parm_regs[aggr]);
27040 nregs = ix86_function_regparm (type, function);
27042 if (nregs > 0 && !stdarg_p (type))
27046 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
27047 regno = aggr ? DX_REG : CX_REG;
27048 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type)))
27052 return gen_rtx_MEM (SImode,
27053 plus_constant (stack_pointer_rtx, 4));
27062 return gen_rtx_MEM (SImode,
27063 plus_constant (stack_pointer_rtx, 4));
27066 return gen_rtx_REG (SImode, regno);
27069 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
27072 /* Determine whether x86_output_mi_thunk can succeed. */
27075 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
27076 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
27077 HOST_WIDE_INT vcall_offset, const_tree function)
27079 /* 64-bit can handle anything. */
27083 /* For 32-bit, everything's fine if we have one free register. */
27084 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
27087 /* Need a free register for vcall_offset. */
27091 /* Need a free register for GOT references. */
27092 if (flag_pic && !targetm.binds_local_p (function))
27095 /* Otherwise ok. */
27099 /* Output the assembler code for a thunk function. THUNK_DECL is the
27100 declaration for the thunk function itself, FUNCTION is the decl for
27101 the target function. DELTA is an immediate constant offset to be
27102 added to THIS. If VCALL_OFFSET is nonzero, the word at
27103 *(*this + vcall_offset) should be added to THIS. */
27106 x86_output_mi_thunk (FILE *file,
27107 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
27108 HOST_WIDE_INT vcall_offset, tree function)
27111 rtx this_param = x86_this_parameter (function);
27114 /* Make sure unwind info is emitted for the thunk if needed. */
27115 final_start_function (emit_barrier (), file, 1);
27117 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
27118 pull it in now and let DELTA benefit. */
27119 if (REG_P (this_param))
27120 this_reg = this_param;
27121 else if (vcall_offset)
27123 /* Put the this parameter into %eax. */
27124 xops[0] = this_param;
27125 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
27126 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27129 this_reg = NULL_RTX;
27131 /* Adjust the this parameter by a fixed constant. */
27134 xops[0] = GEN_INT (delta);
27135 xops[1] = this_reg ? this_reg : this_param;
27138 if (!x86_64_general_operand (xops[0], DImode))
27140 tmp = gen_rtx_REG (DImode, R10_REG);
27142 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
27144 xops[1] = this_param;
27146 if (x86_maybe_negate_const_int (&xops[0], DImode))
27147 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops);
27149 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
27151 else if (x86_maybe_negate_const_int (&xops[0], SImode))
27152 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops);
27154 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
27157 /* Adjust the this parameter by a value stored in the vtable. */
27161 tmp = gen_rtx_REG (DImode, R10_REG);
27164 int tmp_regno = CX_REG;
27165 if (lookup_attribute ("fastcall",
27166 TYPE_ATTRIBUTES (TREE_TYPE (function)))
27167 || lookup_attribute ("thiscall",
27168 TYPE_ATTRIBUTES (TREE_TYPE (function))))
27169 tmp_regno = AX_REG;
27170 tmp = gen_rtx_REG (SImode, tmp_regno);
27173 xops[0] = gen_rtx_MEM (Pmode, this_reg);
27175 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27177 /* Adjust the this parameter. */
27178 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
27179 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
27181 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
27182 xops[0] = GEN_INT (vcall_offset);
27184 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
27185 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
27187 xops[1] = this_reg;
27188 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
27191 /* If necessary, drop THIS back to its stack slot. */
27192 if (this_reg && this_reg != this_param)
27194 xops[0] = this_reg;
27195 xops[1] = this_param;
27196 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
27199 xops[0] = XEXP (DECL_RTL (function), 0);
27202 if (!flag_pic || targetm.binds_local_p (function))
27203 output_asm_insn ("jmp\t%P0", xops);
27204 /* All thunks should be in the same object as their target,
27205 and thus binds_local_p should be true. */
27206 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
27207 gcc_unreachable ();
27210 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
27211 tmp = gen_rtx_CONST (Pmode, tmp);
27212 tmp = gen_rtx_MEM (QImode, tmp);
27214 output_asm_insn ("jmp\t%A0", xops);
27219 if (!flag_pic || targetm.binds_local_p (function))
27220 output_asm_insn ("jmp\t%P0", xops);
27225 rtx sym_ref = XEXP (DECL_RTL (function), 0);
27226 if (TARGET_MACHO_BRANCH_ISLANDS)
27227 sym_ref = (gen_rtx_SYMBOL_REF
27229 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
27230 tmp = gen_rtx_MEM (QImode, sym_ref);
27232 output_asm_insn ("jmp\t%0", xops);
27235 #endif /* TARGET_MACHO */
27237 tmp = gen_rtx_REG (SImode, CX_REG);
27238 output_set_got (tmp, NULL_RTX);
27241 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
27242 output_asm_insn ("jmp\t{*}%1", xops);
27245 final_end_function ();
27249 x86_file_start (void)
27251 default_file_start ();
27253 darwin_file_start ();
27255 if (X86_FILE_START_VERSION_DIRECTIVE)
27256 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
27257 if (X86_FILE_START_FLTUSED)
27258 fputs ("\t.global\t__fltused\n", asm_out_file);
27259 if (ix86_asm_dialect == ASM_INTEL)
27260 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
27264 x86_field_alignment (tree field, int computed)
27266 enum machine_mode mode;
27267 tree type = TREE_TYPE (field);
27269 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
27271 mode = TYPE_MODE (strip_array_types (type));
27272 if (mode == DFmode || mode == DCmode
27273 || GET_MODE_CLASS (mode) == MODE_INT
27274 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
27275 return MIN (32, computed);
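/* Worked example (an illustrative note): on 32-bit targets without
   -malign-double, a "double" struct field computes 64-bit alignment
   but is capped at 32 bits here, matching the traditional i386 ABI.  */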
27279 /* Output assembler code to FILE to increment profiler label # LABELNO
27280 for profiling a function entry. */
27282 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
27284 const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
27289 #ifndef NO_PROFILE_COUNTERS
27290 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
27293 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
27294 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", mcount_name);
27296 fprintf (file, "\tcall\t%s\n", mcount_name);
27300 #ifndef NO_PROFILE_COUNTERS
27301 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%" PROFILE_COUNT_REGISTER "\n",
27304 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", mcount_name);
27308 #ifndef NO_PROFILE_COUNTERS
27309 fprintf (file, "\tmovl\t$%sP%d,%%" PROFILE_COUNT_REGISTER "\n",
27312 fprintf (file, "\tcall\t%s\n", mcount_name);
27316 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27317 /* We don't have exact information about the insn sizes, but we may assume
27318 quite safely that we are informed about all 1-byte insns and memory
27319 address sizes. This is enough to eliminate unnecessary padding in the vast majority of cases. */
27323 min_insn_size (rtx insn)
27327 if (!INSN_P (insn) || !active_insn_p (insn))
27330 /* Discard alignments we've emitted, and jump instructions. */
27331 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
27332 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
27334 if (JUMP_TABLE_DATA_P (insn))
27337 /* Important case - calls are always 5 bytes.
27338 It is common to have many calls in a row. */
27340 && symbolic_reference_mentioned_p (PATTERN (insn))
27341 && !SIBLING_CALL_P (insn))
27343 len = get_attr_length (insn);
27347 /* For normal instructions we rely on get_attr_length being exact,
27348 with a few exceptions. */
27349 if (!JUMP_P (insn))
27351 enum attr_type type = get_attr_type (insn);
27356 if (GET_CODE (PATTERN (insn)) == ASM_INPUT
27357 || asm_noperands (PATTERN (insn)) >= 0)
27364 /* Otherwise trust get_attr_length. */
27368 l = get_attr_length_address (insn);
27369 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27378 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte window. */
27382 ix86_avoid_jump_mispredicts (void)
27384 rtx insn, start = get_insns ();
27385 int nbytes = 0, njumps = 0;
27388 /* Look for all minimal intervals of instructions containing 4 jumps.
27389 The intervals are bounded by START and INSN. NBYTES is the total
27390 size of instructions in the interval including INSN and not including
27391 START. When NBYTES is smaller than 16 bytes, it is possible
27392 that the end of START and INSN end up in the same 16-byte page.
27394 The smallest offset in the page at which INSN can start is the case where START
27395 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
27396 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
27398 for (insn = start; insn; insn = NEXT_INSN (insn))
27402 if (LABEL_P (insn))
27404 int align = label_to_alignment (insn);
27405 int max_skip = label_to_max_skip (insn);
27409 /* If align > 3, only up to 16 - max_skip - 1 bytes can be
27410 already in the current 16-byte page, because otherwise
27411 ASM_OUTPUT_MAX_SKIP_ALIGN could skip max_skip or fewer
27412 bytes to reach a 16-byte boundary. */
27414 || (align <= 3 && max_skip != (1 << align) - 1))
27417 fprintf (dump_file, "Label %i with max_skip %i\n",
27418 INSN_UID (insn), max_skip);
27421 while (nbytes + max_skip >= 16)
27423 start = NEXT_INSN (start);
27424 if ((JUMP_P (start)
27425 && GET_CODE (PATTERN (start)) != ADDR_VEC
27426 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27428 njumps--, isjump = 1;
27431 nbytes -= min_insn_size (start);
27437 min_size = min_insn_size (insn);
27438 nbytes += min_size;
27440 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
27441 INSN_UID (insn), min_size);
27443 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27444 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27452 start = NEXT_INSN (start);
27453 if ((JUMP_P (start)
27454 && GET_CODE (PATTERN (start)) != ADDR_VEC
27455 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27457 njumps--, isjump = 1;
27460 nbytes -= min_insn_size (start);
27462 gcc_assert (njumps >= 0);
27464 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27465 INSN_UID (start), INSN_UID (insn), nbytes);
27467 if (njumps == 3 && isjump && nbytes < 16)
27469 int padsize = 15 - nbytes + min_insn_size (insn);
27472 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27473 INSN_UID (insn), padsize);
27474 emit_insn_before (gen_pad (GEN_INT (padsize)), insn);
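/* Illustrative sketch of the padding arithmetic above (standalone C,
   not part of the original file): with four jumps in an NBYTES < 16
   window, pad so that the window can no longer land in a single
   16-byte line; see the comment before the loop.  */
static int
sketch_pad_size (int nbytes, int last_insn_size)
{
  /* Matches padsize = 15 - nbytes + min_insn_size (insn) above.  */
  return 15 - nbytes + last_insn_size;
}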
27480 /* AMD Athlon works faster
27481 when RET is not the destination of a conditional jump or directly preceded
27482 by another jump instruction. We avoid the penalty by inserting a NOP just
27483 before the RET instructions in such cases. */
27485 ix86_pad_returns (void)
27490 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27492 basic_block bb = e->src;
27493 rtx ret = BB_END (bb);
27495 bool replace = false;
27497 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27498 || optimize_bb_for_size_p (bb))
27500 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27501 if (active_insn_p (prev) || LABEL_P (prev))
27503 if (prev && LABEL_P (prev))
27508 FOR_EACH_EDGE (e, ei, bb->preds)
27509 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27510 && !(e->flags & EDGE_FALLTHRU))
27515 prev = prev_active_insn (ret);
27517 && ((JUMP_P (prev) && any_condjump_p (prev))
27520 /* Empty functions get branch mispredict even when the jump destination
27521 is not visible to us. */
27522 if (!prev && !optimize_function_for_size_p (cfun))
27527 emit_jump_insn_before (gen_return_internal_long (), ret);
27533 /* Implement machine specific optimizations.  We implement padding of returns
27534 for K8 CPUs and a pass that avoids placing 4 jumps in a single 16 byte window.  */
27538 if (optimize && optimize_function_for_speed_p (cfun))
27540 if (TARGET_PAD_RETURNS)
27541 ix86_pad_returns ();
27542 #ifdef ASM_OUTPUT_MAX_SKIP_PAD
27543 if (TARGET_FOUR_JUMP_LIMIT)
27544 ix86_avoid_jump_mispredicts ();
27549 /* Return nonzero when a QImode register that must be represented via a REX prefix is used.  */
27552 x86_extended_QIreg_mentioned_p (rtx insn)
27555 extract_insn_cached (insn);
27556 for (i = 0; i < recog_data.n_operands; i++)
27557 if (REG_P (recog_data.operand[i])
27558 && REGNO (recog_data.operand[i]) > BX_REG)
27563 /* Return nonzero when P points to a register encoded via a REX prefix.
27564 Called via for_each_rtx. */
27566 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27568 unsigned int regno;
27571 regno = REGNO (*p);
27572 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27575 /* Return true when INSN mentions a register that must be encoded using a REX prefix.  */
27578 x86_extended_reg_mentioned_p (rtx insn)
27580 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27581 extended_reg_mentioned_1, NULL);
27584 /* If profitable, negate (without causing overflow) integer constant
27585 of mode MODE at location LOC. Return true in this case. */
27587 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode)
27591 if (!CONST_INT_P (*loc))
27597 /* DImode x86_64 constants must fit in 32 bits. */
27598 gcc_assert (x86_64_immediate_operand (*loc, mode));
27609 gcc_unreachable ();
27612 /* Avoid overflows. */
27613 if (mode_signbit_p (mode, *loc))
27616 val = INTVAL (*loc);
27618 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'.
27619 Exceptions: -128 encodes smaller than 128, so swap sign and op. */
27620 if ((val < 0 && val != -128)
27623 *loc = GEN_INT (-val);
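/* Illustration (values not from the original source): negating -4 turns
   "addl $-4, %eax" into the prettier, equally sized "subl $4, %eax",
   but -128 is deliberately left alone: it fits a sign-extended 8-bit
   immediate, while the negated +128 would need a full 32-bit one.  */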
27630 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27631 optabs would emit if we didn't have TFmode patterns. */
27634 x86_emit_floatuns (rtx operands[2])
27636 rtx neglab, donelab, i0, i1, f0, in, out;
27637 enum machine_mode mode, inmode;
27639 inmode = GET_MODE (operands[1]);
27640 gcc_assert (inmode == SImode || inmode == DImode);
27643 in = force_reg (inmode, operands[1]);
27644 mode = GET_MODE (out);
27645 neglab = gen_label_rtx ();
27646 donelab = gen_label_rtx ();
27647 f0 = gen_reg_rtx (mode);
27649 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27651 expand_float (out, in, 0);
27653 emit_jump_insn (gen_jump (donelab));
27656 emit_label (neglab);
27658 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27660 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27662 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27664 expand_float (f0, i0, 0);
27666 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27668 emit_label (donelab);
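/* A minimal C-level sketch of the RTL emitted above for a DImode input
   (floatuns64 is a hypothetical name, for illustration only):

     double floatuns64 (uint64_t in)
     {
       if ((int64_t) in >= 0)
         return (double) (int64_t) in;    // in range: signed convert suffices
       // Halve the value, OR the shifted-out bit back in so rounding
       // stays correct, convert, then double the result.
       uint64_t half = (in >> 1) | (in & 1);
       double f = (double) (int64_t) half;
       return f + f;
     }  */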
27671 /* AVX does not support 32-byte integer vector operations,
27672 thus the longest vector we are faced with is V16QImode. */
27673 #define MAX_VECT_LEN 16
27675 struct expand_vec_perm_d
27677 rtx target, op0, op1;
27678 unsigned char perm[MAX_VECT_LEN];
27679 enum machine_mode vmode;
27680 unsigned char nelt;
27684 static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
27685 static bool expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d);
27687 /* Get a vector mode of the same size as the original but with elements
27688 twice as wide. This is only guaranteed to apply to integral vectors. */
27690 static inline enum machine_mode
27691 get_mode_wider_vector (enum machine_mode o)
27693 /* ??? Rely on the ordering that genmodes.c gives to vectors. */
27694 enum machine_mode n = GET_MODE_WIDER_MODE (o);
27695 gcc_assert (GET_MODE_NUNITS (o) == GET_MODE_NUNITS (n) * 2);
27696 gcc_assert (GET_MODE_SIZE (o) == GET_MODE_SIZE (n));
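/* For example, V8HImode (8 units x 2 bytes) yields V4SImode (4 units
   x 4 bytes): same byte size, half the unit count, which is exactly
   what the two asserts above verify.  */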
27700 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27701 with all elements equal to VAR. Return true if successful. */
27704 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27705 rtx target, rtx val)
27728 /* First attempt to recognize VAL as-is. */
27729 dup = gen_rtx_VEC_DUPLICATE (mode, val);
27730 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup));
27731 if (recog_memoized (insn) < 0)
27734 /* If that fails, force VAL into a register. */
27737 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val);
27738 seq = get_insns ();
27741 emit_insn_before (seq, insn);
27743 ok = recog_memoized (insn) >= 0;
27752 if (TARGET_SSE || TARGET_3DNOW_A)
27756 val = gen_lowpart (SImode, val);
27757 x = gen_rtx_TRUNCATE (HImode, val);
27758 x = gen_rtx_VEC_DUPLICATE (mode, x);
27759 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27772 struct expand_vec_perm_d dperm;
27776 memset (&dperm, 0, sizeof (dperm));
27777 dperm.target = target;
27778 dperm.vmode = mode;
27779 dperm.nelt = GET_MODE_NUNITS (mode);
27780 dperm.op0 = dperm.op1 = gen_reg_rtx (mode);
27782 /* Extend to SImode using a paradoxical SUBREG. */
27783 tmp1 = gen_reg_rtx (SImode);
27784 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27786 /* Insert the SImode value as low element of a V4SImode vector. */
27787 tmp2 = gen_lowpart (V4SImode, dperm.op0);
27788 emit_insn (gen_vec_setv4si_0 (tmp2, CONST0_RTX (V4SImode), tmp1));
27790 ok = (expand_vec_perm_1 (&dperm)
27791 || expand_vec_perm_broadcast_1 (&dperm));
27803 /* Replicate the value once into the next wider mode and recurse. */
27805 enum machine_mode smode, wsmode, wvmode;
27808 smode = GET_MODE_INNER (mode);
27809 wvmode = get_mode_wider_vector (mode);
27810 wsmode = GET_MODE_INNER (wvmode);
27812 val = convert_modes (wsmode, smode, val, true);
27813 x = expand_simple_binop (wsmode, ASHIFT, val,
27814 GEN_INT (GET_MODE_BITSIZE (smode)),
27815 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27816 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27818 x = gen_lowpart (wvmode, target);
27819 ok = ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val);
27827 enum machine_mode hvmode = (mode == V16HImode ? V8HImode : V16QImode);
27828 rtx x = gen_reg_rtx (hvmode);
27830 ok = ix86_expand_vector_init_duplicate (false, hvmode, x, val);
27833 x = gen_rtx_VEC_CONCAT (mode, x, x);
27834 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27843 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27844 whose ONE_VAR element is VAR, and all other elements are zero.  Return true if successful.  */
27848 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27849 rtx target, rtx var, int one_var)
27851 enum machine_mode vsimode;
27854 bool use_vector_set = false;
27859 /* For SSE4.1, we normally use vector set. But if the second
27860 element is zero and inter-unit moves are OK, we use movq instead.  */
27862 use_vector_set = (TARGET_64BIT
27864 && !(TARGET_INTER_UNIT_MOVES
27870 use_vector_set = TARGET_SSE4_1;
27873 use_vector_set = TARGET_SSE2;
27876 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27883 use_vector_set = TARGET_AVX;
27886 /* Use ix86_expand_vector_set in 64bit mode only. */
27887 use_vector_set = TARGET_AVX && TARGET_64BIT;
27893 if (use_vector_set)
27895 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27896 var = force_reg (GET_MODE_INNER (mode), var);
27897 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27913 var = force_reg (GET_MODE_INNER (mode), var);
27914 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27915 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27920 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27921 new_target = gen_reg_rtx (mode);
27923 new_target = target;
27924 var = force_reg (GET_MODE_INNER (mode), var);
27925 x = gen_rtx_VEC_DUPLICATE (mode, var);
27926 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27927 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27930 /* We need to shuffle the value to the correct position, so
27931 create a new pseudo to store the intermediate result. */
27933 /* With SSE2, we can use the integer shuffle insns. */
27934 if (mode != V4SFmode && TARGET_SSE2)
27936 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27938 GEN_INT (one_var == 1 ? 0 : 1),
27939 GEN_INT (one_var == 2 ? 0 : 1),
27940 GEN_INT (one_var == 3 ? 0 : 1)));
27941 if (target != new_target)
27942 emit_move_insn (target, new_target);
27946 /* Otherwise convert the intermediate result to V4SFmode and
27947 use the SSE1 shuffle instructions. */
27948 if (mode != V4SFmode)
27950 tmp = gen_reg_rtx (V4SFmode);
27951 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27956 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27958 GEN_INT (one_var == 1 ? 0 : 1),
27959 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27960 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27962 if (mode != V4SFmode)
27963 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27964 else if (tmp != target)
27965 emit_move_insn (target, tmp);
27967 else if (target != new_target)
27968 emit_move_insn (target, new_target);
27973 vsimode = V4SImode;
27979 vsimode = V2SImode;
27985 /* Zero extend the variable element to SImode and recurse. */
27986 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27988 x = gen_reg_rtx (vsimode);
27989 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27991 gcc_unreachable ();
27993 emit_move_insn (target, gen_lowpart (mode, x));
28001 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
28002 consisting of the values in VALS. It is known that all elements
28003 except ONE_VAR are constants. Return true if successful. */
28006 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
28007 rtx target, rtx vals, int one_var)
28009 rtx var = XVECEXP (vals, 0, one_var);
28010 enum machine_mode wmode;
28013 const_vec = copy_rtx (vals);
28014 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
28015 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
28023 /* For the two element vectors, it's just as easy to use
28024 the general case. */
28028 /* Use ix86_expand_vector_set in 64bit mode only. */
28051 /* There's no way to set one QImode entry easily. Combine
28052 the variable value with its adjacent constant value, and
28053 promote to an HImode set. */
28054 x = XVECEXP (vals, 0, one_var ^ 1);
28057 var = convert_modes (HImode, QImode, var, true);
28058 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
28059 NULL_RTX, 1, OPTAB_LIB_WIDEN);
28060 x = GEN_INT (INTVAL (x) & 0xff);
28064 var = convert_modes (HImode, QImode, var, true);
28065 x = gen_int_mode (INTVAL (x) << 8, HImode);
28067 if (x != const0_rtx)
28068 var = expand_simple_binop (HImode, IOR, var, x, var,
28069 1, OPTAB_LIB_WIDEN);
28071 x = gen_reg_rtx (wmode);
28072 emit_move_insn (x, gen_lowpart (wmode, const_vec));
28073 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
28075 emit_move_insn (target, gen_lowpart (mode, x));
28082 emit_move_insn (target, const_vec);
28083 ix86_expand_vector_set (mmx_ok, target, var, one_var);
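/* Illustration of the V16QImode path above, assuming little-endian
   element order: for an odd ONE_VAR the variable byte lands in the
   high half of the promoted HImode element,
     hi = (var << 8) | (neighbor & 0xff);
   for an even ONE_VAR the constant neighbor is shifted instead,
     hi = (neighbor << 8) | var;
   and the combined pair is stored with a single HImode vector set.  */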
28087 /* A subroutine of ix86_expand_vector_init_general. Use vector
28088 concatenate to handle the most general case: all values variable,
28089 and none identical. */
28092 ix86_expand_vector_init_concat (enum machine_mode mode,
28093 rtx target, rtx *ops, int n)
28095 enum machine_mode cmode, hmode = VOIDmode;
28096 rtx first[8], second[4];
28136 gcc_unreachable ();
28139 if (!register_operand (ops[1], cmode))
28140 ops[1] = force_reg (cmode, ops[1]);
28141 if (!register_operand (ops[0], cmode))
28142 ops[0] = force_reg (cmode, ops[0]);
28143 emit_insn (gen_rtx_SET (VOIDmode, target,
28144 gen_rtx_VEC_CONCAT (mode, ops[0],
28164 gcc_unreachable ();
28180 gcc_unreachable ();
28185 /* FIXME: We process inputs backward to help RA. PR 36222. */
28188 for (; i > 0; i -= 2, j--)
28190 first[j] = gen_reg_rtx (cmode);
28191 v = gen_rtvec (2, ops[i - 1], ops[i]);
28192 ix86_expand_vector_init (false, first[j],
28193 gen_rtx_PARALLEL (cmode, v));
28199 gcc_assert (hmode != VOIDmode);
28200 for (i = j = 0; i < n; i += 2, j++)
28202 second[j] = gen_reg_rtx (hmode);
28203 ix86_expand_vector_init_concat (hmode, second [j],
28207 ix86_expand_vector_init_concat (mode, target, second, n);
28210 ix86_expand_vector_init_concat (mode, target, first, n);
28214 gcc_unreachable ();
28218 /* A subroutine of ix86_expand_vector_init_general. Use vector
28219 interleave to handle the most general case: all values variable,
28220 and none identical. */
28223 ix86_expand_vector_init_interleave (enum machine_mode mode,
28224 rtx target, rtx *ops, int n)
28226 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
28229 rtx (*gen_load_even) (rtx, rtx, rtx);
28230 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
28231 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
28236 gen_load_even = gen_vec_setv8hi;
28237 gen_interleave_first_low = gen_vec_interleave_lowv4si;
28238 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28239 inner_mode = HImode;
28240 first_imode = V4SImode;
28241 second_imode = V2DImode;
28242 third_imode = VOIDmode;
28245 gen_load_even = gen_vec_setv16qi;
28246 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
28247 gen_interleave_second_low = gen_vec_interleave_lowv4si;
28248 inner_mode = QImode;
28249 first_imode = V8HImode;
28250 second_imode = V4SImode;
28251 third_imode = V2DImode;
28254 gcc_unreachable ();
28257 for (i = 0; i < n; i++)
28259 /* Extend the odd element to SImode using a paradoxical SUBREG.  */
28260 op0 = gen_reg_rtx (SImode);
28261 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
28263 /* Insert the SImode value as low element of V4SImode vector. */
28264 op1 = gen_reg_rtx (V4SImode);
28265 op0 = gen_rtx_VEC_MERGE (V4SImode,
28266 gen_rtx_VEC_DUPLICATE (V4SImode,
28268 CONST0_RTX (V4SImode),
28270 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
28272 /* Cast the V4SImode vector back to a vector in the original mode.  */
28273 op0 = gen_reg_rtx (mode);
28274 emit_move_insn (op0, gen_lowpart (mode, op1));
28276 /* Load even elements into the second position.  */
28277 emit_insn (gen_load_even (op0,
28278 force_reg (inner_mode,
28282 /* Cast vector to FIRST_IMODE vector. */
28283 ops[i] = gen_reg_rtx (first_imode);
28284 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
28287 /* Interleave low FIRST_IMODE vectors. */
28288 for (i = j = 0; i < n; i += 2, j++)
28290 op0 = gen_reg_rtx (first_imode);
28291 emit_insn (gen_interleave_first_low (op0, ops[i], ops[i + 1]));
28293 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
28294 ops[j] = gen_reg_rtx (second_imode);
28295 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
28298 /* Interleave low SECOND_IMODE vectors. */
28299 switch (second_imode)
28302 for (i = j = 0; i < n / 2; i += 2, j++)
28304 op0 = gen_reg_rtx (second_imode);
28305 emit_insn (gen_interleave_second_low (op0, ops[i],
28308 /* Cast the SECOND_IMODE vector to the THIRD_IMODE vector.  */
28310 ops[j] = gen_reg_rtx (third_imode);
28311 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
28313 second_imode = V2DImode;
28314 gen_interleave_second_low = gen_vec_interleave_lowv2di;
28318 op0 = gen_reg_rtx (second_imode);
28319 emit_insn (gen_interleave_second_low (op0, ops[0],
28322 /* Cast the SECOND_IMODE vector back to a vector of the original mode.  */
28324 emit_insn (gen_rtx_SET (VOIDmode, target,
28325 gen_lowpart (mode, op0)));
28329 gcc_unreachable ();
28333 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
28334 all values variable, and none identical. */
28337 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
28338 rtx target, rtx vals)
28340 rtx ops[32], op0, op1;
28341 enum machine_mode half_mode = VOIDmode;
28348 if (!mmx_ok && !TARGET_SSE)
28360 n = GET_MODE_NUNITS (mode);
28361 for (i = 0; i < n; i++)
28362 ops[i] = XVECEXP (vals, 0, i);
28363 ix86_expand_vector_init_concat (mode, target, ops, n);
28367 half_mode = V16QImode;
28371 half_mode = V8HImode;
28375 n = GET_MODE_NUNITS (mode);
28376 for (i = 0; i < n; i++)
28377 ops[i] = XVECEXP (vals, 0, i);
28378 op0 = gen_reg_rtx (half_mode);
28379 op1 = gen_reg_rtx (half_mode);
28380 ix86_expand_vector_init_interleave (half_mode, op0, ops,
28382 ix86_expand_vector_init_interleave (half_mode, op1,
28383 &ops [n >> 1], n >> 2);
28384 emit_insn (gen_rtx_SET (VOIDmode, target,
28385 gen_rtx_VEC_CONCAT (mode, op0, op1)));
28389 if (!TARGET_SSE4_1)
28397 /* Don't use ix86_expand_vector_init_interleave if we can't
28398 move from GPR to SSE register directly. */
28399 if (!TARGET_INTER_UNIT_MOVES)
28402 n = GET_MODE_NUNITS (mode);
28403 for (i = 0; i < n; i++)
28404 ops[i] = XVECEXP (vals, 0, i);
28405 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
28413 gcc_unreachable ();
28417 int i, j, n_elts, n_words, n_elt_per_word;
28418 enum machine_mode inner_mode;
28419 rtx words[4], shift;
28421 inner_mode = GET_MODE_INNER (mode);
28422 n_elts = GET_MODE_NUNITS (mode);
28423 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
28424 n_elt_per_word = n_elts / n_words;
28425 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
28427 for (i = 0; i < n_words; ++i)
28429 rtx word = NULL_RTX;
28431 for (j = 0; j < n_elt_per_word; ++j)
28433 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
28434 elt = convert_modes (word_mode, inner_mode, elt, true);
28440 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
28441 word, 1, OPTAB_LIB_WIDEN);
28442 word = expand_simple_binop (word_mode, IOR, word, elt,
28443 word, 1, OPTAB_LIB_WIDEN);
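/* E.g. for V8HImode on a 32-bit target each SImode word is assembled
   from two HImode elements, highest element first:
     word = elt[2*i + 1];
     word = (word << 16) | elt[2*i];
   before the words are recombined into the full vector below.  */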
28451 emit_move_insn (target, gen_lowpart (mode, words[0]));
28452 else if (n_words == 2)
28454 rtx tmp = gen_reg_rtx (mode);
28455 emit_clobber (tmp);
28456 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
28457 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
28458 emit_move_insn (target, tmp);
28460 else if (n_words == 4)
28462 rtx tmp = gen_reg_rtx (V4SImode);
28463 gcc_assert (word_mode == SImode);
28464 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28465 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28466 emit_move_insn (target, gen_lowpart (mode, tmp));
28469 gcc_unreachable ();
28473 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28474 instructions unless MMX_OK is true. */
28477 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28479 enum machine_mode mode = GET_MODE (target);
28480 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28481 int n_elts = GET_MODE_NUNITS (mode);
28482 int n_var = 0, one_var = -1;
28483 bool all_same = true, all_const_zero = true;
28487 for (i = 0; i < n_elts; ++i)
28489 x = XVECEXP (vals, 0, i);
28490 if (!(CONST_INT_P (x)
28491 || GET_CODE (x) == CONST_DOUBLE
28492 || GET_CODE (x) == CONST_FIXED))
28493 n_var++, one_var = i;
28494 else if (x != CONST0_RTX (inner_mode))
28495 all_const_zero = false;
28496 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28500 /* Constants are best loaded from the constant pool. */
28503 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28507 /* If all values are identical, broadcast the value. */
28509 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28510 XVECEXP (vals, 0, 0)))
28513 /* Values where only one field is non-constant are best loaded from
28514 the pool and overwritten via move later. */
28518 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28519 XVECEXP (vals, 0, one_var),
28523 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28527 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28531 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28533 enum machine_mode mode = GET_MODE (target);
28534 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28535 enum machine_mode half_mode;
28536 bool use_vec_merge = false;
28538 static rtx (*gen_extract[6][2]) (rtx, rtx)
28540 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28541 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28542 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28543 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28544 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28545 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28547 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28549 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28550 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28551 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28552 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28553 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28554 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28564 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28565 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28567 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28569 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28570 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28576 use_vec_merge = TARGET_SSE4_1;
28584 /* For the two element vectors, we implement a VEC_CONCAT with
28585 the extraction of the other element. */
28587 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28588 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28591 op0 = val, op1 = tmp;
28593 op0 = tmp, op1 = val;
28595 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28596 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28601 use_vec_merge = TARGET_SSE4_1;
28608 use_vec_merge = true;
28612 /* tmp = target = A B C D */
28613 tmp = copy_to_reg (target);
28614 /* target = A A B B */
28615 emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
28616 /* target = X A B B */
28617 ix86_expand_vector_set (false, target, val, 0);
28618 /* target = A X C D */
28619 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28620 const1_rtx, const0_rtx,
28621 GEN_INT (2+4), GEN_INT (3+4)));
28625 /* tmp = target = A B C D */
28626 tmp = copy_to_reg (target);
28627 /* tmp = X B C D */
28628 ix86_expand_vector_set (false, tmp, val, 0);
28629 /* target = A B X D */
28630 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28631 const0_rtx, const1_rtx,
28632 GEN_INT (0+4), GEN_INT (3+4)));
28636 /* tmp = target = A B C D */
28637 tmp = copy_to_reg (target);
28638 /* tmp = X B C D */
28639 ix86_expand_vector_set (false, tmp, val, 0);
28640 /* target = A B X D */
28641 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28642 const0_rtx, const1_rtx,
28643 GEN_INT (2+4), GEN_INT (0+4)));
28647 gcc_unreachable ();
28652 use_vec_merge = TARGET_SSE4_1;
28656 /* Element 0 handled by vec_merge below. */
28659 use_vec_merge = true;
28665 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28666 store into element 0, then shuffle them back. */
28670 order[0] = GEN_INT (elt);
28671 order[1] = const1_rtx;
28672 order[2] = const2_rtx;
28673 order[3] = GEN_INT (3);
28674 order[elt] = const0_rtx;
28676 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28677 order[1], order[2], order[3]));
28679 ix86_expand_vector_set (false, target, val, 0);
28681 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28682 order[1], order[2], order[3]));
28686 /* For SSE1, we have to reuse the V4SF code. */
28687 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28688 gen_lowpart (SFmode, val), elt);
28693 use_vec_merge = TARGET_SSE2;
28696 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28700 use_vec_merge = TARGET_SSE4_1;
28707 half_mode = V16QImode;
28713 half_mode = V8HImode;
28719 half_mode = V4SImode;
28725 half_mode = V2DImode;
28731 half_mode = V4SFmode;
28737 half_mode = V2DFmode;
28743 /* Compute offset. */
28747 gcc_assert (i <= 1);
28749 /* Extract the half. */
28750 tmp = gen_reg_rtx (half_mode);
28751 emit_insn (gen_extract[j][i] (tmp, target));
28753 /* Put val in tmp at elt. */
28754 ix86_expand_vector_set (false, tmp, val, elt);
28757 emit_insn (gen_insert[j][i] (target, target, tmp));
28766 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28767 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28768 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28772 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28774 emit_move_insn (mem, target);
28776 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28777 emit_move_insn (tmp, val);
28779 emit_move_insn (target, mem);
28784 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28786 enum machine_mode mode = GET_MODE (vec);
28787 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28788 bool use_vec_extr = false;
28801 use_vec_extr = true;
28805 use_vec_extr = TARGET_SSE4_1;
28817 tmp = gen_reg_rtx (mode);
28818 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28819 GEN_INT (elt), GEN_INT (elt),
28820 GEN_INT (elt+4), GEN_INT (elt+4)));
28824 tmp = gen_reg_rtx (mode);
28825 emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
28829 gcc_unreachable ();
28832 use_vec_extr = true;
28837 use_vec_extr = TARGET_SSE4_1;
28851 tmp = gen_reg_rtx (mode);
28852 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28853 GEN_INT (elt), GEN_INT (elt),
28854 GEN_INT (elt), GEN_INT (elt)));
28858 tmp = gen_reg_rtx (mode);
28859 emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
28863 gcc_unreachable ();
28866 use_vec_extr = true;
28871 /* For SSE1, we have to reuse the V4SF code. */
28872 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28873 gen_lowpart (V4SFmode, vec), elt);
28879 use_vec_extr = TARGET_SSE2;
28882 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28886 use_vec_extr = TARGET_SSE4_1;
28890 /* ??? Could extract the appropriate HImode element and shift. */
28897 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28898 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28900 /* Let the rtl optimizers know about the zero extension performed. */
28901 if (inner_mode == QImode || inner_mode == HImode)
28903 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28904 target = gen_lowpart (SImode, target);
28907 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28911 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28913 emit_move_insn (mem, vec);
28915 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28916 emit_move_insn (target, tmp);
28920 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28921 pattern to reduce; DEST is the destination; IN is the input vector. */
28924 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28926 rtx tmp1, tmp2, tmp3;
28928 tmp1 = gen_reg_rtx (V4SFmode);
28929 tmp2 = gen_reg_rtx (V4SFmode);
28930 tmp3 = gen_reg_rtx (V4SFmode);
28932 emit_insn (gen_sse_movhlps (tmp1, in, in));
28933 emit_insn (fn (tmp2, tmp1, in));
28935 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28936 const1_rtx, const1_rtx,
28937 GEN_INT (1+4), GEN_INT (1+4)));
28938 emit_insn (fn (dest, tmp2, tmp3));
28941 /* Target hook for scalar_mode_supported_p. */
28943 ix86_scalar_mode_supported_p (enum machine_mode mode)
28945 if (DECIMAL_FLOAT_MODE_P (mode))
28946 return default_decimal_float_supported_p ();
28947 else if (mode == TFmode)
28950 return default_scalar_mode_supported_p (mode);
28953 /* Implements target hook vector_mode_supported_p. */
28955 ix86_vector_mode_supported_p (enum machine_mode mode)
28957 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28959 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28961 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28963 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28965 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28970 /* Target hook for c_mode_for_suffix. */
28971 static enum machine_mode
28972 ix86_c_mode_for_suffix (char suffix)
28982 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28984 We do this in the new i386 backend to maintain source compatibility
28985 with the old cc0-based compiler. */
28988 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28989 tree inputs ATTRIBUTE_UNUSED,
28992 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28994 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28999 /* Implements the target hook targetm.asm.encode_section_info.  This
29000 is not used by netware.  */
29002 static void ATTRIBUTE_UNUSED
29003 ix86_encode_section_info (tree decl, rtx rtl, int first)
29005 default_encode_section_info (decl, rtl, first);
29007 if (TREE_CODE (decl) == VAR_DECL
29008 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
29009 && ix86_in_large_data_p (decl))
29010 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
29013 /* Worker function for REVERSE_CONDITION. */
29016 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
29018 return (mode != CCFPmode && mode != CCFPUmode
29019 ? reverse_condition (code)
29020 : reverse_condition_maybe_unordered (code));
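/* For example, EQ reverses to NE in any mode, but in CCFPmode LT must
   become UNGE: with a NaN operand both LT and GE are false, so the
   plain reversal would change the result.  */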
29023 /* Output code to perform an x87 FP register move, from OPERANDS[1] to OPERANDS[0].  */
29027 output_387_reg_move (rtx insn, rtx *operands)
29029 if (REG_P (operands[0]))
29031 if (REG_P (operands[1])
29032 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29034 if (REGNO (operands[0]) == FIRST_STACK_REG)
29035 return output_387_ffreep (operands, 0);
29036 return "fstp\t%y0";
29038 if (STACK_TOP_P (operands[0]))
29039 return "fld%Z1\t%y1";
29042 else if (MEM_P (operands[0]))
29044 gcc_assert (REG_P (operands[1]));
29045 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
29046 return "fstp%Z0\t%y0";
29049 /* There is no non-popping store to memory for XFmode.
29050 So if we need one, follow the store with a load. */
29051 if (GET_MODE (operands[0]) == XFmode)
29052 return "fstp%Z0\t%y0\n\tfld%Z0\t%y0";
29054 return "fst%Z0\t%y0";
29061 /* Output code to perform a conditional jump to LABEL, if the C2 flag in the
29062 FP status register is set.  */
29065 ix86_emit_fp_unordered_jump (rtx label)
29067 rtx reg = gen_reg_rtx (HImode);
29070 emit_insn (gen_x86_fnstsw_1 (reg));
29072 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
29074 emit_insn (gen_x86_sahf_1 (reg));
29076 temp = gen_rtx_REG (CCmode, FLAGS_REG);
29077 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
29081 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
29083 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
29084 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
29087 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
29088 gen_rtx_LABEL_REF (VOIDmode, label),
29090 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
29092 emit_jump_insn (temp);
29093 predict_jump (REG_BR_PROB_BASE * 10 / 100);
29096 /* Output code to perform a log1p XFmode calculation. */
29098 void ix86_emit_i387_log1p (rtx op0, rtx op1)
29100 rtx label1 = gen_label_rtx ();
29101 rtx label2 = gen_label_rtx ();
29103 rtx tmp = gen_reg_rtx (XFmode);
29104 rtx tmp2 = gen_reg_rtx (XFmode);
29107 emit_insn (gen_absxf2 (tmp, op1));
29108 test = gen_rtx_GE (VOIDmode, tmp,
29109 CONST_DOUBLE_FROM_REAL_VALUE (
29110 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
29112 emit_jump_insn (gen_cbranchxf4 (test, XEXP (test, 0), XEXP (test, 1), label1));
29114 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29115 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
29116 emit_jump (label2);
29118 emit_label (label1);
29119 emit_move_insn (tmp, CONST1_RTX (XFmode));
29120 emit_insn (gen_addxf3 (tmp, op1, tmp));
29121 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
29122 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
29124 emit_label (label2);
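/* The 0.29289... threshold above is 1 - sqrt(2)/2.  Below it, fyl2xp1
   computes y * log2 (x + 1) without the cancellation that forming
   1 + x explicitly would cause; above it, the fyl2x path on 1 + x is
   accurate enough.  Both paths load fldln2 as y, computing
     log1p (x) = ln (2) * log2 (1 + x).  */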
29127 /* Output code to perform a Newton-Raphson approximation of a single precision
29128 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm].  */
29130 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
29132 rtx x0, x1, e0, e1, two;
29134 x0 = gen_reg_rtx (mode);
29135 e0 = gen_reg_rtx (mode);
29136 e1 = gen_reg_rtx (mode);
29137 x1 = gen_reg_rtx (mode);
29139 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
29141 if (VECTOR_MODE_P (mode))
29142 two = ix86_build_const_vector (SFmode, true, two);
29144 two = force_reg (mode, two);
29146 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
29148 /* x0 = rcp(b) estimate */
29149 emit_insn (gen_rtx_SET (VOIDmode, x0,
29150 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
29153 emit_insn (gen_rtx_SET (VOIDmode, e0,
29154 gen_rtx_MULT (mode, x0, a)));
29156 emit_insn (gen_rtx_SET (VOIDmode, e1,
29157 gen_rtx_MULT (mode, x0, b)));
29159 emit_insn (gen_rtx_SET (VOIDmode, x1,
29160 gen_rtx_MINUS (mode, two, e1)));
29161 /* res = e0 * x1 */
29162 emit_insn (gen_rtx_SET (VOIDmode, res,
29163 gen_rtx_MULT (mode, e0, x1)));
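/* The sequence above is one Newton-Raphson step for f (x) = 1/x - b,
   seeded with the rcpss estimate x0:
     x1 = x0 * (2 - b * x0),  so  a/b ~= (a * x0) * (2 - b * x0),
   which roughly doubles the ~12 accurate bits of the estimate.  */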
29166 /* Output code to perform a Newton-Raphson approximation of a
29167 single precision floating point [reciprocal] square root.  */
29169 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
29172 rtx x0, e0, e1, e2, e3, mthree, mhalf;
29175 x0 = gen_reg_rtx (mode);
29176 e0 = gen_reg_rtx (mode);
29177 e1 = gen_reg_rtx (mode);
29178 e2 = gen_reg_rtx (mode);
29179 e3 = gen_reg_rtx (mode);
29181 real_from_integer (&r, VOIDmode, -3, -1, 0);
29182 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29184 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
29185 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
29187 if (VECTOR_MODE_P (mode))
29189 mthree = ix86_build_const_vector (SFmode, true, mthree);
29190 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
29193 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
29194 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
29196 /* x0 = rsqrt(a) estimate */
29197 emit_insn (gen_rtx_SET (VOIDmode, x0,
29198 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
29201 /* If a == 0.0, filter out the infinite rsqrt estimate to prevent NaN for sqrt (0.0).  */
29206 zero = gen_reg_rtx (mode);
29207 mask = gen_reg_rtx (mode);
29209 zero = force_reg (mode, CONST0_RTX(mode));
29210 emit_insn (gen_rtx_SET (VOIDmode, mask,
29211 gen_rtx_NE (mode, zero, a)));
29213 emit_insn (gen_rtx_SET (VOIDmode, x0,
29214 gen_rtx_AND (mode, x0, mask)));
29218 emit_insn (gen_rtx_SET (VOIDmode, e0,
29219 gen_rtx_MULT (mode, x0, a)));
29221 emit_insn (gen_rtx_SET (VOIDmode, e1,
29222 gen_rtx_MULT (mode, e0, x0)));
29225 mthree = force_reg (mode, mthree);
29226 emit_insn (gen_rtx_SET (VOIDmode, e2,
29227 gen_rtx_PLUS (mode, e1, mthree)));
29229 mhalf = force_reg (mode, mhalf);
29231 /* e3 = -.5 * x0 */
29232 emit_insn (gen_rtx_SET (VOIDmode, e3,
29233 gen_rtx_MULT (mode, x0, mhalf)));
29235 /* e3 = -.5 * e0 */
29236 emit_insn (gen_rtx_SET (VOIDmode, e3,
29237 gen_rtx_MULT (mode, e0, mhalf)));
29238 /* ret = e2 * e3 */
29239 emit_insn (gen_rtx_SET (VOIDmode, res,
29240 gen_rtx_MULT (mode, e2, e3)));
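/* The factored form above follows from one Newton-Raphson step for
   f (x) = 1/(x*x) - a, seeded with the rsqrtss estimate x0:
     x1 = 0.5 * x0 * (3 - a * x0 * x0)
        = -0.5 * x0 * (a * x0 * x0 - 3.0),
   with sqrt (a) obtained as a * rsqrt (a), which is why the sqrt
   variant multiplies by e0 = a * x0 instead of x0.  */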
29243 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
29245 static void ATTRIBUTE_UNUSED
29246 i386_solaris_elf_named_section (const char *name, unsigned int flags,
29249 /* With Binutils 2.15, the "@unwind" marker must be specified on
29250 every occurrence of the ".eh_frame" section, not just the first one.  */
29253 && strcmp (name, ".eh_frame") == 0)
29255 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
29256 flags & SECTION_WRITE ? "aw" : "a");
29259 default_elf_asm_named_section (name, flags, decl);
29262 /* Return the mangling of TYPE if it is an extended fundamental type. */
29264 static const char *
29265 ix86_mangle_type (const_tree type)
29267 type = TYPE_MAIN_VARIANT (type);
29269 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29270 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29273 switch (TYPE_MODE (type))
29276 /* __float128 is "g". */
29279 /* "long double" or __float80 is "e". */
29286 /* For 32-bit code we can save PIC register setup by using
29287 __stack_chk_fail_local hidden function instead of calling
29288 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
29289 register, so it is better to call __stack_chk_fail directly. */
29292 ix86_stack_protect_fail (void)
29294 return TARGET_64BIT
29295 ? default_external_stack_protect_fail ()
29296 : default_hidden_stack_protect_fail ();
29299 /* Select a format to encode pointers in exception handling data. CODE
29300 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
29301 true if the symbol may be affected by dynamic relocations.
29303 ??? All x86 object file formats are capable of representing this.
29304 After all, the relocation needed is the same as for the call insn.
29305 Whether or not a particular assembler allows us to enter such, I
29306 guess we'll have to see. */
29308 asm_preferred_eh_data_format (int code, int global)
29312 int type = DW_EH_PE_sdata8;
29314 || ix86_cmodel == CM_SMALL_PIC
29315 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
29316 type = DW_EH_PE_sdata4;
29317 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
29319 if (ix86_cmodel == CM_SMALL
29320 || (ix86_cmodel == CM_MEDIUM && code))
29321 return DW_EH_PE_udata4;
29322 return DW_EH_PE_absptr;
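/* In the 64-bit case, for example, small-model PIC code encodes
   pointers as DW_EH_PE_pcrel | DW_EH_PE_sdata4 (plus DW_EH_PE_indirect
   for global symbols), non-PIC small-model code (and medium-model code
   labels) uses plain DW_EH_PE_udata4, and everything else falls back
   to DW_EH_PE_absptr.  */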
29325 /* Expand copysign from SIGN to the positive value ABS_VALUE
29326 storing in RESULT.  If MASK is non-null, it shall be a mask to mask out the sign bit.  */
29329 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
29331 enum machine_mode mode = GET_MODE (sign);
29332 rtx sgn = gen_reg_rtx (mode);
29333 if (mask == NULL_RTX)
29335 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
29336 if (!VECTOR_MODE_P (mode))
29338 /* We need to generate a scalar mode mask in this case. */
29339 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29340 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29341 mask = gen_reg_rtx (mode);
29342 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29346 mask = gen_rtx_NOT (mode, mask);
29347 emit_insn (gen_rtx_SET (VOIDmode, sgn,
29348 gen_rtx_AND (mode, mask, sign)));
29349 emit_insn (gen_rtx_SET (VOIDmode, result,
29350 gen_rtx_IOR (mode, abs_value, sgn)));
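/* In bit terms the two instructions above compute
     result = abs_value | (sign & SIGNBIT_MASK);
   the AND isolates the sign bit of SIGN, and the IOR transplants it
   onto the known-positive ABS_VALUE.  */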
29353 /* Expand fabs (OP0) and return a new rtx that holds the result. The
29354 mask for masking out the sign-bit is stored in *SMASK, if that is non-null.  */
29357 ix86_expand_sse_fabs (rtx op0, rtx *smask)
29359 enum machine_mode mode = GET_MODE (op0);
29362 xa = gen_reg_rtx (mode);
29363 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
29364 if (!VECTOR_MODE_P (mode))
29366 /* We need to generate a scalar mode mask in this case. */
29367 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
29368 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
29369 mask = gen_reg_rtx (mode);
29370 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
29372 emit_insn (gen_rtx_SET (VOIDmode, xa,
29373 gen_rtx_AND (mode, op0, mask)));
29381 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
29382 swapping the operands if SWAP_OPERANDS is true. The expanded
29383 code is a forward jump to a newly created label in case the
29384 comparison is true. The generated label rtx is returned. */
29386 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
29387 bool swap_operands)
29398 label = gen_label_rtx ();
29399 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
29400 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29401 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
29402 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
29403 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
29404 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
29405 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
29406 JUMP_LABEL (tmp) = label;
29411 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
29412 using comparison code CODE. Operands are swapped for the comparison if
29413 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
29415 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
29416 bool swap_operands)
29418 enum machine_mode mode = GET_MODE (op0);
29419 rtx mask = gen_reg_rtx (mode);
29428 if (mode == DFmode)
29429 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
29430 gen_rtx_fmt_ee (code, mode, op0, op1)));
29432 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
29433 gen_rtx_fmt_ee (code, mode, op0, op1)));
29438 /* Generate and return an rtx of mode MODE for 2**n, where n is the number
29439 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
29441 ix86_gen_TWO52 (enum machine_mode mode)
29443 REAL_VALUE_TYPE TWO52r;
29446 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
29447 TWO52 = const_double_from_real_value (TWO52r, mode);
29448 TWO52 = force_reg (mode, TWO52);
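/* The constant feeds the classic rounding trick used below: when
   |x| < 2**52 (DFmode; 2**23 for SFmode), every representable value
   at or above TWO52 is an integer, so
     x + TWO52 - TWO52
   rounds x to an integer in the current rounding mode.  E.g.
   3.7 + 2**52 rounds to 2**52 + 4, and subtracting 2**52 leaves 4.0.  */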
29453 /* Expand SSE sequence for computing lround from OP1 storing into OP0.  */
29456 ix86_expand_lround (rtx op0, rtx op1)
29458 /* C code for the stuff we're doing below:
29459 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);  op0 = (imode) tmp;  */
29462 enum machine_mode mode = GET_MODE (op1);
29463 const struct real_format *fmt;
29464 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29467 /* load nextafter (0.5, 0.0) */
29468 fmt = REAL_MODE_FORMAT (mode);
29469 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29470 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29472 /* adj = copysign (0.5, op1) */
29473 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29474 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29476 /* adj = op1 + adj */
29477 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29479 /* op0 = (imode)adj */
29480 expand_fix (op0, adj, 0);
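/* Using nextafter (0.5, 0.0) instead of 0.5 matters right below the
   halfway point: for op1 == nextafter (0.5, 0.0), adding plain 0.5
   would round up to exactly 1.0 and lround would return 1, while
   adding the predecessor keeps the sum below 1.0, so the truncating
   conversion correctly returns 0.  */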
29483 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing into OPERAND0.  */
29486 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29488 /* C code for the stuff we're doing below (for do_floor):
29490 xi = (long) op1;  xi -= (double) xi > op1 ? 1 : 0;  return xi;  */
29493 enum machine_mode fmode = GET_MODE (op1);
29494 enum machine_mode imode = GET_MODE (op0);
29495 rtx ireg, freg, label, tmp;
29497 /* reg = (long)op1 */
29498 ireg = gen_reg_rtx (imode);
29499 expand_fix (ireg, op1, 0);
29501 /* freg = (double)reg */
29502 freg = gen_reg_rtx (fmode);
29503 expand_float (freg, ireg, 0);
29505 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29506 label = ix86_expand_sse_compare_and_jump (UNLE,
29507 freg, op1, !do_floor);
29508 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29509 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29510 emit_move_insn (ireg, tmp);
29512 emit_label (label);
29513 LABEL_NUSES (label) = 1;
29515 emit_move_insn (op0, ireg);
29518 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29519 result in OPERAND0. */
29521 ix86_expand_rint (rtx operand0, rtx operand1)
29523 /* C code for the stuff we're doing below:
29524 xa = fabs (operand1);
29525 if (!isless (xa, 2**52)) return operand1;
29527 xa = xa + 2**52 - 2**52;
29528 return copysign (xa, operand1);  */
29530 enum machine_mode mode = GET_MODE (operand0);
29531 rtx res, xa, label, TWO52, mask;
29533 res = gen_reg_rtx (mode);
29534 emit_move_insn (res, operand1);
29536 /* xa = abs (operand1) */
29537 xa = ix86_expand_sse_fabs (res, &mask);
29539 /* if (!isless (xa, TWO52)) goto label; */
29540 TWO52 = ix86_gen_TWO52 (mode);
29541 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29543 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29544 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29546 ix86_sse_copysign_to_positive (res, xa, res, mask);
29548 emit_label (label);
29549 LABEL_NUSES (label) = 1;
29551 emit_move_insn (operand0, res);
29554 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation.  */
29557 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29559 /* C code for the stuff we expand below.
29560 double xa = fabs (x), x2;
29561 if (!isless (xa, TWO52)) return x;
29563 xa = xa + TWO52 - TWO52;
29564 x2 = copysign (xa, x);  compensate (floor: x2 -= (x2 > x); ceil: x2 += (x2 < x));  return x2;  */
29573 enum machine_mode mode = GET_MODE (operand0);
29574 rtx xa, TWO52, tmp, label, one, res, mask;
29576 TWO52 = ix86_gen_TWO52 (mode);
29578 /* Temporary for holding the result, initialized to the input
29579 operand to ease control flow. */
29580 res = gen_reg_rtx (mode);
29581 emit_move_insn (res, operand1);
29583 /* xa = abs (operand1) */
29584 xa = ix86_expand_sse_fabs (res, &mask);
29586 /* if (!isless (xa, TWO52)) goto label; */
29587 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29589 /* xa = xa + TWO52 - TWO52; */
29590 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29591 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29593 /* xa = copysign (xa, operand1) */
29594 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29596 /* generate 1.0 or -1.0 */
29597 one = force_reg (mode,
29598 const_double_from_real_value (do_floor
29599 ? dconst1 : dconstm1, mode));
29601 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29602 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29603 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29604 gen_rtx_AND (mode, one, tmp)));
29605 /* We always need to subtract here to preserve signed zero. */
29606 tmp = expand_simple_binop (mode, MINUS,
29607 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29608 emit_move_insn (res, tmp);
29610 emit_label (label);
29611 LABEL_NUSES (label) = 1;
29613 emit_move_insn (operand0, res);
29616 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing into OPERAND0.  */
29619 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29621 /* C code for the stuff we expand below.
29622 double xa = fabs (x), x2;
29623 if (!isless (xa, TWO52)) return x;
29625 x2 = (double)(long)x;  compensate (floor: x2 -= (x2 > x); ceil: x2 += (x2 < x));
29632 if (HONOR_SIGNED_ZEROS (mode))
29633 return copysign (x2, x);  return x2;  */
29636 enum machine_mode mode = GET_MODE (operand0);
29637 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29639 TWO52 = ix86_gen_TWO52 (mode);
29641 /* Temporary for holding the result, initialized to the input
29642 operand to ease control flow. */
29643 res = gen_reg_rtx (mode);
29644 emit_move_insn (res, operand1);
29646 /* xa = abs (operand1) */
29647 xa = ix86_expand_sse_fabs (res, &mask);
29649 /* if (!isless (xa, TWO52)) goto label; */
29650 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29652 /* xa = (double)(long)x */
29653 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29654 expand_fix (xi, res, 0);
29655 expand_float (xa, xi, 0);
29658 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29660 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29661 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29662 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29663 gen_rtx_AND (mode, one, tmp)));
29664 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29665 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29666 emit_move_insn (res, tmp);
29668 if (HONOR_SIGNED_ZEROS (mode))
29669 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29671 emit_label (label);
29672 LABEL_NUSES (label) = 1;
29674 emit_move_insn (operand0, res);
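/* Worked floor example: x = -1.5.  The truncating fix gives xi = -1,
   so xa = -1.0 compares greater than x and one is subtracted, yielding
   floor (-1.5) = -2.0; for ceil the comparison is swapped and one is
   added instead.  */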
29677 /* Expand SSE sequence for computing round from OPERAND1 storing
29678 into OPERAND0. Sequence that works without relying on DImode truncation
29679 via cvttsd2siq that is only available on 64bit targets.  */
29681 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29683 /* C code for the stuff we expand below.
29684 double xa = fabs (x), xa2, x2;
29685 if (!isless (xa, TWO52)) return x;
29687 Using the absolute value and copying back sign makes
29688 -0.0 -> -0.0 correct.
29689 xa2 = xa + TWO52 - TWO52;
29694 dxa = xa2 - xa;  if (dxa <= -0.5) xa2 += 1;  else if (dxa > 0.5) xa2 -= 1;
29696 x2 = copysign (xa2, x);  return x2;  */
29699 enum machine_mode mode = GET_MODE (operand0);
29700 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29702 TWO52 = ix86_gen_TWO52 (mode);
29704 /* Temporary for holding the result, initialized to the input
29705 operand to ease control flow. */
29706 res = gen_reg_rtx (mode);
29707 emit_move_insn (res, operand1);
29709 /* xa = abs (operand1) */
29710 xa = ix86_expand_sse_fabs (res, &mask);
29712 /* if (!isless (xa, TWO52)) goto label; */
29713 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29715 /* xa2 = xa + TWO52 - TWO52; */
29716 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29717 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29719 /* dxa = xa2 - xa; */
29720 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29722 /* generate 0.5, 1.0 and -0.5 */
29723 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29724 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29725 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29729 tmp = gen_reg_rtx (mode);
29730 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29731 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29732 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29733 gen_rtx_AND (mode, one, tmp)));
29734 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29735 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29736 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29737 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29738 gen_rtx_AND (mode, one, tmp)));
29739 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29741 /* res = copysign (xa2, operand1) */
29742 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29744 emit_label (label);
29745 LABEL_NUSES (label) = 1;
29747 emit_move_insn (operand0, res);
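/* Worked example: x = 2.5.  xa2 = 2.5 + TWO52 - TWO52 = 2.0 (the tie
   rounds to even), so dxa = -0.5; the dxa <= -0.5 test fires and one
   is added back, giving the round-half-away-from-zero result 3.0.  */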
29750 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.  */
29753 ix86_expand_trunc (rtx operand0, rtx operand1)
29755 /* C code for SSE variant we expand below.
29756 double xa = fabs (x), x2;
29757 if (!isless (xa, TWO52)) return x;
29759 x2 = (double)(long)x;
29760 if (HONOR_SIGNED_ZEROS (mode))
29761 return copysign (x2, x);  return x2;  */
29764 enum machine_mode mode = GET_MODE (operand0);
29765 rtx xa, xi, TWO52, label, res, mask;
29767 TWO52 = ix86_gen_TWO52 (mode);
29769 /* Temporary for holding the result, initialized to the input
29770 operand to ease control flow. */
29771 res = gen_reg_rtx (mode);
29772 emit_move_insn (res, operand1);
29774 /* xa = abs (operand1) */
29775 xa = ix86_expand_sse_fabs (res, &mask);
29777 /* if (!isless (xa, TWO52)) goto label; */
29778 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29780 /* x = (double)(long)x */
29781 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29782 expand_fix (xi, res, 0);
29783 expand_float (res, xi, 0);
29785 if (HONOR_SIGNED_ZEROS (mode))
29786 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29788 emit_label (label);
29789 LABEL_NUSES (label) = 1;
29791 emit_move_insn (operand0, res);
29794 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation.  */
29797 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29799 enum machine_mode mode = GET_MODE (operand0);
29800 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29802 /* C code for SSE variant we expand below.
29803 double xa = fabs (x), x2;
29804 if (!isless (xa, TWO52)) return x;
29806 xa2 = xa + TWO52 - TWO52;  compensate: if (xa2 > xa) xa2 -= 1.0;
29810 x2 = copysign (xa2, x);  return x2;  */
29814 TWO52 = ix86_gen_TWO52 (mode);
29816 /* Temporary for holding the result, initialized to the input
29817 operand to ease control flow. */
29818 res = gen_reg_rtx (mode);
29819 emit_move_insn (res, operand1);
29821 /* xa = abs (operand1) */
29822 xa = ix86_expand_sse_fabs (res, &smask);
29824 /* if (!isless (xa, TWO52)) goto label; */
29825 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29827 /* res = xa + TWO52 - TWO52; */
29828 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29829 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29830 emit_move_insn (res, tmp);
29833 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29835 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29836 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29837 emit_insn (gen_rtx_SET (VOIDmode, mask,
29838 gen_rtx_AND (mode, mask, one)));
29839 tmp = expand_simple_binop (mode, MINUS,
29840 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29841 emit_move_insn (res, tmp);
29843 /* res = copysign (res, operand1) */
29844 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29846 emit_label (label);
29847 LABEL_NUSES (label) = 1;
29849 emit_move_insn (operand0, res);
29852 /* Expand SSE sequence for computing round from OPERAND1 storing into OPERAND0.  */
29855 ix86_expand_round (rtx operand0, rtx operand1)
29857 /* C code for the stuff we're doing below:
29858 double xa = fabs (x);
29859 if (!isless (xa, TWO52)) return x;
29861 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29862 return copysign (xa, x);  */
29864 enum machine_mode mode = GET_MODE (operand0);
29865 rtx res, TWO52, xa, label, xi, half, mask;
29866 const struct real_format *fmt;
29867 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29869 /* Temporary for holding the result, initialized to the input
29870 operand to ease control flow. */
29871 res = gen_reg_rtx (mode);
29872 emit_move_insn (res, operand1);
29874 TWO52 = ix86_gen_TWO52 (mode);
29875 xa = ix86_expand_sse_fabs (res, &mask);
29876 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29878 /* load nextafter (0.5, 0.0) */
29879 fmt = REAL_MODE_FORMAT (mode);
29880 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29881 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29883 /* xa = xa + 0.5 */
29884 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29885 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29887 /* xa = (double)(int64_t)xa */
29888 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29889 expand_fix (xi, xa, 0);
29890 expand_float (xa, xi, 0);
29892 /* res = copysign (xa, operand1) */
29893 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29895 emit_label (label);
29896 LABEL_NUSES (label) = 1;
29898 emit_move_insn (operand0, res);
29902 /* Table of valid machine attributes. */
29903 static const struct attribute_spec ix86_attribute_table[] =
29905 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29906 /* Stdcall attribute says callee is responsible for popping arguments
29907 if they are not variable. */
29908 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29909 /* Fastcall attribute says callee is responsible for popping arguments
29910 if they are not variable. */
29911 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29912 /* Thiscall attribute says callee is responsible for popping arguments
29913 if they are not variable. */
29914 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29915 /* Cdecl attribute says the callee is a normal C declaration */
29916 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29917 /* Regparm attribute specifies how many integer arguments are to be
29918 passed in registers. */
29919 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29920 /* Sseregparm attribute says we are using x86_64 calling conventions
29921 for FP arguments. */
29922 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29923 /* force_align_arg_pointer says this function realigns the stack at entry. */
29924 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29925 false, true, true, ix86_handle_cconv_attribute },
29926 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29927 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29928 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29929 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29931 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29932 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29933 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29934 SUBTARGET_ATTRIBUTE_TABLE,
29936 /* ms_abi and sysv_abi calling convention function attributes. */
29937 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29938 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29939 { "ms_hook_prologue", 0, 0, true, false, false, ix86_handle_fndecl_attribute },
29941 { NULL, 0, 0, false, false, false, NULL }
29944 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29946 ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
29947 tree vectype ATTRIBUTE_UNUSED,
29948 int misalign ATTRIBUTE_UNUSED)
29950 switch (type_of_cost)
29951 {
29952 case scalar_stmt:
29953 return ix86_cost->scalar_stmt_cost;
29955 case scalar_load:
29956 return ix86_cost->scalar_load_cost;
29958 case scalar_store:
29959 return ix86_cost->scalar_store_cost;
29961 case vector_stmt:
29962 return ix86_cost->vec_stmt_cost;
29964 case vector_load:
29965 return ix86_cost->vec_align_load_cost;
29967 case vector_store:
29968 return ix86_cost->vec_store_cost;
29970 case vec_to_scalar:
29971 return ix86_cost->vec_to_scalar_cost;
29973 case scalar_to_vec:
29974 return ix86_cost->scalar_to_vec_cost;
29976 case unaligned_load:
29977 case unaligned_store:
29978 return ix86_cost->vec_unalign_load_cost;
29980 case cond_branch_taken:
29981 return ix86_cost->cond_taken_branch_cost;
29983 case cond_branch_not_taken:
29984 return ix86_cost->cond_not_taken_branch_cost;
29989 default:
29990 gcc_unreachable ();
29991 }
29992 }
29995 /* Implement targetm.vectorize.builtin_vec_perm. */
29997 static tree
29998 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type)
29999 {
30000 tree itype = TREE_TYPE (vec_type);
30001 bool u = TYPE_UNSIGNED (itype);
30002 enum machine_mode vmode = TYPE_MODE (vec_type);
30003 enum ix86_builtins fcode;
30004 bool ok = TARGET_SSE2;
30010 fcode = IX86_BUILTIN_VEC_PERM_V4DF;
30013 fcode = IX86_BUILTIN_VEC_PERM_V2DF;
30015 itype = ix86_get_builtin_type (IX86_BT_DI);
30020 fcode = IX86_BUILTIN_VEC_PERM_V8SF;
30024 fcode = IX86_BUILTIN_VEC_PERM_V4SF;
30026 itype = ix86_get_builtin_type (IX86_BT_SI);
30030 fcode = u ? IX86_BUILTIN_VEC_PERM_V2DI_U : IX86_BUILTIN_VEC_PERM_V2DI;
30033 fcode = u ? IX86_BUILTIN_VEC_PERM_V4SI_U : IX86_BUILTIN_VEC_PERM_V4SI;
30036 fcode = u ? IX86_BUILTIN_VEC_PERM_V8HI_U : IX86_BUILTIN_VEC_PERM_V8HI;
30039 fcode = u ? IX86_BUILTIN_VEC_PERM_V16QI_U : IX86_BUILTIN_VEC_PERM_V16QI;
30049 *mask_type = itype;
30050 return ix86_builtins[(int) fcode];
30051 }
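/* For example, a V4SF permutation is handed back with *MASK_TYPE set to
   a vector of the SImode element type chosen above, i.e. a V4SI mask;
   V2DF similarly takes a DImode (V2DI) mask, while the integer vector
   modes simply reuse the element type of the data vectors.  */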
30053 /* Return a vector mode with twice as many elements as VMODE. */
30054 /* ??? Consider moving this to a table generated by genmodes.c. */
30056 static enum machine_mode
30057 doublesize_vector_mode (enum machine_mode vmode)
30058 {
30059 switch (vmode)
30060 {
30061 case V2SFmode: return V4SFmode;
30062 case V1DImode: return V2DImode;
30063 case V2SImode: return V4SImode;
30064 case V4HImode: return V8HImode;
30065 case V8QImode: return V16QImode;
30067 case V2DFmode: return V4DFmode;
30068 case V4SFmode: return V8SFmode;
30069 case V2DImode: return V4DImode;
30070 case V4SImode: return V8SImode;
30071 case V8HImode: return V16HImode;
30072 case V16QImode: return V32QImode;
30074 case V4DFmode: return V8DFmode;
30075 case V8SFmode: return V16SFmode;
30076 case V4DImode: return V8DImode;
30077 case V8SImode: return V16SImode;
30078 case V16HImode: return V32HImode;
30079 case V32QImode: return V64QImode;
30081 default:
30082 gcc_unreachable ();
30083 }
30084 }
30086 /* Construct (set target (vec_select op0 (parallel perm))) and
30087 return true if that's a valid instruction in the active ISA. */
30089 static bool
30090 expand_vselect (rtx target, rtx op0, const unsigned char *perm, unsigned nelt)
30091 {
30092 rtx rperm[MAX_VECT_LEN], x;
30093 unsigned i;
30095 for (i = 0; i < nelt; ++i)
30096 rperm[i] = GEN_INT (perm[i]);
30098 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, rperm));
30099 x = gen_rtx_VEC_SELECT (GET_MODE (target), op0, x);
30100 x = gen_rtx_SET (VOIDmode, target, x);
30102 x = emit_insn (x);
30103 if (recog_memoized (x) < 0)
30104 {
30105 remove_insn (x);
30106 return false;
30107 }
30108 return true;
30109 }
30111 /* Similar, but generate a vec_concat from op0 and op1 as well. */
30113 static bool
30114 expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
30115 const unsigned char *perm, unsigned nelt)
30116 {
30117 enum machine_mode v2mode;
30118 rtx x;
30120 v2mode = doublesize_vector_mode (GET_MODE (op0));
30121 x = gen_rtx_VEC_CONCAT (v2mode, op0, op1);
30122 return expand_vselect (target, x, perm, nelt);
30123 }
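/* As an illustration of the two helpers above: with V4SF operands,
   PERM = { 0, 4, 1, 5 } and NELT = 4 builds

     (set target (vec_select:V4SF (vec_concat:V8SF op0 op1)
                                  (parallel [0 4 1 5])))

   which recog recognizes as unpcklps; indices 0-3 select from op0 and
   4-7 from op1 within the concatenation.  */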
30125 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30126 in terms of blendp[sd] / pblendw / pblendvb. */
30128 static bool
30129 expand_vec_perm_blend (struct expand_vec_perm_d *d)
30130 {
30131 enum machine_mode vmode = d->vmode;
30132 unsigned i, mask, nelt = d->nelt;
30133 rtx target, op0, op1, x;
30135 if (!TARGET_SSE4_1 || d->op0 == d->op1)
30136 return false;
30137 if (!(GET_MODE_SIZE (vmode) == 16 || vmode == V4DFmode || vmode == V8SFmode))
30138 return false;
30140 /* This is a blend, not a permute. Elements must stay in their
30141 respective lanes. */
30142 for (i = 0; i < nelt; ++i)
30143 {
30144 unsigned e = d->perm[i];
30145 if (!(e == i || e == i + nelt))
30146 return false;
30147 }
30149 if (d->testing_p)
30150 return true;
30152 /* ??? Without SSE4.1, we could implement this with and/andn/or. This
30153 decision should be extracted elsewhere, so that we only try that
30154 sequence once all budget==3 options have been tried. */
30156 /* For bytes, see if bytes move in pairs so we can use pblendw with
30157 an immediate argument, rather than pblendvb with a vector argument. */
30158 if (vmode == V16QImode)
30160 bool pblendw_ok = true;
30161 for (i = 0; i < 16 && pblendw_ok; i += 2)
30162 pblendw_ok = (d->perm[i] + 1 == d->perm[i + 1]);
30164 if (!pblendw_ok)
30165 {
30166 rtx rperm[16], vperm;
30168 for (i = 0; i < nelt; ++i)
30169 rperm[i] = (d->perm[i] < nelt ? const0_rtx : constm1_rtx);
30171 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30172 vperm = force_reg (V16QImode, vperm);
30174 emit_insn (gen_sse4_1_pblendvb (d->target, d->op0, d->op1, vperm));
30175 return true;
30176 }
30177 }
30179 target = d->target;
30180 op0 = d->op0;
30181 op1 = d->op1;
30182 mask = 0;
30184 switch (vmode)
30185 {
30186 case V4DFmode:
30187 case V8SFmode:
30188 case V2DFmode:
30189 case V4SFmode:
30190 case V8HImode:
30191 for (i = 0; i < nelt; ++i)
30192 mask |= (d->perm[i] >= nelt) << i;
30193 break;
30195 case V2DImode:
30196 for (i = 0; i < 2; ++i)
30197 mask |= (d->perm[i] >= 2 ? 15 : 0) << (i * 4);
30198 goto do_subreg;
30200 case V4SImode:
30201 for (i = 0; i < 4; ++i)
30202 mask |= (d->perm[i] >= 4 ? 3 : 0) << (i * 2);
30203 goto do_subreg;
30205 case V16QImode:
30206 for (i = 0; i < 8; ++i)
30207 mask |= (d->perm[i * 2] >= 16) << i;
30209 do_subreg:
30210 vmode = V8HImode;
30211 target = gen_lowpart (vmode, target);
30212 op0 = gen_lowpart (vmode, op0);
30213 op1 = gen_lowpart (vmode, op1);
30214 break;
30216 default:
30217 gcc_unreachable ();
30218 }
30220 /* This matches five different patterns with the different modes. */
30221 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask));
30222 x = gen_rtx_SET (VOIDmode, target, x);
30223 emit_insn (x);
30225 return true;
30226 }
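/* A worked example of the mask computation above: for V8HImode with
   d->perm = { 0, 9, 2, 11, 4, 13, 6, 15 }, elements 1, 3, 5 and 7
   satisfy perm[i] >= nelt, so mask == 0xaa and the VEC_MERGE matches
   pblendw with immediate 0xaa.  For V4SImode the analogous selection
   { 0, 5, 2, 7 } is widened to word pairs on the V8HImode view of the
   data, giving mask == (3 << 2) | (3 << 6) == 0xcc.  */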
30228 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30229 in terms of the variable form of vpermilps.
30231 Note that we will have already failed the immediate-operand form of
30232 vpermilps, which requires that the high and low part shuffle be
30233 identical; the variable form doesn't require that. */
30235 static bool
30236 expand_vec_perm_vpermil (struct expand_vec_perm_d *d)
30237 {
30238 rtx rperm[8], vperm;
30239 unsigned i;
30241 if (!TARGET_AVX || d->vmode != V8SFmode || d->op0 != d->op1)
30242 return false;
30244 /* We can only permute within the 128-bit lane. */
30245 for (i = 0; i < 8; ++i)
30246 {
30247 unsigned e = d->perm[i];
30248 if (i < 4 ? e >= 4 : e < 4)
30249 return false;
30250 }
30255 for (i = 0; i < 8; ++i)
30256 {
30257 unsigned e = d->perm[i];
30259 /* Within each 128-bit lane, the elements of op0 are numbered
30260 from 0 and the elements of op1 are numbered from 4. */
30261 if (e >= 8 + 4)
30262 e -= 8;
30263 else if (e >= 4)
30264 e -= 4;
30266 rperm[i] = GEN_INT (e);
30267 }
30269 vperm = gen_rtx_CONST_VECTOR (V8SImode, gen_rtvec_v (8, rperm));
30270 vperm = force_reg (V8SImode, vperm);
30271 emit_insn (gen_avx_vpermilvarv8sf3 (d->target, d->op0, vperm));
30272 return true;
30273 }
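/* Illustrative example: for d->perm = { 1, 0, 3, 2, 5, 4, 7, 6 }
   (swap adjacent pairs within each 128-bit lane), the control vector
   built above is { 1, 0, 3, 2, 1, 0, 3, 2 }, since vpermilps selectors
   are interpreted relative to the element's own lane.  */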
30276 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30277 in terms of pshufb or vpperm. */
30279 static bool
30280 expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
30281 {
30282 unsigned i, nelt, eltsz;
30283 rtx rperm[16], vperm, target, op0, op1;
30285 if (!(d->op0 == d->op1 ? TARGET_SSSE3 : TARGET_XOP))
30286 return false;
30287 if (GET_MODE_SIZE (d->vmode) != 16)
30288 return false;
30290 if (d->testing_p)
30291 return true;
30293 nelt = d->nelt;
30294 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30296 for (i = 0; i < nelt; ++i)
30297 {
30298 unsigned j, e = d->perm[i];
30299 for (j = 0; j < eltsz; ++j)
30300 rperm[i * eltsz + j] = GEN_INT (e * eltsz + j);
30301 }
30303 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm));
30304 vperm = force_reg (V16QImode, vperm);
30306 target = gen_lowpart (V16QImode, d->target);
30307 op0 = gen_lowpart (V16QImode, d->op0);
30308 if (d->op0 == d->op1)
30309 emit_insn (gen_ssse3_pshufbv16qi3 (target, op0, vperm));
30310 else
30311 {
30312 op1 = gen_lowpart (V16QImode, d->op1);
30313 emit_insn (gen_xop_pperm (target, op0, op1, vperm));
30314 }
30316 return true;
30317 }
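/* A worked example of the control-byte expansion above: for V4SImode
   (eltsz == 4) with d->perm = { 3, 1, 2, 0 }, the 16 control bytes are
   { 12,13,14,15, 4,5,6,7, 8,9,10,11, 0,1,2,3 }; each element index e
   expands to the byte range [e*eltsz, e*eltsz + eltsz - 1].  */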
30319 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to instantiate D
30320 in a single instruction. */
30322 static bool
30323 expand_vec_perm_1 (struct expand_vec_perm_d *d)
30324 {
30325 unsigned i, nelt = d->nelt;
30326 unsigned char perm2[MAX_VECT_LEN];
30328 /* Check plain VEC_SELECT first, because AVX has instructions that could
30329 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory
30330 input where SEL+CONCAT may not. */
30331 if (d->op0 == d->op1)
30332 {
30333 int mask = nelt - 1;
30335 for (i = 0; i < nelt; i++)
30336 perm2[i] = d->perm[i] & mask;
30338 if (expand_vselect (d->target, d->op0, perm2, nelt))
30339 return true;
30341 /* There are plenty of patterns in sse.md that are written for
30342 SEL+CONCAT and are not replicated for a single op. Perhaps
30343 that should be changed, to avoid the nastiness here. */
30345 /* Recognize interleave style patterns, which means incrementing
30346 every other permutation operand. */
30347 for (i = 0; i < nelt; i += 2)
30348 {
30349 perm2[i] = d->perm[i] & mask;
30350 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt;
30351 }
30352 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30353 return true;
30355 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */
30356 if (nelt >= 4)
30357 {
30358 for (i = 0; i < nelt; i += 4)
30359 {
30360 perm2[i + 0] = d->perm[i + 0] & mask;
30361 perm2[i + 1] = d->perm[i + 1] & mask;
30362 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt;
30363 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt;
30364 }
30366 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt))
30367 return true;
30368 }
30369 }
30371 /* Finally, try the fully general two operand permute. */
30372 if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt))
30373 return true;
30375 /* Recognize interleave style patterns with reversed operands. */
30376 if (d->op0 != d->op1)
30377 {
30378 for (i = 0; i < nelt; ++i)
30379 {
30380 unsigned e = d->perm[i];
30381 if (e >= nelt)
30382 e -= nelt;
30383 else
30384 e += nelt;
30385 perm2[i] = e;
30386 }
30388 if (expand_vselect_vconcat (d->target, d->op1, d->op0, perm2, nelt))
30389 return true;
30390 }
30392 /* Try the SSE4.1 blend variable merge instructions. */
30393 if (expand_vec_perm_blend (d))
30394 return true;
30396 /* Try one of the AVX vpermil variable permutations. */
30397 if (expand_vec_perm_vpermil (d))
30398 return true;
30400 /* Try the SSSE3 pshufb or XOP vpperm variable permutation. */
30401 if (expand_vec_perm_pshufb (d))
30402 return true;
30404 return false;
30405 }
30407 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
30408 in terms of a pair of pshuflw + pshufhw instructions. */
30410 static bool
30411 expand_vec_perm_pshuflw_pshufhw (struct expand_vec_perm_d *d)
30412 {
30413 unsigned char perm2[MAX_VECT_LEN];
30414 unsigned i;
30415 bool ok;
30417 if (d->vmode != V8HImode || d->op0 != d->op1)
30418 return false;
30420 /* The two permutations only operate in 64-bit lanes. */
30421 for (i = 0; i < 4; ++i)
30422 if (d->perm[i] >= 4)
30423 return false;
30424 for (i = 4; i < 8; ++i)
30425 if (d->perm[i] < 4)
30426 return false;
30428 if (d->testing_p)
30429 return true;
30431 /* Emit the pshuflw. */
30432 memcpy (perm2, d->perm, 4);
30433 for (i = 4; i < 8; ++i)
30434 perm2[i] = i;
30435 ok = expand_vselect (d->target, d->op0, perm2, 8);
30436 gcc_assert (ok);
30438 /* Emit the pshufhw. */
30439 memcpy (perm2 + 4, d->perm + 4, 4);
30440 for (i = 0; i < 4; ++i)
30441 perm2[i] = i;
30442 ok = expand_vselect (d->target, d->target, perm2, 8);
30443 gcc_assert (ok);
30445 return true;
30446 }
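/* For example, d->perm = { 2, 1, 3, 0, 5, 7, 6, 4 } passes the lane
   test above and is emitted as pshuflw { 2, 1, 3, 0, 4, 5, 6, 7 }
   followed by pshufhw { 0, 1, 2, 3, 5, 7, 6, 4 }; the identity indices
   filled in by the loops leave the untouched 64-bit half intact in
   each step.  */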
30448 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30449 the permutation using the SSSE3 palignr instruction. This succeeds
30450 when all of the elements in PERM fit within one vector and we merely
30451 need to shift them down so that a single vector permutation has a
30452 chance to succeed. */
30454 static bool
30455 expand_vec_perm_palignr (struct expand_vec_perm_d *d)
30456 {
30457 unsigned i, nelt = d->nelt;
30458 unsigned min, max;
30459 bool in_order, ok;
30460 rtx shift;
30462 /* Even with AVX, palignr only operates on 128-bit vectors. */
30463 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30464 return false;
30466 min = nelt, max = 0;
30467 for (i = 0; i < nelt; ++i)
30468 {
30469 unsigned e = d->perm[i];
30470 if (e < min)
30471 min = e;
30472 if (e > max)
30473 max = e;
30474 }
30475 if (min == 0 || max - min >= nelt)
30476 return false;
30478 /* Given that we have SSSE3, we know we'll be able to implement the
30479 single operand permutation after the palignr with pshufb. */
30480 if (d->testing_p)
30481 return true;
30483 shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
30484 emit_insn (gen_ssse3_palignrti (gen_lowpart (TImode, d->target),
30485 gen_lowpart (TImode, d->op1),
30486 gen_lowpart (TImode, d->op0), shift));
30488 d->op0 = d->op1 = d->target;
30490 in_order = true;
30491 for (i = 0; i < nelt; ++i)
30492 {
30493 unsigned e = d->perm[i] - min;
30494 if (e != i)
30495 in_order = false;
30496 d->perm[i] = e;
30497 }
30499 /* Test for the degenerate case where the alignment by itself
30500 produces the desired permutation. */
30501 if (in_order)
30502 return true;
30504 ok = expand_vec_perm_1 (d);
30505 gcc_assert (ok);
30507 return ok;
30508 }
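/* Illustrative example: for V16QImode with d->perm[i] == i + 3
   (min == 3, max == 18), the palignr shifts the op1:op0 byte
   concatenation down by 3 * 8 bits; the residual permutation is then
   the identity, so the alignment alone already produces the desired
   result.  */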
30510 /* A subroutine of ix86_expand_vec_perm_builtin_1. Try to simplify
30511 a two vector permutation into a single vector permutation by using
30512 an interleave operation to merge the vectors. */
30514 static bool
30515 expand_vec_perm_interleave2 (struct expand_vec_perm_d *d)
30516 {
30517 struct expand_vec_perm_d dremap, dfinal;
30518 unsigned i, nelt = d->nelt, nelt2 = nelt / 2;
30519 unsigned contents, h1, h2, h3, h4;
30520 unsigned char remap[2 * MAX_VECT_LEN];
30521 rtx seq;
30522 bool ok;
30524 if (d->op0 == d->op1)
30525 return false;
30527 /* The 256-bit unpck[lh]p[sd] instructions only operate within the 128-bit
30528 lanes. We can use similar techniques with the vperm2f128 instruction,
30529 but it requires slightly different logic. */
30530 if (GET_MODE_SIZE (d->vmode) != 16)
30531 return false;
30533 /* Examine from whence the elements come. */
30535 for (i = 0; i < nelt; ++i)
30536 contents |= 1u << d->perm[i];
30538 /* Split the two input vectors into 4 halves. */
30539 h1 = (1u << nelt2) - 1;
30540 h2 = h1 << nelt2;
30541 h3 = h2 << nelt2;
30542 h4 = h3 << nelt2;
30544 memset (remap, 0xff, sizeof (remap));
30546 dremap = *d;
30547 /* If the elements are all from the low halves, use an interleave low;
30548 similarly an interleave high for the high halves. If the elements come
30549 from mis-matched halves, we can use shufps for V4SF/V4SI or a DImode shuffle. */
30550 if ((contents & (h1 | h3)) == contents)
30552 for (i = 0; i < nelt2; ++i)
30553 {
30554 remap[i] = i * 2;
30555 remap[i + nelt] = i * 2 + 1;
30556 dremap.perm[i * 2] = i;
30557 dremap.perm[i * 2 + 1] = i + nelt;
30560 else if ((contents & (h2 | h4)) == contents)
30562 for (i = 0; i < nelt2; ++i)
30563 {
30564 remap[i + nelt2] = i * 2;
30565 remap[i + nelt + nelt2] = i * 2 + 1;
30566 dremap.perm[i * 2] = i + nelt2;
30567 dremap.perm[i * 2 + 1] = i + nelt + nelt2;
30570 else if ((contents & (h1 | h4)) == contents)
30572 for (i = 0; i < nelt2; ++i)
30573 {
30574 remap[i] = i;
30575 remap[i + nelt + nelt2] = i + nelt2;
30576 dremap.perm[i] = i;
30577 dremap.perm[i + nelt2] = i + nelt + nelt2;
30581 dremap.vmode = V2DImode;
30582 dremap.nelt = 2;
30583 dremap.perm[0] = 0;
30584 dremap.perm[1] = 3;
30587 else if ((contents & (h2 | h3)) == contents)
30589 for (i = 0; i < nelt2; ++i)
30591 remap[i + nelt2] = i;
30592 remap[i + nelt] = i + nelt2;
30593 dremap.perm[i] = i + nelt2;
30594 dremap.perm[i + nelt2] = i + nelt;
30598 dremap.vmode = V2DImode;
30599 dremap.nelt = 2;
30600 dremap.perm[0] = 1;
30601 dremap.perm[1] = 2;
30607 /* Use the remapping array set up above to move the elements from their
30608 swizzled locations into their final destinations. */
30609 dfinal = *d;
30610 for (i = 0; i < nelt; ++i)
30611 {
30612 unsigned e = remap[d->perm[i]];
30613 gcc_assert (e < nelt);
30614 dfinal.perm[i] = e;
30615 }
30616 dfinal.op0 = gen_reg_rtx (dfinal.vmode);
30617 dfinal.op1 = dfinal.op0;
30618 dremap.target = dfinal.op0;
30620 /* Test if the final remap can be done with a single insn. For V4SFmode or
30621 V4SImode this *will* succeed. For V8HImode or V16QImode it may not. */
30622 start_sequence ();
30623 ok = expand_vec_perm_1 (&dfinal);
30624 seq = get_insns ();
30625 end_sequence ();
30627 if (!ok)
30628 return false;
30630 if (dremap.vmode != dfinal.vmode)
30632 dremap.target = gen_lowpart (dremap.vmode, dremap.target);
30633 dremap.op0 = gen_lowpart (dremap.vmode, dremap.op0);
30634 dremap.op1 = gen_lowpart (dremap.vmode, dremap.op1);
30637 ok = expand_vec_perm_1 (&dremap);
30638 gcc_assert (ok);
30640 emit_insn (seq);
30641 return true;
30642 }
30644 /* A subroutine of expand_vec_perm_even_odd_1. Implement the double-word
30645 permutation with two pshufb insns and an ior. We should have already
30646 failed all two instruction sequences. */
30648 static bool
30649 expand_vec_perm_pshufb2 (struct expand_vec_perm_d *d)
30650 {
30651 rtx rperm[2][16], vperm, l, h, op, m128;
30652 unsigned int i, nelt, eltsz;
30654 if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
30655 return false;
30656 gcc_assert (d->op0 != d->op1);
30658 nelt = d->nelt;
30659 eltsz = GET_MODE_SIZE (GET_MODE_INNER (d->vmode));
30661 /* Generate two permutation masks. If the required element is within
30662 the given vector it is shuffled into the proper lane. If the required
30663 element is in the other vector, force a zero into the lane by setting
30664 bit 7 in the permutation mask. */
30665 m128 = GEN_INT (-128);
30666 for (i = 0; i < nelt; ++i)
30668 unsigned j, e = d->perm[i];
30669 unsigned which = (e >= nelt);
30670 if (which)
30671 e -= nelt;
30673 for (j = 0; j < eltsz; ++j)
30675 rperm[which][i*eltsz + j] = GEN_INT (e*eltsz + j);
30676 rperm[1-which][i*eltsz + j] = m128;
30680 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[0]));
30681 vperm = force_reg (V16QImode, vperm);
30683 l = gen_reg_rtx (V16QImode);
30684 op = gen_lowpart (V16QImode, d->op0);
30685 emit_insn (gen_ssse3_pshufbv16qi3 (l, op, vperm));
30687 vperm = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, rperm[1]));
30688 vperm = force_reg (V16QImode, vperm);
30690 h = gen_reg_rtx (V16QImode);
30691 op = gen_lowpart (V16QImode, d->op1);
30692 emit_insn (gen_ssse3_pshufbv16qi3 (h, op, vperm));
30694 op = gen_lowpart (V16QImode, d->target);
30695 emit_insn (gen_iorv16qi3 (op, l, h));
30697 return true;
30698 }
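/* A worked example of the two masks built above, for V16QImode: if
   d->perm[0] == 5, mask 0 gets byte 5 and mask 1 gets -128 (bit 7 set)
   in lane 0, so the first pshufb moves byte 5 of op0 into place while
   the second forces a zero there; if d->perm[1] == 19 (>= nelt) the
   roles flip, with mask 1 getting byte 3 (19 - 16) and mask 0 the
   zeroing value.  The final ior merges the two half-results.  */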
30700 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement extract-even
30701 and extract-odd permutations. */
30703 static bool
30704 expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
30705 {
30706 rtx t1, t2, t3, t4;
30708 switch (d->vmode)
30709 {
30710 case V4DFmode:
30711 t1 = gen_reg_rtx (V4DFmode);
30712 t2 = gen_reg_rtx (V4DFmode);
30714 /* Shuffle the lanes around into { 0 1 4 5 } and { 2 3 6 7 }. */
30715 emit_insn (gen_avx_vperm2f128v4df3 (t1, d->op0, d->op1, GEN_INT (0x20)));
30716 emit_insn (gen_avx_vperm2f128v4df3 (t2, d->op0, d->op1, GEN_INT (0x31)));
30718 /* Now an unpck[lh]pd will produce the result required. */
30719 if (odd)
30720 t3 = gen_avx_unpckhpd256 (d->target, t1, t2);
30721 else
30722 t3 = gen_avx_unpcklpd256 (d->target, t1, t2);
30723 emit_insn (t3);
30724 break;
30726 case V8SFmode:
30727 {
30728 static const unsigned char perm1[8] = { 0, 2, 1, 3, 4, 6, 5, 7 };
30729 static const unsigned char perme[8] = { 0, 1, 8, 9, 4, 5, 12, 13 };
30730 static const unsigned char permo[8] = { 2, 3, 10, 11, 6, 7, 14, 15 };
30732 t1 = gen_reg_rtx (V8SFmode);
30733 t2 = gen_reg_rtx (V8SFmode);
30734 t3 = gen_reg_rtx (V8SFmode);
30735 t4 = gen_reg_rtx (V8SFmode);
30737 /* Shuffle within the 128-bit lanes to produce:
30738 { 0 2 1 3 4 6 5 7 } and { 8 a 9 b c e d f }. */
30739 expand_vselect (t1, d->op0, perm1, 8);
30740 expand_vselect (t2, d->op1, perm1, 8);
30742 /* Shuffle the lanes around to produce:
30743 { 0 2 1 3 8 a 9 b } and { 4 6 5 7 c e d f }. */
30744 emit_insn (gen_avx_vperm2f128v8sf3 (t3, t1, t2, GEN_INT (0x20)));
30745 emit_insn (gen_avx_vperm2f128v8sf3 (t4, t1, t2, GEN_INT (0x31)));
30747 /* Now a vpermil2p will produce the result required. */
30748 /* ??? The vpermil2p requires a vector constant. Another option
30749 is a unpck[lh]ps to merge the two vectors to produce
30750 { 0 4 2 6 8 c a e } or { 1 5 3 7 9 d b f }. Then use another
30751 vpermilps to get the elements into the final order. */
30754 memcpy (d->perm, odd ? permo : perme, 8);
30755 expand_vec_perm_vpermil (d);
30763 /* These are always directly implementable by expand_vec_perm_1. */
30764 gcc_unreachable ();
30768 return expand_vec_perm_pshufb2 (d);
30771 /* We need 2*log2(N)-1 operations to achieve odd/even
30772 with interleave. */
30773 t1 = gen_reg_rtx (V8HImode);
30774 t2 = gen_reg_rtx (V8HImode);
30775 emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
30776 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
30777 emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
30778 emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
30779 if (odd)
30780 t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
30781 else
30782 t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
30783 emit_insn (t3);
30784 break;
30789 return expand_vec_perm_pshufb2 (d);
30792 t1 = gen_reg_rtx (V16QImode);
30793 t2 = gen_reg_rtx (V16QImode);
30794 t3 = gen_reg_rtx (V16QImode);
30795 emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
30796 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
30797 emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
30798 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
30799 emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
30800 emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
30801 if (odd)
30802 t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
30803 else
30804 t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
30805 emit_insn (t3);
30806 break;
30809 default:
30810 gcc_unreachable ();
30811 }
30813 return true;
30814 }
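/* A trace of the V8HImode interleave sequence above, extracting the
   even or odd elements of { 0 ... 7 } and { 8 ... 15 } in
   2*log2(8)-1 == 5 insns:
     t1     = high (op0, op1)    = { 4 12  5 13  6 14  7 15 }
     target = low  (op0, op1)    = { 0  8  1  9  2 10  3 11 }
     t2     = high (target, t1)  = { 2  6 10 14  3  7 11 15 }
     target = low  (target, t1)  = { 0  4  8 12  1  5  9 13 }
   after which the final interleave low yields the evens
   { 0 2 4 6 8 10 12 14 } and the final interleave high the odds
   { 1 3 5 7 9 11 13 15 }.  */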
30816 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30817 extract-even and extract-odd permutations. */
30819 static bool
30820 expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
30821 {
30822 unsigned i, odd, nelt = d->nelt;
30824 odd = d->perm[0];
30825 if (odd != 0 && odd != 1)
30826 return false;
30828 for (i = 1; i < nelt; ++i)
30829 if (d->perm[i] != 2 * i + odd)
30830 return false;
30832 return expand_vec_perm_even_odd_1 (d, odd);
30833 }
30835 /* A subroutine of ix86_expand_vec_perm_builtin_1. Implement broadcast
30836 permutations. We assume that expand_vec_perm_1 has already failed. */
30838 static bool
30839 expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
30840 {
30841 unsigned elt = d->perm[0], nelt2 = d->nelt / 2;
30842 enum machine_mode vmode = d->vmode;
30843 unsigned char perm2[4];
30851 /* These are special-cased in sse.md so that we can optionally
30852 use the vbroadcast instruction. They expand to two insns
30853 if the input happens to be in a register. */
30854 gcc_unreachable ();
30860 /* These are always implementable using standard shuffle patterns. */
30861 gcc_unreachable ();
30865 /* These can be implemented via interleave. We save one insn by
30866 stopping once we have promoted to V4SImode and then use pshufd. */
30867 do
30868 {
30869 optab otab = vec_interleave_low_optab;
30871 if (elt >= nelt2)
30872 {
30873 otab = vec_interleave_high_optab;
30874 elt -= nelt2;
30875 }
30876 nelt2 /= 2;
30878 op0 = expand_binop (vmode, otab, op0, op0, NULL, 0, OPTAB_DIRECT);
30879 vmode = get_mode_wider_vector (vmode);
30880 op0 = gen_lowpart (vmode, op0);
30881 }
30882 while (vmode != V4SImode);
30884 memset (perm2, elt, 4);
30885 ok = expand_vselect (gen_lowpart (V4SImode, d->target), op0, perm2, 4);
30886 gcc_assert (ok);
30887 return true;
30889 default:
30890 gcc_unreachable ();
30891 }
30892 }
30894 /* A subroutine of ix86_expand_vec_perm_builtin_1. Pattern match
30895 broadcast permutations. */
30897 static bool
30898 expand_vec_perm_broadcast (struct expand_vec_perm_d *d)
30899 {
30900 unsigned i, elt, nelt = d->nelt;
30902 if (d->op0 != d->op1)
30903 return false;
30905 elt = d->perm[0];
30906 for (i = 1; i < nelt; ++i)
30907 if (d->perm[i] != elt)
30908 return false;
30910 return expand_vec_perm_broadcast_1 (d);
30913 /* The guts of ix86_expand_vec_perm_builtin, also used by the ok hook.
30914 With all of the interface bits taken care of, perform the expansion
30915 in D and return true on success. */
30917 static bool
30918 ix86_expand_vec_perm_builtin_1 (struct expand_vec_perm_d *d)
30919 {
30920 /* Try a single instruction expansion. */
30921 if (expand_vec_perm_1 (d))
30922 return true;
30924 /* Try sequences of two instructions. */
30926 if (expand_vec_perm_pshuflw_pshufhw (d))
30927 return true;
30929 if (expand_vec_perm_palignr (d))
30930 return true;
30932 if (expand_vec_perm_interleave2 (d))
30933 return true;
30935 if (expand_vec_perm_broadcast (d))
30936 return true;
30938 /* Try sequences of three instructions. */
30940 if (expand_vec_perm_pshufb2 (d))
30941 return true;
30943 /* ??? Look for narrow permutations whose element orderings would
30944 allow the promotion to a wider mode. */
30946 /* ??? Look for sequences of interleave or a wider permute that place
30947 the data into the correct lanes for a half-vector shuffle like
30948 pshuf[lh]w or vpermilps. */
30950 /* ??? Look for sequences of interleave that produce the desired results.
30951 The combinatorics of punpck[lh] get pretty ugly... */
30953 if (expand_vec_perm_even_odd (d))
30954 return true;
30956 return false;
30957 }
30959 /* Extract the values from the vector CST into the permutation array in D.
30960 Return 0 on error, 1 if all values from the permutation come from the
30961 first vector, 2 if all values from the second vector, and 3 otherwise. */
30963 static int
30964 extract_vec_perm_cst (struct expand_vec_perm_d *d, tree cst)
30965 {
30966 tree list = TREE_VECTOR_CST_ELTS (cst);
30967 unsigned i, nelt = d->nelt;
30968 int ret = 0;
30970 for (i = 0; i < nelt; ++i, list = TREE_CHAIN (list))
30972 unsigned HOST_WIDE_INT e;
30974 if (!host_integerp (TREE_VALUE (list), 1))
30975 return 0;
30976 e = tree_low_cst (TREE_VALUE (list), 1);
30977 if (e >= 2 * nelt)
30978 return 0;
30979 d->perm[i] = e;
30980 ret |= (e < nelt ? 1 : 2);
30981 }
30983 gcc_assert (list == NULL);
30985 /* For all elements from second vector, fold the elements to first. */
30986 if (ret == 2)
30987 for (i = 0; i < nelt; ++i)
30988 d->perm[i] -= nelt;
30990 return ret;
30991 }
30993 static rtx
30994 ix86_expand_vec_perm_builtin (tree exp)
30995 {
30996 struct expand_vec_perm_d d;
30997 tree arg0, arg1, arg2;
30999 arg0 = CALL_EXPR_ARG (exp, 0);
31000 arg1 = CALL_EXPR_ARG (exp, 1);
31001 arg2 = CALL_EXPR_ARG (exp, 2);
31003 d.vmode = TYPE_MODE (TREE_TYPE (arg0));
31004 d.nelt = GET_MODE_NUNITS (d.vmode);
31005 d.testing_p = false;
31006 gcc_assert (VECTOR_MODE_P (d.vmode));
31008 if (TREE_CODE (arg2) != VECTOR_CST)
31009 {
31010 error_at (EXPR_LOCATION (exp),
31011 "vector permutation requires vector constant");
31012 goto exit_error;
31013 }
31015 switch (extract_vec_perm_cst (&d, arg2))
31016 {
31017 default:
31018 gcc_unreachable ();
31020 case 0:
31021 error_at (EXPR_LOCATION (exp), "invalid vector permutation constant");
31022 goto exit_error;
31024 case 3:
31025 if (!operand_equal_p (arg0, arg1, 0))
31026 {
31027 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31028 d.op0 = force_reg (d.vmode, d.op0);
31029 d.op1 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31030 d.op1 = force_reg (d.vmode, d.op1);
31031 break;
31032 }
31034 /* The elements of PERM do not suggest that only the first operand
31035 is used, but both operands are identical. Allow easier matching
31036 of the permutation by folding the permutation into the single
31037 input vector. */
31038 {
31039 unsigned i, nelt = d.nelt;
31040 for (i = 0; i < nelt; ++i)
31041 if (d.perm[i] >= nelt)
31042 d.perm[i] -= nelt;
31043 }
31044 /* FALLTHRU */
31046 case 1:
31047 d.op0 = expand_expr (arg0, NULL_RTX, d.vmode, EXPAND_NORMAL);
31048 d.op0 = force_reg (d.vmode, d.op0);
31049 d.op1 = d.op0;
31050 break;
31052 case 2:
31053 d.op0 = expand_expr (arg1, NULL_RTX, d.vmode, EXPAND_NORMAL);
31054 d.op0 = force_reg (d.vmode, d.op0);
31055 d.op1 = d.op0;
31056 break;
31057 }
31059 d.target = gen_reg_rtx (d.vmode);
31060 if (ix86_expand_vec_perm_builtin_1 (&d))
31061 return d.target;
31063 /* For compiler generated permutations, we should never get here, because
31064 the compiler should also be checking the ok hook. But since this is a
31065 builtin the user has access to, don't abort. */
31069 sorry ("vector permutation (%d %d)", d.perm[0], d.perm[1]);
31072 sorry ("vector permutation (%d %d %d %d)",
31073 d.perm[0], d.perm[1], d.perm[2], d.perm[3]);
31076 sorry ("vector permutation (%d %d %d %d %d %d %d %d)",
31077 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31078 d.perm[4], d.perm[5], d.perm[6], d.perm[7]);
31081 sorry ("vector permutation "
31082 "(%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d)",
31083 d.perm[0], d.perm[1], d.perm[2], d.perm[3],
31084 d.perm[4], d.perm[5], d.perm[6], d.perm[7],
31085 d.perm[8], d.perm[9], d.perm[10], d.perm[11],
31086 d.perm[12], d.perm[13], d.perm[14], d.perm[15]);
31089 gcc_unreachable ();
31091 exit_error:
31092 return CONST0_RTX (d.vmode);
31093 }
31095 /* Implement targetm.vectorize.builtin_vec_perm_ok. */
31097 static bool
31098 ix86_vectorize_builtin_vec_perm_ok (tree vec_type, tree mask)
31099 {
31100 struct expand_vec_perm_d d;
31101 int vec_mask;
31102 bool ret, one_vec;
31104 d.vmode = TYPE_MODE (vec_type);
31105 d.nelt = GET_MODE_NUNITS (d.vmode);
31106 d.testing_p = true;
31108 /* Given sufficient ISA support we can just return true here
31109 for selected vector modes. */
31110 if (GET_MODE_SIZE (d.vmode) == 16)
31111 {
31112 /* All implementable with a single vpperm insn. */
31113 if (TARGET_XOP)
31114 return true;
31115 /* All implementable with 2 pshufb + 1 ior. */
31116 if (TARGET_SSSE3)
31117 return true;
31118 /* All implementable with shufpd or unpck[lh]pd. */
31119 if (d.vmode == V2DFmode)
31120 return true;
31121 }
31123 vec_mask = extract_vec_perm_cst (&d, mask);
31125 /* This hook cannot be called in response to something that the
31126 user does (unlike the builtin expander), so we shouldn't ever see
31127 an error generated from the extract. */
31128 gcc_assert (vec_mask > 0 && vec_mask <= 3);
31129 one_vec = (vec_mask != 3);
31131 /* Implementable with shufps or pshufd. */
31132 if (one_vec && (d.vmode == V4SFmode || d.vmode == V4SImode))
31133 return true;
31135 /* Otherwise we have to go through the motions and see if we can
31136 figure out how to generate the requested permutation. */
31137 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
31138 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
31139 if (!one_vec)
31140 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
31142 start_sequence ();
31143 ret = ix86_expand_vec_perm_builtin_1 (&d);
31144 end_sequence ();
31146 return ret;
31147 }
31149 void
31150 ix86_expand_vec_extract_even_odd (rtx targ, rtx op0, rtx op1, unsigned odd)
31151 {
31152 struct expand_vec_perm_d d;
31153 unsigned i, nelt;
31155 d.target = targ;
31156 d.op0 = op0;
31157 d.op1 = op1;
31158 d.vmode = GET_MODE (targ);
31159 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
31160 d.testing_p = false;
31162 for (i = 0; i < nelt; ++i)
31163 d.perm[i] = i * 2 + odd;
31165 /* We'll either be able to implement the permutation directly... */
31166 if (expand_vec_perm_1 (&d))
31167 return;
31169 /* ... or we use the special-case patterns. */
31170 expand_vec_perm_even_odd_1 (&d, odd);
31171 }
31173 /* Return the calling-ABI-specific va_list type node for FNDECL,
31174 i.e. the FNDECL-specific va_list type. */
31176 static tree
31177 ix86_fn_abi_va_list (tree fndecl)
31178 {
31179 if (!TARGET_64BIT)
31180 return va_list_type_node;
31181 gcc_assert (fndecl != NULL_TREE);
31183 if (ix86_function_abi ((const_tree) fndecl) == MS_ABI)
31184 return ms_va_list_type_node;
31185 else
31186 return sysv_va_list_type_node;
31187 }
31189 /* Returns the canonical va_list type specified by TYPE. If there
31190 is no valid TYPE provided, it returns NULL_TREE. */
31192 static tree
31193 ix86_canonical_va_list_type (tree type)
31194 {
31195 tree wtype, htype;
31197 /* Resolve references and pointers to va_list type. */
31198 if (TREE_CODE (type) == MEM_REF)
31199 type = TREE_TYPE (type);
31200 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
31201 type = TREE_TYPE (type);
31202 else if (POINTER_TYPE_P (type) && TREE_CODE (TREE_TYPE (type)) == ARRAY_TYPE)
31203 type = TREE_TYPE (type);
31205 if (TARGET_64BIT)
31206 {
31207 wtype = va_list_type_node;
31208 gcc_assert (wtype != NULL_TREE);
31209 htype = type;
31210 if (TREE_CODE (wtype) == ARRAY_TYPE)
31211 {
31212 /* If va_list is an array type, the argument may have decayed
31213 to a pointer type, e.g. by being passed to another function.
31214 In that case, unwrap both types so that we can compare the
31215 underlying records. */
31216 if (TREE_CODE (htype) == ARRAY_TYPE
31217 || POINTER_TYPE_P (htype))
31218 {
31219 wtype = TREE_TYPE (wtype);
31220 htype = TREE_TYPE (htype);
31221 }
31222 }
31223 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31224 return va_list_type_node;
31225 wtype = sysv_va_list_type_node;
31226 gcc_assert (wtype != NULL_TREE);
31227 htype = type;
31228 if (TREE_CODE (wtype) == ARRAY_TYPE)
31229 {
31230 /* If va_list is an array type, the argument may have decayed
31231 to a pointer type, e.g. by being passed to another function.
31232 In that case, unwrap both types so that we can compare the
31233 underlying records. */
31234 if (TREE_CODE (htype) == ARRAY_TYPE
31235 || POINTER_TYPE_P (htype))
31236 {
31237 wtype = TREE_TYPE (wtype);
31238 htype = TREE_TYPE (htype);
31239 }
31240 }
31241 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31242 return sysv_va_list_type_node;
31243 wtype = ms_va_list_type_node;
31244 gcc_assert (wtype != NULL_TREE);
31245 htype = type;
31246 if (TREE_CODE (wtype) == ARRAY_TYPE)
31247 {
31248 /* If va_list is an array type, the argument may have decayed
31249 to a pointer type, e.g. by being passed to another function.
31250 In that case, unwrap both types so that we can compare the
31251 underlying records. */
31252 if (TREE_CODE (htype) == ARRAY_TYPE
31253 || POINTER_TYPE_P (htype))
31254 {
31255 wtype = TREE_TYPE (wtype);
31256 htype = TREE_TYPE (htype);
31257 }
31258 }
31259 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
31260 return ms_va_list_type_node;
31261 }
31263 return std_canonical_va_list_type (type);
31264 }
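/* An illustrative case of the decay handled above: for the 64-bit SYSV
   ABI, __builtin_va_list is the array type "struct __va_list_tag [1]",
   so a parameter declared as
     void fn (va_list ap);
   reaches this function with the decayed type "struct __va_list_tag *";
   unwrapping one level from both WTYPE and HTYPE lets the main variants
   compare equal.  */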
31266 /* Iterate through the target-specific builtin types for va_list.
31267 IDX denotes the iterator, *PTREE is set to the result type of
31268 the va_list builtin, and *PNAME to its internal type.
31269 Returns zero if there is no element for this index; otherwise
31270 IDX should be incremented for the next call.
31271 Note, do not iterate a base builtin's name like __builtin_va_list.
31272 Used from c_common_nodes_and_builtins. */
31274 static int
31275 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
31276 {
31277 if (!TARGET_64BIT)
31278 return 0;
31280 switch (idx)
31281 {
31282 default:
31283 break;
31284 case 0:
31285 *ptree = ms_va_list_type_node;
31286 *pname = "__builtin_ms_va_list";
31287 return 1;
31289 case 1:
31290 *ptree = sysv_va_list_type_node;
31291 *pname = "__builtin_sysv_va_list";
31292 return 1;
31293 }
31295 return 0;
31296 }
31299 /* Initialize the GCC target structure. */
31300 #undef TARGET_RETURN_IN_MEMORY
31301 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
31303 #undef TARGET_LEGITIMIZE_ADDRESS
31304 #define TARGET_LEGITIMIZE_ADDRESS ix86_legitimize_address
31306 #undef TARGET_ATTRIBUTE_TABLE
31307 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
31308 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31309 # undef TARGET_MERGE_DECL_ATTRIBUTES
31310 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
31311 #endif
31313 #undef TARGET_COMP_TYPE_ATTRIBUTES
31314 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
31316 #undef TARGET_INIT_BUILTINS
31317 #define TARGET_INIT_BUILTINS ix86_init_builtins
31318 #undef TARGET_BUILTIN_DECL
31319 #define TARGET_BUILTIN_DECL ix86_builtin_decl
31320 #undef TARGET_EXPAND_BUILTIN
31321 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
31323 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
31324 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
31325 ix86_builtin_vectorized_function
31327 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
31328 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
31330 #undef TARGET_BUILTIN_RECIPROCAL
31331 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
31333 #undef TARGET_ASM_FUNCTION_EPILOGUE
31334 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
31336 #undef TARGET_ENCODE_SECTION_INFO
31337 #ifndef SUBTARGET_ENCODE_SECTION_INFO
31338 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
31339 #else
31340 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
31341 #endif
31343 #undef TARGET_ASM_OPEN_PAREN
31344 #define TARGET_ASM_OPEN_PAREN ""
31345 #undef TARGET_ASM_CLOSE_PAREN
31346 #define TARGET_ASM_CLOSE_PAREN ""
31348 #undef TARGET_ASM_BYTE_OP
31349 #define TARGET_ASM_BYTE_OP ASM_BYTE
31351 #undef TARGET_ASM_ALIGNED_HI_OP
31352 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
31353 #undef TARGET_ASM_ALIGNED_SI_OP
31354 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
31355 #ifdef ASM_QUAD
31356 #undef TARGET_ASM_ALIGNED_DI_OP
31357 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
31358 #endif
31360 #undef TARGET_PROFILE_BEFORE_PROLOGUE
31361 #define TARGET_PROFILE_BEFORE_PROLOGUE ix86_profile_before_prologue
31363 #undef TARGET_ASM_UNALIGNED_HI_OP
31364 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
31365 #undef TARGET_ASM_UNALIGNED_SI_OP
31366 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
31367 #undef TARGET_ASM_UNALIGNED_DI_OP
31368 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
31370 #undef TARGET_PRINT_OPERAND
31371 #define TARGET_PRINT_OPERAND ix86_print_operand
31372 #undef TARGET_PRINT_OPERAND_ADDRESS
31373 #define TARGET_PRINT_OPERAND_ADDRESS ix86_print_operand_address
31374 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
31375 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P ix86_print_operand_punct_valid_p
31377 #undef TARGET_SCHED_ADJUST_COST
31378 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
31379 #undef TARGET_SCHED_ISSUE_RATE
31380 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
31381 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
31382 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
31383 ia32_multipass_dfa_lookahead
31385 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
31386 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
31388 #ifdef HAVE_AS_TLS
31389 #undef TARGET_HAVE_TLS
31390 #define TARGET_HAVE_TLS true
31391 #endif
31392 #undef TARGET_CANNOT_FORCE_CONST_MEM
31393 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
31394 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
31395 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
31397 #undef TARGET_DELEGITIMIZE_ADDRESS
31398 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
31400 #undef TARGET_MS_BITFIELD_LAYOUT_P
31401 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
31403 #if TARGET_MACHO
31404 #undef TARGET_BINDS_LOCAL_P
31405 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
31406 #endif
31407 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
31408 #undef TARGET_BINDS_LOCAL_P
31409 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
31410 #endif
31412 #undef TARGET_ASM_OUTPUT_MI_THUNK
31413 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
31414 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
31415 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
31417 #undef TARGET_ASM_FILE_START
31418 #define TARGET_ASM_FILE_START x86_file_start
31420 #undef TARGET_DEFAULT_TARGET_FLAGS
31421 #define TARGET_DEFAULT_TARGET_FLAGS \
31423 | TARGET_SUBTARGET_DEFAULT \
31424 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT \
31427 #undef TARGET_HANDLE_OPTION
31428 #define TARGET_HANDLE_OPTION ix86_handle_option
31430 #undef TARGET_REGISTER_MOVE_COST
31431 #define TARGET_REGISTER_MOVE_COST ix86_register_move_cost
31432 #undef TARGET_MEMORY_MOVE_COST
31433 #define TARGET_MEMORY_MOVE_COST ix86_memory_move_cost
31434 #undef TARGET_RTX_COSTS
31435 #define TARGET_RTX_COSTS ix86_rtx_costs
31436 #undef TARGET_ADDRESS_COST
31437 #define TARGET_ADDRESS_COST ix86_address_cost
31439 #undef TARGET_FIXED_CONDITION_CODE_REGS
31440 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
31441 #undef TARGET_CC_MODES_COMPATIBLE
31442 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
31444 #undef TARGET_MACHINE_DEPENDENT_REORG
31445 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
31447 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
31448 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
31450 #undef TARGET_BUILD_BUILTIN_VA_LIST
31451 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
31453 #undef TARGET_ENUM_VA_LIST_P
31454 #define TARGET_ENUM_VA_LIST_P ix86_enum_va_list
31456 #undef TARGET_FN_ABI_VA_LIST
31457 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
31459 #undef TARGET_CANONICAL_VA_LIST_TYPE
31460 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
31462 #undef TARGET_EXPAND_BUILTIN_VA_START
31463 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
31465 #undef TARGET_MD_ASM_CLOBBERS
31466 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
31468 #undef TARGET_PROMOTE_PROTOTYPES
31469 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
31470 #undef TARGET_STRUCT_VALUE_RTX
31471 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
31472 #undef TARGET_SETUP_INCOMING_VARARGS
31473 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
31474 #undef TARGET_MUST_PASS_IN_STACK
31475 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
31476 #undef TARGET_FUNCTION_ARG_ADVANCE
31477 #define TARGET_FUNCTION_ARG_ADVANCE ix86_function_arg_advance
31478 #undef TARGET_FUNCTION_ARG
31479 #define TARGET_FUNCTION_ARG ix86_function_arg
31480 #undef TARGET_PASS_BY_REFERENCE
31481 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
31482 #undef TARGET_INTERNAL_ARG_POINTER
31483 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
31484 #undef TARGET_UPDATE_STACK_BOUNDARY
31485 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
31486 #undef TARGET_GET_DRAP_RTX
31487 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
31488 #undef TARGET_STRICT_ARGUMENT_NAMING
31489 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
31490 #undef TARGET_STATIC_CHAIN
31491 #define TARGET_STATIC_CHAIN ix86_static_chain
31492 #undef TARGET_TRAMPOLINE_INIT
31493 #define TARGET_TRAMPOLINE_INIT ix86_trampoline_init
31494 #undef TARGET_RETURN_POPS_ARGS
31495 #define TARGET_RETURN_POPS_ARGS ix86_return_pops_args
31497 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
31498 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
31500 #undef TARGET_SCALAR_MODE_SUPPORTED_P
31501 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
31503 #undef TARGET_VECTOR_MODE_SUPPORTED_P
31504 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
31506 #undef TARGET_C_MODE_FOR_SUFFIX
31507 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
31509 #ifdef HAVE_AS_TLS
31510 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
31511 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
31512 #endif
31514 #ifdef SUBTARGET_INSERT_ATTRIBUTES
31515 #undef TARGET_INSERT_ATTRIBUTES
31516 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
31517 #endif
31519 #undef TARGET_MANGLE_TYPE
31520 #define TARGET_MANGLE_TYPE ix86_mangle_type
31522 #undef TARGET_STACK_PROTECT_FAIL
31523 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
31525 #undef TARGET_FUNCTION_VALUE
31526 #define TARGET_FUNCTION_VALUE ix86_function_value
31528 #undef TARGET_FUNCTION_VALUE_REGNO_P
31529 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p
31531 #undef TARGET_SECONDARY_RELOAD
31532 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
31534 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
31535 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
31536 ix86_builtin_vectorization_cost
31537 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
31538 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM \
31539 ix86_vectorize_builtin_vec_perm
31540 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK
31541 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM_OK \
31542 ix86_vectorize_builtin_vec_perm_ok
31544 #undef TARGET_SET_CURRENT_FUNCTION
31545 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
31547 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
31548 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
31550 #undef TARGET_OPTION_SAVE
31551 #define TARGET_OPTION_SAVE ix86_function_specific_save
31553 #undef TARGET_OPTION_RESTORE
31554 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
31556 #undef TARGET_OPTION_PRINT
31557 #define TARGET_OPTION_PRINT ix86_function_specific_print
31559 #undef TARGET_CAN_INLINE_P
31560 #define TARGET_CAN_INLINE_P ix86_can_inline_p
31562 #undef TARGET_EXPAND_TO_RTL_HOOK
31563 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
31565 #undef TARGET_LEGITIMATE_ADDRESS_P
31566 #define TARGET_LEGITIMATE_ADDRESS_P ix86_legitimate_address_p
31568 #undef TARGET_IRA_COVER_CLASSES
31569 #define TARGET_IRA_COVER_CLASSES i386_ira_cover_classes
31571 #undef TARGET_FRAME_POINTER_REQUIRED
31572 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required
31574 #undef TARGET_CAN_ELIMINATE
31575 #define TARGET_CAN_ELIMINATE ix86_can_eliminate
31577 #undef TARGET_ASM_CODE_END
31578 #define TARGET_ASM_CODE_END ix86_code_end
31580 struct gcc_target targetm = TARGET_INITIALIZER;
31582 #include "gt-i386.h"